// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Page heap.
//
// See malloc.go for overview.

package runtime

import (
	"runtime/internal/atomic"
	"runtime/internal/sys"
	"unsafe"
)

// minPhysPageSize is a lower-bound on the physical page size. The
// true physical page size may be larger than this. In contrast,
// sys.PhysPageSize is an upper-bound on the physical page size.
const minPhysPageSize = 4096

// Main malloc heap.
// The heap itself is the "free[]" and "large" arrays,
// but all the other global data is here too.
//
// mheap must not be heap-allocated because it contains mSpanLists,
// which must not be heap-allocated.
//
//go:notinheap
type mheap struct {
	lock      mutex
	free      [_MaxMHeapList]mSpanList // free lists of given length up to _MaxMHeapList
	freelarge mTreap                   // free treap of length >= _MaxMHeapList
	busy      [_MaxMHeapList]mSpanList // busy lists of large spans of given length
	busylarge mSpanList                // busy lists of large spans length >= _MaxMHeapList
	sweepgen  uint32                   // sweep generation, see comment in mspan
	sweepdone uint32                   // all spans are swept
	sweepers  uint32                   // number of active sweepone calls

	// allspans is a slice of all mspans ever created. Each mspan
	// appears exactly once.
	//
	// The memory for allspans is manually managed and can be
	// reallocated and move as the heap grows.
	//
	// In general, allspans is protected by mheap_.lock, which
	// prevents concurrent access as well as freeing the backing
	// store. Accesses during STW might not hold the lock, but
	// must ensure that allocation cannot happen around the
	// access (since that may free the backing store).
	allspans []*mspan // all spans out there

	// spans is a lookup table to map virtual address page IDs to *mspan.
	// For allocated spans, their pages map to the span itself.
	// For free spans, only the lowest and highest pages map to the span itself.
	// Internal pages map to an arbitrary span.
	// For pages that have never been allocated, spans entries are nil.
	//
	// This is backed by a reserved region of the address space so
	// it can grow without moving. The memory up to len(spans) is
	// mapped. cap(spans) indicates the total reserved memory.
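	//
	// For example (illustrative): for a heap pointer p, its span can be
	// looked up as spans[(p-arena_start)>>_PageShift], which is exactly
	// the computation spanOfUnchecked performs below.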
	spans []*mspan

	// sweepSpans contains two mspan stacks: one of swept in-use
	// spans, and one of unswept in-use spans. These two trade
	// roles on each GC cycle. Since the sweepgen increases by 2
	// on each cycle, this means the swept spans are in
	// sweepSpans[sweepgen/2%2] and the unswept spans are in
	// sweepSpans[1-sweepgen/2%2]. Sweeping pops spans from the
	// unswept stack and pushes spans that are still in-use on the
	// swept stack. Likewise, allocating an in-use span pushes it
	// on the swept stack.
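	//
	// For example (illustrative): when sweepgen is 4, the swept spans
	// live in sweepSpans[0] and the unswept spans in sweepSpans[1];
	// after the next cycle (sweepgen 6) the two roles swap.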
	sweepSpans [2]gcSweepBuf

	_ uint32 // align uint64 fields on 32-bit for atomics

	// Proportional sweep
	//
	// These parameters represent a linear function from heap_live
	// to page sweep count. The proportional sweep system works to
	// stay in the black by keeping the current page sweep count
	// above this line at the current heap_live.
	//
	// The line has slope sweepPagesPerByte and passes through a
	// basis point at (sweepHeapLiveBasis, pagesSweptBasis). At
	// any given time, the system is at (memstats.heap_live,
	// pagesSwept) in this space.
	//
	// It's important that the line pass through a point we
	// control rather than simply starting at a (0,0) origin
	// because that lets us adjust sweep pacing at any time while
	// accounting for current progress. If we could only adjust
	// the slope, it would create a discontinuity in debt if any
	// progress has already been made.
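	//
	// In other words (illustrative): staying in the black means keeping
	//	pagesSwept >= pagesSweptBasis + sweepPagesPerByte*(heap_live-sweepHeapLiveBasis)
	// at all times; allocation paths that would drop below this line
	// must sweep pages first.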
	pagesInUse         uint64  // pages of spans in stats _MSpanInUse; R/W with mheap.lock
	pagesSwept         uint64  // pages swept this cycle; updated atomically
	pagesSweptBasis    uint64  // pagesSwept to use as the origin of the sweep ratio; updated atomically
	sweepHeapLiveBasis uint64  // value of heap_live to use as the origin of sweep ratio; written with lock, read without
	sweepPagesPerByte  float64 // proportional sweep ratio; written with lock, read without
	// TODO(austin): pagesInUse should be a uintptr, but the 386
	// compiler can't 8-byte align fields.

	// Malloc stats.
	largealloc  uint64                  // bytes allocated for large objects
	nlargealloc uint64                  // number of large object allocations
	largefree   uint64                  // bytes freed for large objects (>maxsmallsize)
	nlargefree  uint64                  // number of frees for large objects (>maxsmallsize)
	nsmallfree  [_NumSizeClasses]uint64 // number of frees for small objects (<=maxsmallsize)

	// range of addresses we might see in the heap
	bitmap         uintptr // Points to one byte past the end of the bitmap
	bitmap_mapped  uintptr
	arena_start    uintptr
	arena_used     uintptr // One byte past usable heap arena. Set with setArenaUsed.
	arena_end      uintptr
	arena_reserved bool

	// central free lists for small size classes.
	// the padding makes sure that the MCentrals are
	// spaced CacheLineSize bytes apart, so that each MCentral.lock
	// gets its own cache line.
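	//
	// For example (with hypothetical sizes): if CacheLineSize is 64 and
	// an mcentral occupied 200 bytes, pad would be 64 - 200%64 = 56
	// bytes, so each array element ends on a cache-line boundary.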
	central [_NumSizeClasses]struct {
		mcentral mcentral
		pad      [sys.CacheLineSize - unsafe.Sizeof(mcentral{})%sys.CacheLineSize]byte
	}

	spanalloc             fixalloc // allocator for span*
	cachealloc            fixalloc // allocator for mcache*
	treapalloc            fixalloc // allocator for treapNodes* used by large objects
	specialfinalizeralloc fixalloc // allocator for specialfinalizer*
	specialprofilealloc   fixalloc // allocator for specialprofile*
	speciallock           mutex    // lock for special record allocators.
}

var mheap_ mheap

// An MSpan is a run of pages.
//
// When a MSpan is in the heap free list, state == MSpanFree
// and heapmap(s->start) == span, heapmap(s->start+s->npages-1) == span.
//
// When a MSpan is allocated, state == MSpanInUse or MSpanManual
// and heapmap(i) == span for all s->start <= i < s->start+s->npages.

// Every MSpan is in one doubly-linked list,
// either one of the MHeap's free lists or one of the
// MCentral's span lists.

// An MSpan representing actual memory has state _MSpanInUse,
// _MSpanManual, or _MSpanFree. Transitions between these states are
// constrained as follows:
//
// * A span may transition from free to in-use or manual during any GC
//   phase.
//
// * During sweeping (gcphase == _GCoff), a span may transition from
//   in-use to free (as a result of sweeping) or manual to free (as a
//   result of stacks being freed).
//
// * During GC (gcphase != _GCoff), a span *must not* transition from
//   manual or in-use to free. Because concurrent GC may read a pointer
//   and then look up its span, the span state must be monotonic.
type mSpanState uint8

const (
	_MSpanDead   mSpanState = iota
	_MSpanInUse             // allocated for garbage collected heap
	_MSpanManual            // allocated for manual management (e.g., stack allocator)
	_MSpanFree
)

// mSpanStateNames are the names of the span states, indexed by
// mSpanState.
var mSpanStateNames = []string{
	"_MSpanDead",
	"_MSpanInUse",
	"_MSpanManual",
	"_MSpanFree",
}

// mSpanList heads a linked list of spans.
//
//go:notinheap
type mSpanList struct {
	first *mspan // first span in list, or nil if none
	last  *mspan // last span in list, or nil if none
}

//go:notinheap
type mspan struct {
	next *mspan     // next span in list, or nil if none
	prev *mspan     // previous span in list, or nil if none
	list *mSpanList // For debugging. TODO: Remove.

	startAddr uintptr // address of first byte of span aka s.base()
	npages    uintptr // number of pages in span

	manualFreeList gclinkptr // list of free objects in _MSpanManual spans

	// freeindex is the slot index between 0 and nelems at which to begin scanning
	// for the next free object in this span.
	// Each allocation scans allocBits starting at freeindex until it encounters a 0
	// indicating a free object. freeindex is then adjusted so that subsequent scans begin
	// just past the newly discovered free object.
	//
	// If freeindex == nelem, this span has no free objects.
	//
	// allocBits is a bitmap of objects in this span.
	// If n >= freeindex and allocBits[n/8] & (1<<(n%8)) is 0
	// then object n is free;
	// otherwise, object n is allocated. Bits starting at nelem are
	// undefined and should never be referenced.
	//
	// Object n starts at address n*elemsize + (start << pageShift).
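	//
	// For example (illustrative): with elemsize 48 and freeindex 3, an
	// allocation scans allocBits from bit 3; if bit 5 is the first clear
	// bit, object 5 at s.base()+5*48 is handed out and freeindex becomes 6.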
	freeindex uintptr
	// TODO: Look up nelems from sizeclass and remove this field if it
	// helps performance.
	nelems uintptr // number of objects in the span.

	// Cache of the allocBits at freeindex. allocCache is shifted
	// such that the lowest bit corresponds to the bit freeindex.
	// allocCache holds the complement of allocBits, thus allowing
	// ctz (count trailing zero) to use it directly.
	// allocCache may contain bits beyond s.nelems; the caller must ignore
	// these.
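	//
	// For example (illustrative): because the cache holds the complement,
	// something like sys.Ctz64(allocCache) gives the offset from freeindex
	// to the next free slot, and a result of 64 means the cached window
	// holds no free slot.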
	allocCache uint64

	// allocBits and gcmarkBits hold pointers to a span's mark and
	// allocation bits. The pointers are 8 byte aligned.
	// There are three arenas where this data is held.
	// free: Dirty arenas that are no longer accessed
	//       and can be reused.
	// next: Holds information to be used in the next GC cycle.
	// current: Information being used during this GC cycle.
	// previous: Information being used during the last GC cycle.
	// A new GC cycle starts with the call to finishsweep_m.
	// finishsweep_m moves the previous arena to the free arena,
	// the current arena to the previous arena, and
	// the next arena to the current arena.
	// The next arena is populated as the spans request
	// memory to hold gcmarkBits for the next GC cycle as well
	// as allocBits for newly allocated spans.
	//
	// The pointer arithmetic is done "by hand" instead of using
	// arrays to avoid bounds checks along critical performance
	// paths.
	// The sweep will free the old allocBits and set allocBits to the
	// gcmarkBits. The gcmarkBits are replaced with fresh zeroed-out
	// memory.
	allocBits  *gcBits
	gcmarkBits *gcBits

	// sweep generation:
	// if sweepgen == h->sweepgen - 2, the span needs sweeping
	// if sweepgen == h->sweepgen - 1, the span is currently being swept
	// if sweepgen == h->sweepgen, the span is swept and ready to use
	// h->sweepgen is incremented by 2 after every GC
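	//
	// For example (illustrative): if h->sweepgen is 6, a span whose
	// sweepgen is 4 still needs sweeping, 5 means it is being swept
	// right now, and 6 means it has been swept and is ready to use.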

	sweepgen    uint32
	divMul      uint16     // for divide by elemsize - divMagic.mul
	baseMask    uint16     // if non-0, elemsize is a power of 2, & this will get object allocation base
	allocCount  uint16     // number of allocated objects
	sizeclass   uint8      // size class
	incache     bool       // being used by an mcache
	state       mSpanState // mspaninuse etc
	needzero    uint8      // needs to be zeroed before allocation
	divShift    uint8      // for divide by elemsize - divMagic.shift
	divShift2   uint8      // for divide by elemsize - divMagic.shift2
	elemsize    uintptr    // computed from sizeclass or from npages
	unusedsince int64      // first time spotted by gc in mspanfree state
	npreleased  uintptr    // number of pages released to the os
	limit       uintptr    // end of data in span
	speciallock mutex      // guards specials list
	specials    *special   // linked list of special records sorted by offset.
}

func (s *mspan) base() uintptr {
	return s.startAddr
}

func (s *mspan) layout() (size, n, total uintptr) {
	total = s.npages << _PageShift
	size = s.elemsize
	if size > 0 {
		n = total / size
	}
	return
}
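
// recordspan adds a newly allocated span to h.allspans.
// It is registered in mheap.init below as h.spanalloc's first-allocation
// hook, so it runs once per span object created by that allocator, not
// when a span is reused.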
func recordspan(vh unsafe.Pointer, p unsafe.Pointer) {
	h := (*mheap)(vh)
	s := (*mspan)(p)
	if len(h.allspans) >= cap(h.allspans) {
		n := 64 * 1024 / sys.PtrSize
		if n < cap(h.allspans)*3/2 {
			n = cap(h.allspans) * 3 / 2
		}
		var new []*mspan
		sp := (*slice)(unsafe.Pointer(&new))
		sp.array = sysAlloc(uintptr(n)*sys.PtrSize, &memstats.other_sys)
		if sp.array == nil {
			throw("runtime: cannot allocate memory")
		}
		sp.len = len(h.allspans)
		sp.cap = n
		if len(h.allspans) > 0 {
			copy(new, h.allspans)
		}
		oldAllspans := h.allspans
		h.allspans = new
		if len(oldAllspans) != 0 {
			sysFree(unsafe.Pointer(&oldAllspans[0]), uintptr(cap(oldAllspans))*unsafe.Sizeof(oldAllspans[0]), &memstats.other_sys)
		}
	}
	h.allspans = append(h.allspans, s)
}

// inheap reports whether b is a pointer into a (potentially dead) heap object.
// It returns false for pointers into _MSpanManual spans.
// Non-preemptible because it is used by write barriers.
//go:nowritebarrier
//go:nosplit
func inheap(b uintptr) bool {
	if b == 0 || b < mheap_.arena_start || b >= mheap_.arena_used {
		return false
	}
	// Not a beginning of a block, consult span table to find the block beginning.
	s := mheap_.spans[(b-mheap_.arena_start)>>_PageShift]
	if s == nil || b < s.base() || b >= s.limit || s.state != mSpanInUse {
		return false
	}
	return true
}

// inHeapOrStack is a variant of inheap that returns true for pointers
// into any allocated heap span.
//
//go:nowritebarrier
//go:nosplit
func inHeapOrStack(b uintptr) bool {
	if b == 0 || b < mheap_.arena_start || b >= mheap_.arena_used {
		return false
	}
	// Not a beginning of a block, consult span table to find the block beginning.
	s := mheap_.spans[(b-mheap_.arena_start)>>_PageShift]
	if s == nil || b < s.base() {
		return false
	}
	switch s.state {
	case mSpanInUse, _MSpanManual:
		return b < s.limit
	default:
		return false
	}
}

// TODO: spanOf and spanOfUnchecked are open-coded in a lot of places.
// Use the functions instead.

// spanOf returns the span of p. If p does not point into the heap or
// no span contains p, spanOf returns nil.
func spanOf(p uintptr) *mspan {
	if p == 0 || p < mheap_.arena_start || p >= mheap_.arena_used {
		return nil
	}
	return spanOfUnchecked(p)
}

// spanOfUnchecked is equivalent to spanOf, but the caller must ensure
// that p points into the heap (that is, mheap_.arena_start <= p <
// mheap_.arena_used).
func spanOfUnchecked(p uintptr) *mspan {
	return mheap_.spans[(p-mheap_.arena_start)>>_PageShift]
}
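
// mlookup returns 1 if v points to a heap object, filling in *base,
// *size, and *sp (for whichever pointers are non-nil) with the object's
// base address, its size, and its span; otherwise it returns 0 with
// *base and *size zeroed.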
func mlookup(v uintptr, base *uintptr, size *uintptr, sp **mspan) int32 {
	_g_ := getg()

	_g_.m.mcache.local_nlookup++
	if sys.PtrSize == 4 && _g_.m.mcache.local_nlookup >= 1<<30 {
		// purge cache stats to prevent overflow
		lock(&mheap_.lock)
		purgecachedstats(_g_.m.mcache)
		unlock(&mheap_.lock)
	}

	s := mheap_.lookupMaybe(unsafe.Pointer(v))
	if sp != nil {
		*sp = s
	}
	if s == nil {
		if base != nil {
			*base = 0
		}
		if size != nil {
			*size = 0
		}
		return 0
	}

	p := s.base()
	if s.sizeclass == 0 {
		// Large object.
		if base != nil {
			*base = p
		}
		if size != nil {
			*size = s.npages << _PageShift
		}
		return 1
	}

	n := s.elemsize
	if base != nil {
		i := (v - p) / n
		*base = p + i*n
	}
	if size != nil {
		*size = n
	}

	return 1
}

// Initialize the heap.
func (h *mheap) init(spansStart, spansBytes uintptr) {
	h.treapalloc.init(unsafe.Sizeof(treapNode{}), nil, nil, &memstats.other_sys)
	h.spanalloc.init(unsafe.Sizeof(mspan{}), recordspan, unsafe.Pointer(h), &memstats.mspan_sys)
	h.cachealloc.init(unsafe.Sizeof(mcache{}), nil, nil, &memstats.mcache_sys)
	h.specialfinalizeralloc.init(unsafe.Sizeof(specialfinalizer{}), nil, nil, &memstats.other_sys)
	h.specialprofilealloc.init(unsafe.Sizeof(specialprofile{}), nil, nil, &memstats.other_sys)

	// Don't zero mspan allocations. Background sweeping can
	// inspect a span concurrently with allocating it, so it's
	// important that the span's sweepgen survive across freeing
	// and re-allocating a span to prevent background sweeping
	// from improperly cas'ing it from 0.
	//
	// This is safe because mspan contains no heap pointers.
	h.spanalloc.zero = false

	// h->mapcache needs no init
	for i := range h.free {
		h.free[i].init()
		h.busy[i].init()
	}

	h.busylarge.init()
	for i := range h.central {
		h.central[i].mcentral.init(int32(i))
	}

	sp := (*slice)(unsafe.Pointer(&h.spans))
	sp.array = unsafe.Pointer(spansStart)
	sp.len = 0
	sp.cap = int(spansBytes / sys.PtrSize)
}
|
|
|
|
|
|
2017-04-07 13:49:51 -04:00
|
|
|
// setArenaUsed extends the usable arena to address arena_used and
// maps auxiliary VM regions for any newly usable arena space.
//
// racemap indicates that this memory should be managed by the race
// detector. racemap should be true unless this is covering a VM hole.
func (h *mheap) setArenaUsed(arena_used uintptr, racemap bool) {
	// Map auxiliary structures *before* h.arena_used is updated.
	// Waiting to update arena_used until after the memory has been mapped
	// avoids faults when other threads try to access these regions immediately
	// after observing the change to arena_used.

	// Map the bitmap.
	h.mapBits(arena_used)

	// Map spans array.
	h.mapSpans(arena_used)

	// Tell the race detector about the new heap memory.
	if racemap && raceenabled {
		racemapshadow(unsafe.Pointer(h.arena_used), arena_used-h.arena_used)
	}

	h.arena_used = arena_used
}

// mapSpans makes sure that the spans are mapped
// up to the new value of arena_used.
//
// Don't call this directly. Call mheap.setArenaUsed.
func (h *mheap) mapSpans(arena_used uintptr) {
	// Map spans array, PageSize at a time.
	n := arena_used
	n -= h.arena_start
	n = n / _PageSize * sys.PtrSize
	n = round(n, physPageSize)
	need := n / unsafe.Sizeof(h.spans[0])
	have := uintptr(len(h.spans))
	if have >= need {
		return
	}
	h.spans = h.spans[:need]
	sysMap(unsafe.Pointer(&h.spans[have]), (need-have)*unsafe.Sizeof(h.spans[0]), h.arena_reserved, &memstats.other_sys)
}

// Sweeps spans in list until it reclaims at least npages into the heap.
// Returns the actual number of pages reclaimed.
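// h must be locked; reclaimList drops the lock around each call to
// mspan.sweep and reacquires it before looking at the list again.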
func (h *mheap) reclaimList(list *mSpanList, npages uintptr) uintptr {
	n := uintptr(0)
	sg := mheap_.sweepgen
retry:
	for s := list.first; s != nil; s = s.next {
		if s.sweepgen == sg-2 && atomic.Cas(&s.sweepgen, sg-2, sg-1) {
			list.remove(s)
			// swept spans are at the end of the list
			list.insertBack(s) // Puts it back on a busy list. s is not in the treap at this point.
			unlock(&h.lock)
			snpages := s.npages
			if s.sweep(false) {
				n += snpages
			}
			lock(&h.lock)
			if n >= npages {
				return n
			}
			// the span could have been moved elsewhere
			goto retry
		}
		if s.sweepgen == sg-1 {
			// the span is being swept by the background sweeper, skip
			continue
		}
		// already swept empty span,
		// all subsequent ones must also be either swept or in process of sweeping
		break
	}
	return n
}

// Sweeps and reclaims at least npage pages into heap.
// Called before allocating npage pages.
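// h must be locked; like reclaimList, reclaim may temporarily drop the
// lock while sweeping.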
func (h *mheap) reclaim(npage uintptr) {
	// First try to sweep busy spans with large objects of size >= npage,
	// this has good chances of reclaiming the necessary space.
	for i := int(npage); i < len(h.busy); i++ {
		if h.reclaimList(&h.busy[i], npage) != 0 {
			return // Bingo!
		}
	}

	// Then -- even larger objects.
	if h.reclaimList(&h.busylarge, npage) != 0 {
		return // Bingo!
	}

	// Now try smaller objects.
	// One such object is not enough, so we need to reclaim several of them.
	reclaimed := uintptr(0)
	for i := 0; i < int(npage) && i < len(h.busy); i++ {
		reclaimed += h.reclaimList(&h.busy[i], npage-reclaimed)
		if reclaimed >= npage {
			return
		}
	}

	// Now sweep everything that is not yet swept.
	unlock(&h.lock)
	for {
		n := sweepone()
		if n == ^uintptr(0) { // all spans are swept
			break
		}
		reclaimed += n
		if reclaimed >= npage {
			break
		}
	}
	lock(&h.lock)
}

// Allocate a new span of npage pages from the heap for GC'd memory
// and record its size class in the HeapMap and HeapMapCache.
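// alloc_m must be called on the system stack; it checks for this and
// throws otherwise. Callers normally go through mheap.alloc.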
func (h *mheap) alloc_m(npage uintptr, sizeclass int32, large bool) *mspan {
	_g_ := getg()
	if _g_ != _g_.m.g0 {
		throw("_mheap_alloc not on g0 stack")
	}
	lock(&h.lock)

	// To prevent excessive heap growth, before allocating n pages
	// we need to sweep and reclaim at least n pages.
	if h.sweepdone == 0 {
		// TODO(austin): This tends to sweep a large number of
		// spans in order to find a few completely free spans
		// (for example, in the garbage benchmark, this sweeps
		// ~30x the number of pages it's trying to allocate).
		// If GC kept a bit for whether there were any marks
		// in a span, we could release these free spans
		// at the end of GC and eliminate this entirely.
		if trace.enabled {
			traceGCSweepStart()
		}
		h.reclaim(npage)
		if trace.enabled {
			traceGCSweepDone()
		}
	}

	// transfer stats from cache to global
	memstats.heap_scan += uint64(_g_.m.mcache.local_scan)
	_g_.m.mcache.local_scan = 0
	memstats.tinyallocs += uint64(_g_.m.mcache.local_tinyallocs)
	_g_.m.mcache.local_tinyallocs = 0

	s := h.allocSpanLocked(npage, &memstats.heap_inuse)
	if s != nil {
		// Record span info, because gc needs to be
		// able to map interior pointer to containing span.
		atomic.Store(&s.sweepgen, h.sweepgen)
		h.sweepSpans[h.sweepgen/2%2].push(s) // Add to swept in-use list.
		s.state = _MSpanInUse
		s.allocCount = 0
		s.sizeclass = uint8(sizeclass)
		if sizeclass == 0 {
			s.elemsize = s.npages << _PageShift
			s.divShift = 0
			s.divMul = 0
			s.divShift2 = 0
			s.baseMask = 0
		} else {
			s.elemsize = uintptr(class_to_size[sizeclass])
			m := &class_to_divmagic[sizeclass]
			s.divShift = m.shift
			s.divMul = m.mul
			s.divShift2 = m.shift2
			s.baseMask = m.baseMask
		}

		// update stats, sweep lists
		h.pagesInUse += uint64(npage)
		if large {
			memstats.heap_objects++
			mheap_.largealloc += uint64(s.elemsize)
			mheap_.nlargealloc++
			atomic.Xadd64(&memstats.heap_live, int64(npage<<_PageShift))
			// Swept spans are at the end of lists.
			if s.npages < uintptr(len(h.busy)) {
				h.busy[s.npages].insertBack(s)
			} else {
				h.busylarge.insertBack(s)
			}
		}
	}
	// heap_scan and heap_live were updated.
	if gcBlackenEnabled != 0 {
		gcController.revise()
	}

	if trace.enabled {
		traceHeapAlloc()
	}

	// h.spans is accessed concurrently without synchronization
	// from other threads. Hence, there must be a store/store
	// barrier here to ensure the writes to h.spans above happen
	// before the caller can publish a pointer p to an object
	// allocated from s. As soon as this happens, the garbage
	// collector running on another processor could read p and
	// look up s in h.spans. The unlock acts as the barrier to
	// order these writes. On the read side, the data dependency
	// between p and the index in h.spans orders the reads.
	unlock(&h.lock)
	return s
}

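// alloc allocates a new span of npage pages for GC'd memory and sets its
// size class. If needzero is true, the memory backing the returned span
// is guaranteed to be zeroed.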
func (h *mheap) alloc(npage uintptr, sizeclass int32, large bool, needzero bool) *mspan {
	// Don't do any operations that lock the heap on the G stack.
	// It might trigger stack growth, and the stack growth code needs
	// to be able to allocate heap.
	var s *mspan
	systemstack(func() {
		s = h.alloc_m(npage, sizeclass, large)
	})

	if s != nil {
		if needzero && s.needzero != 0 {
			memclrNoHeapPointers(unsafe.Pointer(s.base()), s.npages<<_PageShift)
		}
		s.needzero = 0
	}
	return s
}

// allocManual allocates a manually-managed span of npage pages.
// allocManual returns nil if allocation fails.
//
// allocManual adds the bytes used to *stat, which should be a
// memstats in-use field. Unlike allocations in the GC'd heap, the
// allocation does *not* count toward heap_inuse or heap_sys.
//
// The memory backing the returned span may not be zeroed if
// span.needzero is set.
//
// allocManual must be called on the system stack to prevent stack
// growth. Since this is used by the stack allocator, stack growth
// during allocManual would self-deadlock.
//
//go:systemstack
func (h *mheap) allocManual(npage uintptr, stat *uint64) *mspan {
	lock(&h.lock)
	s := h.allocSpanLocked(npage, stat)
	if s != nil {
		s.state = _MSpanManual
		s.manualFreeList = 0
		s.allocCount = 0
		s.sizeclass = 0
		s.nelems = 0
		s.elemsize = 0
		s.limit = s.base() + s.npages<<_PageShift
		// Manually managed memory doesn't count toward heap_sys.
		memstats.heap_sys -= uint64(s.npages << _PageShift)
	}

	// This unlock acts as a release barrier. See mheap.alloc_m.
	unlock(&h.lock)

	return s
}

// Allocates a span of the given size. h must be locked.
// The returned span has been removed from the
// free list, but its state is still MSpanFree.
func (h *mheap) allocSpanLocked(npage uintptr, stat *uint64) *mspan {
	var list *mSpanList
	var s *mspan

	// Try in fixed-size lists up to max.
	for i := int(npage); i < len(h.free); i++ {
		list = &h.free[i]
		if !list.isEmpty() {
			s = list.first
			list.remove(s)
			goto HaveSpan
		}
	}
	// Best fit in list of large spans.
	s = h.allocLarge(npage) // allocLarge removed s from h.freelarge for us
	if s == nil {
		if !h.grow(npage) {
			return nil
		}
		s = h.allocLarge(npage)
		if s == nil {
			return nil
		}
	}

HaveSpan:
	// Mark span in use.
	if s.state != _MSpanFree {
		throw("MHeap_AllocLocked - MSpan not free")
	}
	if s.npages < npage {
		throw("MHeap_AllocLocked - bad npages")
	}
	if s.npreleased > 0 {
		sysUsed(unsafe.Pointer(s.base()), s.npages<<_PageShift)
		memstats.heap_released -= uint64(s.npreleased << _PageShift)
		s.npreleased = 0
	}

	if s.npages > npage {
		// Trim extra and put it back in the heap.
		t := (*mspan)(h.spanalloc.alloc())
		t.init(s.base()+npage<<_PageShift, s.npages-npage)
		s.npages = npage
		p := (t.base() - h.arena_start) >> _PageShift
		if p > 0 {
			h.spans[p-1] = s
		}
		h.spans[p] = t
		h.spans[p+t.npages-1] = t
		t.needzero = s.needzero
		s.state = _MSpanManual // prevent coalescing with s
		t.state = _MSpanManual
		h.freeSpanLocked(t, false, false, s.unusedsince)
		s.state = _MSpanFree
	}
	s.unusedsince = 0

	p := (s.base() - h.arena_start) >> _PageShift
	for n := uintptr(0); n < npage; n++ {
		h.spans[p+n] = s
	}

	*stat += uint64(npage << _PageShift)
	memstats.heap_idle -= uint64(npage << _PageShift)

	//println("spanalloc", hex(s.start<<_PageShift))
	if s.inList() {
		throw("still in list")
	}
	return s
}

// Large spans have a minimum size of 1MByte. The maximum number of large spans to support
// 1TBytes is 1 million, and experimentation using random sizes indicates that the depth of
// the tree is less than 2x that of a perfectly balanced tree. A 1TByte heap can be covered
// by a perfectly balanced tree with a depth of 20, so twice that is an acceptable 40.
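// isLargeSpan reports whether a span of npages pages is managed by the
// freelarge treap rather than by one of the fixed-size h.free lists.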
func (h *mheap) isLargeSpan(npages uintptr) bool {
	return npages >= uintptr(len(h.free))
}

// Allocate a span of at least npage pages from the treap of large spans.
// Returns nil if no such span is available.
func (h *mheap) allocLarge(npage uintptr) *mspan {
	return bestFitTreap(&h.freelarge, npage)
}

// Search treap for smallest span with >= npage pages.
// If there are multiple smallest spans, select the one
// with the earliest starting address.
func bestFitTreap(treap *mTreap, npage uintptr) *mspan {
	return treap.remove(npage)
}

// Try to add at least npage pages of memory to the heap,
// returning whether it worked.
//
// h must be locked.
func (h *mheap) grow(npage uintptr) bool {
	// Ask for a big chunk, to reduce the number of mappings
	// the operating system needs to track; also amortizes
	// the overhead of an operating system mapping.
	// Allocate a multiple of 64kB.
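	// (With the runtime's 8KB pages this rounds npage up to a multiple
	// of 8 pages, so the arena always grows in 64kB steps.)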
	npage = round(npage, (64<<10)/_PageSize)
	ask := npage << _PageShift
	if ask < _HeapAllocChunk {
		ask = _HeapAllocChunk
	}

	v := h.sysAlloc(ask)
	if v == nil {
		if ask > npage<<_PageShift {
			ask = npage << _PageShift
			v = h.sysAlloc(ask)
		}
		if v == nil {
			print("runtime: out of memory: cannot allocate ", ask, "-byte block (", memstats.heap_sys, " in use)\n")
			return false
		}
	}

	// Create a fake "in use" span and free it, so that the
	// right coalescing happens.
	s := (*mspan)(h.spanalloc.alloc())
	s.init(uintptr(v), ask>>_PageShift)
	p := (s.base() - h.arena_start) >> _PageShift
	for i := p; i < p+s.npages; i++ {
		h.spans[i] = s
	}
	atomic.Store(&s.sweepgen, h.sweepgen)
	s.state = _MSpanInUse
	h.pagesInUse += uint64(s.npages)
	h.freeSpanLocked(s, false, true, 0)
	return true
}

// Look up the span at the given address.
// Address is guaranteed to be in map
// and is guaranteed to be start or end of span.
func (h *mheap) lookup(v unsafe.Pointer) *mspan {
	p := uintptr(v)
	p -= h.arena_start
	return h.spans[p>>_PageShift]
}

// Look up the span at the given address.
// Address is *not* guaranteed to be in map
// and may be anywhere in the span.
// Map entries for the middle of a span are only
// valid for allocated spans. Free spans may have
// other garbage in their middles, so we have to
// check for that.
func (h *mheap) lookupMaybe(v unsafe.Pointer) *mspan {
	if uintptr(v) < h.arena_start || uintptr(v) >= h.arena_used {
		return nil
	}
	s := h.spans[(uintptr(v)-h.arena_start)>>_PageShift]
	if s == nil || uintptr(v) < s.base() || uintptr(v) >= uintptr(unsafe.Pointer(s.limit)) || s.state != _MSpanInUse {
		return nil
	}
	return s
}

// Free the span back into the heap.
func (h *mheap) freeSpan(s *mspan, acct int32) {
	systemstack(func() {
		mp := getg().m
		lock(&h.lock)
		memstats.heap_scan += uint64(mp.mcache.local_scan)
		mp.mcache.local_scan = 0
		memstats.tinyallocs += uint64(mp.mcache.local_tinyallocs)
		mp.mcache.local_tinyallocs = 0
		if msanenabled {
			// Tell msan that this entire span is no longer in use.
			base := unsafe.Pointer(s.base())
			bytes := s.npages << _PageShift
			msanfree(base, bytes)
		}
		if acct != 0 {
			memstats.heap_objects--
		}
		if gcBlackenEnabled != 0 {
			// heap_scan changed.
			gcController.revise()
		}
		h.freeSpanLocked(s, true, true, 0)
		unlock(&h.lock)
	})
}

// freeManual frees a manually-managed span returned by allocManual.
// stat must be the same as the stat passed to the allocManual that
// allocated s.
//
// This must only be called when gcphase == _GCoff. See mSpanState for
// an explanation.
//
// freeManual must be called on the system stack to prevent stack
// growth, just like allocManual.
//
//go:systemstack
func (h *mheap) freeManual(s *mspan, stat *uint64) {
	s.needzero = 1
	lock(&h.lock)
	*stat -= uint64(s.npages << _PageShift)
	memstats.heap_sys += uint64(s.npages << _PageShift)
	h.freeSpanLocked(s, false, true, 0)
	unlock(&h.lock)
}

// s must be on a busy list (h.busy or h.busylarge) or unlinked.
func (h *mheap) freeSpanLocked(s *mspan, acctinuse, acctidle bool, unusedsince int64) {
	switch s.state {
	case _MSpanManual:
		if s.allocCount != 0 {
			throw("MHeap_FreeSpanLocked - invalid stack free")
		}
	case _MSpanInUse:
		if s.allocCount != 0 || s.sweepgen != h.sweepgen {
			print("MHeap_FreeSpanLocked - span ", s, " ptr ", hex(s.base()), " allocCount ", s.allocCount, " sweepgen ", s.sweepgen, "/", h.sweepgen, "\n")
			throw("MHeap_FreeSpanLocked - invalid free")
		}
		h.pagesInUse -= uint64(s.npages)
	default:
		throw("MHeap_FreeSpanLocked - invalid span state")
	}

	if acctinuse {
		memstats.heap_inuse -= uint64(s.npages << _PageShift)
	}
	if acctidle {
		memstats.heap_idle += uint64(s.npages << _PageShift)
	}
	s.state = _MSpanFree
	if s.inList() {
		h.busyList(s.npages).remove(s)
	}

	// Stamp newly unused spans. The scavenger will use that
	// info to potentially give back some pages to the OS.
	s.unusedsince = unusedsince
	if unusedsince == 0 {
		s.unusedsince = nanotime()
	}
	s.npreleased = 0

	// Coalesce with earlier, later spans.
	p := (s.base() - h.arena_start) >> _PageShift
	if p > 0 {
		before := h.spans[p-1]
		if before != nil && before.state == _MSpanFree {
			// Now adjust s.
			s.startAddr = before.startAddr
			s.npages += before.npages
			s.npreleased = before.npreleased // absorb released pages
			s.needzero |= before.needzero
			p -= before.npages
			h.spans[p] = s
			// The size is potentially changing so the treap needs to delete adjacent nodes and
			// insert back as a combined node.
			if h.isLargeSpan(before.npages) {
				// before is large, so it must be in the treap; remove it there.
				h.freelarge.removeSpan(before)
			} else {
				h.freeList(before.npages).remove(before)
			}
			before.state = _MSpanDead
			h.spanalloc.free(unsafe.Pointer(before))
		}
	}

	// Now check to see if next (greater addresses) span is free and can be coalesced.
	if (p + s.npages) < uintptr(len(h.spans)) {
		after := h.spans[p+s.npages]
		if after != nil && after.state == _MSpanFree {
			s.npages += after.npages
			s.npreleased += after.npreleased
			s.needzero |= after.needzero
			h.spans[p+s.npages-1] = s
			if h.isLargeSpan(after.npages) {
				h.freelarge.removeSpan(after)
			} else {
				h.freeList(after.npages).remove(after)
			}
			after.state = _MSpanDead
			h.spanalloc.free(unsafe.Pointer(after))
		}
	}

	// Insert s into appropriate list or treap.
	if h.isLargeSpan(s.npages) {
		h.freelarge.insert(s)
	} else {
		h.freeList(s.npages).insert(s)
	}
}

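// freeList returns the fixed-size free list that holds free spans of
// npages pages. The caller must ensure npages < uintptr(len(h.free)).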
func (h *mheap) freeList(npages uintptr) *mSpanList {
	return &h.free[npages]
}

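// busyList returns the busy list for spans of npages pages; spans too
// large for the fixed-size lists all share h.busylarge.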
func (h *mheap) busyList(npages uintptr) *mSpanList {
	if npages < uintptr(len(h.busy)) {
		return &h.busy[npages]
	}
	return &h.busylarge
}

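// scavengeTreapNode releases the memory of a single large free span back
// to the OS if the span has been unused for longer than limit
// nanoseconds, and returns the number of bytes released.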
func scavengeTreapNode(t *treapNode, now, limit uint64) uintptr {
	s := t.spanKey
	var sumreleased uintptr
	if (now-uint64(s.unusedsince)) > limit && s.npreleased != s.npages {
		start := s.base()
		end := start + s.npages<<_PageShift
		if physPageSize > _PageSize {
			// We can only release pages in
			// physPageSize blocks, so round start
			// and end in. (Otherwise, madvise
			// will round them *out* and release
			// more memory than we want.)
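			// For example, with physPageSize = 64KB a span covering
			// [0x403000, 0x42a000) is trimmed to [0x410000, 0x420000).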
			start = (start + physPageSize - 1) &^ (physPageSize - 1)
			end &^= physPageSize - 1
			if end <= start {
				// start and end don't span a
				// whole physical page.
				return sumreleased
			}
		}
		len := end - start
		released := len - (s.npreleased << _PageShift)
		if physPageSize > _PageSize && released == 0 {
			return sumreleased
		}
		memstats.heap_released += uint64(released)
		sumreleased += released
		s.npreleased = len >> _PageShift
		sysUnused(unsafe.Pointer(start), len)
	}
	return sumreleased
}

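// scavengelist is the free-list analogue of scavengeTreapNode: it walks
// list and releases the memory of any span that has been unused for
// longer than limit nanoseconds, returning the total bytes released.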
func scavengelist(list *mSpanList, now, limit uint64) uintptr {
|
2015-11-11 16:13:51 -08:00
|
|
|
if list.isEmpty() {
|
2014-11-11 17:05:02 -05:00
|
|
|
return 0
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var sumreleased uintptr
|
2015-10-15 15:59:49 -07:00
|
|
|
for s := list.first; s != nil; s = s.next {
|
2014-11-11 17:05:02 -05:00
|
|
|
if (now-uint64(s.unusedsince)) > limit && s.npreleased != s.npages {
|
2016-04-28 10:59:00 -04:00
|
|
|
start := s.base()
|
runtime: scavenge memory on physical page-aligned boundaries
Currently the scavenger marks memory unused in multiples of the
allocator page size (8K). This is safe as long as the true physical
page size is 4K (or 8K), as it is on many platforms. However, on
ARM64, PPC64x, and MIPS64, the physical page size is larger than 8K,
so if we attempt to mark memory unused, the kernel will round the
boundaries of the region *out* to all pages covered by the requested
region, and we'll release a larger region of memory than intended. As
a result, the scavenger is currently disabled on these platforms.
Fix this by first rounding the region to be marked unused *in* to
multiples of the physical page size, so that when we ask the kernel to
mark it unused, it releases exactly the requested region.
Fixes #9993.
Change-Id: I96d5fdc2f77f9d69abadcea29bcfe55e68288cb1
Reviewed-on: https://go-review.googlesource.com/22066
Reviewed-by: Rick Hudson <rlh@golang.org>
2016-04-14 13:41:36 -04:00
|
|
|
end := start + s.npages<<_PageShift
|
2016-07-18 12:24:02 -04:00
|
|
|
if physPageSize > _PageSize {
|
runtime: scavenge memory on physical page-aligned boundaries
Currently the scavenger marks memory unused in multiples of the
allocator page size (8K). This is safe as long as the true physical
page size is 4K (or 8K), as it is on many platforms. However, on
ARM64, PPC64x, and MIPS64, the physical page size is larger than 8K,
so if we attempt to mark memory unused, the kernel will round the
boundaries of the region *out* to all pages covered by the requested
region, and we'll release a larger region of memory than intended. As
a result, the scavenger is currently disabled on these platforms.
Fix this by first rounding the region to be marked unused *in* to
multiples of the physical page size, so that when we ask the kernel to
mark it unused, it releases exactly the requested region.
Fixes #9993.
Change-Id: I96d5fdc2f77f9d69abadcea29bcfe55e68288cb1
Reviewed-on: https://go-review.googlesource.com/22066
Reviewed-by: Rick Hudson <rlh@golang.org>
2016-04-14 13:41:36 -04:00
|
|
|
// We can only release pages in
|
2016-07-18 12:24:02 -04:00
|
|
|
// physPageSize blocks, so round start
|
runtime: scavenge memory on physical page-aligned boundaries
Currently the scavenger marks memory unused in multiples of the
allocator page size (8K). This is safe as long as the true physical
page size is 4K (or 8K), as it is on many platforms. However, on
ARM64, PPC64x, and MIPS64, the physical page size is larger than 8K,
so if we attempt to mark memory unused, the kernel will round the
boundaries of the region *out* to all pages covered by the requested
region, and we'll release a larger region of memory than intended. As
a result, the scavenger is currently disabled on these platforms.
Fix this by first rounding the region to be marked unused *in* to
multiples of the physical page size, so that when we ask the kernel to
mark it unused, it releases exactly the requested region.
Fixes #9993.
Change-Id: I96d5fdc2f77f9d69abadcea29bcfe55e68288cb1
Reviewed-on: https://go-review.googlesource.com/22066
Reviewed-by: Rick Hudson <rlh@golang.org>
2016-04-14 13:41:36 -04:00
|
|
|
// and end in. (Otherwise, madvise
|
|
|
|
|
// will round them *out* and release
|
|
|
|
|
// more memory than we want.)
|
2016-07-18 12:24:02 -04:00
|
|
|
start = (start + physPageSize - 1) &^ (physPageSize - 1)
|
|
|
|
|
end &^= physPageSize - 1
|
2016-08-19 16:03:14 -04:00
|
|
|
if end <= start {
|
|
|
|
|
// start and end don't span a
|
|
|
|
|
// whole physical page.
|
runtime: scavenge memory on physical page-aligned boundaries
Currently the scavenger marks memory unused in multiples of the
allocator page size (8K). This is safe as long as the true physical
page size is 4K (or 8K), as it is on many platforms. However, on
ARM64, PPC64x, and MIPS64, the physical page size is larger than 8K,
so if we attempt to mark memory unused, the kernel will round the
boundaries of the region *out* to all pages covered by the requested
region, and we'll release a larger region of memory than intended. As
a result, the scavenger is currently disabled on these platforms.
Fix this by first rounding the region to be marked unused *in* to
multiples of the physical page size, so that when we ask the kernel to
mark it unused, it releases exactly the requested region.
Fixes #9993.
Change-Id: I96d5fdc2f77f9d69abadcea29bcfe55e68288cb1
Reviewed-on: https://go-review.googlesource.com/22066
Reviewed-by: Rick Hudson <rlh@golang.org>
2016-04-14 13:41:36 -04:00
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
len := end - start
|
|
|
|
|
|
|
|
|
|
released := len - (s.npreleased << _PageShift)
|
2016-07-18 12:24:02 -04:00
|
|
|
if physPageSize > _PageSize && released == 0 {
|
runtime: scavenge memory on physical page-aligned boundaries
Currently the scavenger marks memory unused in multiples of the
allocator page size (8K). This is safe as long as the true physical
page size is 4K (or 8K), as it is on many platforms. However, on
ARM64, PPC64x, and MIPS64, the physical page size is larger than 8K,
so if we attempt to mark memory unused, the kernel will round the
boundaries of the region *out* to all pages covered by the requested
region, and we'll release a larger region of memory than intended. As
a result, the scavenger is currently disabled on these platforms.
Fix this by first rounding the region to be marked unused *in* to
multiples of the physical page size, so that when we ask the kernel to
mark it unused, it releases exactly the requested region.
Fixes #9993.
Change-Id: I96d5fdc2f77f9d69abadcea29bcfe55e68288cb1
Reviewed-on: https://go-review.googlesource.com/22066
Reviewed-by: Rick Hudson <rlh@golang.org>
2016-04-14 13:41:36 -04:00
|
|
|
continue
|
|
|
|
|
}
|
2014-11-11 17:05:02 -05:00
|
|
|
memstats.heap_released += uint64(released)
|
|
|
|
|
sumreleased += released
|
		s.npreleased = len >> _PageShift
		sysUnused(unsafe.Pointer(start), len)
		}
	}
	return sumreleased
}
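To see the round-in arithmetic in isolation, here is a standalone sketch (not runtime code; the 64 KiB physical page size and the sample addresses are made-up values). Rounding start up and end down to physical-page multiples guarantees the kernel is never asked to release memory outside the original span:

package main

import "fmt"

func main() {
	const physPageSize = 64 << 10 // assume 64 KiB physical pages, e.g. an ARM64 kernel built with 64K pages

	// A span whose boundaries fall on 8 KiB allocator pages but not
	// on physical-page boundaries.
	start := uintptr(0x10002000)
	end := uintptr(0x10034000)

	// Round start *up* and end *down* to physical-page multiples,
	// exactly as scavengelist does above.
	start = (start + physPageSize - 1) &^ (physPageSize - 1)
	end &^= physPageSize - 1

	if end <= start {
		fmt.Println("span does not cover a whole physical page; nothing to release")
		return
	}
	fmt.Printf("release [%#x, %#x), %d bytes\n", start, end, end-start)
}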
func (h *mheap) scavenge(k int32, now, limit uint64) {
	// Disallow malloc or panic while holding the heap lock. We do
	// this here because this is a non-mallocgc entry-point to
	// the mheap API.
	gp := getg()
	gp.m.mallocing++
	lock(&h.lock)
	var sumreleased uintptr
	for i := 0; i < len(h.free); i++ {
		sumreleased += scavengelist(&h.free[i], now, limit)
	}
	sumreleased += scavengetreap(h.freelarge.treap, now, limit)
	unlock(&h.lock)
	gp.m.mallocing--

	if debug.gctrace > 0 {
		if sumreleased > 0 {
			print("scvg", k, ": ", sumreleased>>20, " MB released\n")
		}
		print("scvg", k, ": inuse: ", memstats.heap_inuse>>20, ", idle: ", memstats.heap_idle>>20, ", sys: ", memstats.heap_sys>>20, ", released: ", memstats.heap_released>>20, ", consumed: ", (memstats.heap_sys-memstats.heap_released)>>20, " (MB)\n")
	}
}
//go:linkname runtime_debug_freeOSMemory runtime/debug.freeOSMemory
func runtime_debug_freeOSMemory() {
	GC()
	systemstack(func() { mheap_.scavenge(-1, ^uint64(0), 0) })
}
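For reference, runtime_debug_freeOSMemory is the linkname target of the public runtime/debug.FreeOSMemory function, so the scavenger can be driven from user code. A minimal user-level example (FreeOSMemory, ReadMemStats, and MemStats.HeapReleased are the real public APIs; the 64 MiB buffer is just an example workload):

package main

import (
	"fmt"
	"runtime"
	"runtime/debug"
)

func main() {
	// Allocate and then drop a large buffer.
	buf := make([]byte, 64<<20)
	buf[len(buf)-1] = 1
	buf = nil
	_ = buf

	// FreeOSMemory forces a garbage collection and then scavenges the
	// entire heap (the scavenge(-1, ^uint64(0), 0) call above),
	// returning as much memory to the OS as possible.
	debug.FreeOSMemory()

	var ms runtime.MemStats
	runtime.ReadMemStats(&ms)
	fmt.Printf("HeapReleased = %d MiB\n", ms.HeapReleased>>20)
}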
runtime: make fixalloc zero allocations on reuse
Currently fixalloc does not zero memory it reuses. This is dangerous
with the hybrid barrier if the type may contain heap pointers, since
it may cause us to observe a dead heap pointer on reuse. It's also
error-prone since it's the only allocator that doesn't zero on
allocation (mallocgc of course zeroes, but so do persistentalloc and
sysAlloc). It's also largely pointless: for mcache, the caller
immediately memclrs the allocation; and the two specials types are
tiny so there's no real cost to zeroing them.
Change fixalloc to zero allocations by default.
The only type we don't zero by default is mspan. This actually
requires that the span's sweepgen survive across freeing and
reallocating a span. If we were to zero it, the following race would
be possible:
1. The current sweepgen is 2. Span s is on the unswept list.
2. Direct sweeping sweeps span s, finds it's all free, and releases s
   to the fixalloc.
3. Thread 1 allocates s from fixalloc. Suppose this zeros s, including
   s.sweepgen.
4. Thread 1 calls s.init, which sets s.state to _MSpanDead.
5. On thread 2, background sweeping comes across span s in allspans
   and cas's s.sweepgen from 0 (sg-2) to 1 (sg-1). Now it thinks it
   owns it for sweeping.
6. Thread 1 continues initializing s. Everything breaks.
I would like to fix this because it's obviously confusing, but it's a
subtle enough problem that I'm leaving it alone for now. The solution
may be to skip sweepgen 0, but then we have to think about wrap-around
much more carefully.
Updates #17503.

// Initialize a new span with the given start and npages.
func (span *mspan) init(base uintptr, npages uintptr) {
	// span is *not* zeroed.
	span.next = nil
	span.prev = nil
	span.list = nil
	span.startAddr = base
	span.npages = npages
	span.allocCount = 0
	span.sizeclass = 0
	span.incache = false
	span.elemsize = 0
	span.state = _MSpanDead
	span.unusedsince = 0
	span.npreleased = 0
	span.speciallock.key = 0
	span.specials = nil
	span.needzero = 0
	span.freeindex = 0
	span.allocBits = nil
	span.gcmarkBits = nil
}
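As a rough illustration of the zero-on-reuse policy discussed in the note above, here is a standalone sketch of a fixed-size free-list allocator that zeroes recycled objects (the node and freeList types are invented for the example; this is not the runtime's fixalloc):

package main

import "fmt"

// node is a hypothetical fixed-size object managed by a free list.
type node struct {
	next  *node // free-list link
	value int
}

// freeList hands out nodes, zeroing recycled ones so no stale
// pointers or values survive a free/alloc cycle.
type freeList struct {
	head *node
}

func (f *freeList) alloc() *node {
	if f.head == nil {
		return new(node) // fresh memory is already zeroed
	}
	n := f.head
	f.head = n.next
	*n = node{} // zero on reuse, as fixalloc does after the change above
	return n
}

func (f *freeList) free(n *node) {
	n.next = f.head
	f.head = n
}

func main() {
	var fl freeList
	a := fl.alloc()
	a.value = 42
	fl.free(a)
	b := fl.alloc()      // recycles a
	fmt.Println(b.value) // 0: the recycled node was zeroed
}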
func (span *mspan) inList() bool {
	return span.list != nil
}
// Initialize an empty doubly-linked list.
func (list *mSpanList) init() {
	list.first = nil
	list.last = nil
}
func (list *mSpanList) remove(span *mspan) {
	if span.list != list {
		print("runtime: failed MSpanList_Remove span.npages=", span.npages,
			" span=", span, " prev=", span.prev, " span.list=", span.list, " list=", list, "\n")
		throw("MSpanList_Remove")
	}
	if list.first == span {
		list.first = span.next
	} else {
		span.prev.next = span.next
	}
	if list.last == span {
		list.last = span.prev
	} else {
		span.next.prev = span.prev
	}
	span.next = nil
	span.prev = nil
	span.list = nil
}
func (list *mSpanList) isEmpty() bool {
	return list.first == nil
}
func (list *mSpanList) insert(span *mspan) {
	if span.next != nil || span.prev != nil || span.list != nil {
		println("runtime: failed MSpanList_Insert", span, span.next, span.prev, span.list)
		throw("MSpanList_Insert")
	}
	span.next = list.first
	if list.first != nil {
		// The list contains at least one span; link it in.
		// The last span in the list doesn't change.
		list.first.prev = span
	} else {
		// The list contains no spans, so this is also the last span.
		list.last = span
	}
	list.first = span
	span.list = list
}
func (list *mSpanList) insertBack(span *mspan) {
	if span.next != nil || span.prev != nil || span.list != nil {
		println("runtime: failed MSpanList_InsertBack", span, span.next, span.prev, span.list)
		throw("MSpanList_InsertBack")
	}
	span.prev = list.last
	if list.last != nil {
		// The list contains at least one span.
		list.last.next = span
	} else {
		// The list contains no spans, so this is also the first span.
		list.first = span
	}
	list.last = span
	span.list = list
}
// takeAll removes all spans from other and inserts them at the front
// of list.
func (list *mSpanList) takeAll(other *mSpanList) {
	if other.isEmpty() {
		return
	}

	// Reparent everything in other to list.
	for s := other.first; s != nil; s = s.next {
		s.list = list
	}

	// Concatenate the lists.
	if list.isEmpty() {
		*list = *other
	} else {
		// Neither list is empty. Put other before list.
		other.last.next = list.first
		list.first.prev = other.last
		list.first = other.first
	}

	other.first, other.last = nil, nil
}
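The mSpanList methods above implement an intrusive doubly-linked list in which every element records the list it is on, which is what lets insert and remove detect misuse. A condensed standalone sketch of the same pattern (elem and intrusiveList are invented names, not runtime types):

package main

import "fmt"

type elem struct {
	next, prev *elem
	list       *intrusiveList // back-pointer: which list owns this element
	id         int
}

type intrusiveList struct {
	first, last *elem
}

func (l *intrusiveList) insert(e *elem) {
	if e.next != nil || e.prev != nil || e.list != nil {
		panic("insert of an element that is already on a list")
	}
	e.next = l.first
	if l.first != nil {
		l.first.prev = e
	} else {
		l.last = e
	}
	l.first = e
	e.list = l
}

func (l *intrusiveList) remove(e *elem) {
	if e.list != l {
		panic("remove of an element that is not on this list")
	}
	if l.first == e {
		l.first = e.next
	} else {
		e.prev.next = e.next
	}
	if l.last == e {
		l.last = e.prev
	} else {
		e.next.prev = e.prev
	}
	e.next, e.prev, e.list = nil, nil, nil
}

func main() {
	var l intrusiveList
	a, b := &elem{id: 1}, &elem{id: 2}
	l.insert(a)
	l.insert(b)
	l.remove(a)
	fmt.Println(l.first.id, l.first == l.last) // 2 true
}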
const (
	_KindSpecialFinalizer = 1
	_KindSpecialProfile   = 2
	// Note: The finalizer special must be first because if we're freeing
	// an object, a finalizer special will cause the freeing operation
	// to abort, and we want to keep the other special records around
	// if that happens.
)
//go:notinheap
type special struct {
	next   *special // linked list in span
	offset uint16   // span offset of object
	kind   byte     // kind of special
}
runtime: scan objects with finalizers concurrently
This reduces pause time by ~25% relative to tip and by ~50% relative
to Go 1.5.1.
Currently one of the steps of STW mark termination is to loop (in
parallel) over all spans to find objects with finalizers in order to
mark all objects reachable from these objects and to treat the
finalizer special as a root. Unfortunately, even if there are no
finalizers at all, this loop takes roughly 1 ms/heap GB/core, so
multi-gigabyte heaps can quickly push our STW time past 10ms.
Fix this by moving this scan from mark termination to concurrent scan,
where it can run in parallel with mutators. The loop itself could also
be optimized, but this cost is small compared to concurrent marking.
Making this scan concurrent introduces two complications:
1) The scan currently walks the specials list of each span without
locking it, which is safe only with the world stopped. We fix this by
speculatively checking if a span has any specials (the vast majority
won't) and then locking the specials list only if there are specials
to check.
2) An object can have a finalizer set after concurrent scan, in which
case it won't have been marked appropriately by concurrent scan. If
the finalizer is a closure and is only reachable from the special, it
could be swept before it is run. Likewise, if the object is not marked
yet when the finalizer is set and then becomes unreachable before it
is marked, other objects reachable only from it may be swept before
the finalizer function is run. We fix this issue by making
addfinalizer ensure the same marking invariants as markroot does.
For multi-gigabyte heaps, this reduces max pause time by 20%–30%
relative to tip (depending on GOMAXPROCS) and by ~50% relative to Go
1.5.1 (where this loop was neither concurrent nor parallel). Here are
the results for the garbage benchmark:

	              ---------------- max pause ----------------
	Heap   Procs  Concurrent scan  STW parallel scan  1.5.1
	24GB   12     18ms             23ms               37ms
	24GB   4      18ms             25ms               37ms
	4GB    4      3.8ms            4.9ms              6.9ms

In all cases, 95%ile pause time is similar to the max pause time. This
also improves mean STW time by 10%–30%.
Fixes #11485.

// Adds the special record s to the list of special records for
// the object p. All fields of s should be filled in except for
// offset & next, which this routine will fill in.
// Returns true if the special was successfully added, false otherwise.
// (The add will fail only if a record with the same p and s->kind
// already exists.)
func addspecial(p unsafe.Pointer, s *special) bool {
	span := mheap_.lookupMaybe(p)
	if span == nil {
		throw("addspecial on invalid pointer")
	}

	// Ensure that the span is swept.
	// Sweeping accesses the specials list w/o locks, so we have
	// to synchronize with it. And it's just much safer.
	mp := acquirem()
	span.ensureSwept()

	offset := uintptr(p) - span.base()
	kind := s.kind

	lock(&span.speciallock)

	// Find splice point, check for existing record.
	t := &span.specials
	for {
		x := *t
		if x == nil {
			break
		}
		if offset == uintptr(x.offset) && kind == x.kind {
			unlock(&span.speciallock)
			releasem(mp)
			return false // already exists
		}
		if offset < uintptr(x.offset) || (offset == uintptr(x.offset) && kind < x.kind) {
			break
		}
		t = &x.next
	}

	// Splice in record, fill in offset.
	s.offset = uint16(offset)
	s.next = *t
	*t = s
	unlock(&span.speciallock)
	releasem(mp)

	return true
}
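The splice loop in addspecial keeps the specials list sorted by (offset, kind) and uses a pointer-to-pointer cursor so that inserting at the head and in the middle are the same case. A standalone sketch of that idiom on an invented record type (not runtime code):

package main

import "fmt"

type record struct {
	next   *record
	offset int
}

// insertSorted splices r into the list kept in ascending offset order,
// refusing duplicates; the pointer-to-pointer cursor means the head
// case needs no special handling.
func insertSorted(head **record, r *record) bool {
	t := head
	for {
		x := *t
		if x == nil {
			break
		}
		if x.offset == r.offset {
			return false // already exists
		}
		if r.offset < x.offset {
			break
		}
		t = &x.next
	}
	r.next = *t
	*t = r
	return true
}

func main() {
	var head *record
	insertSorted(&head, &record{offset: 20})
	insertSorted(&head, &record{offset: 10})
	insertSorted(&head, &record{offset: 30})
	for x := head; x != nil; x = x.next {
		fmt.Println(x.offset) // 10, 20, 30
	}
}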
// Removes the Special record of the given kind for the object p.
// Returns the record if the record existed, nil otherwise.
// The caller must FixAlloc_Free the result.
func removespecial(p unsafe.Pointer, kind uint8) *special {
	span := mheap_.lookupMaybe(p)
	if span == nil {
		throw("removespecial on invalid pointer")
	}

	// Ensure that the span is swept.
	// Sweeping accesses the specials list w/o locks, so we have
	// to synchronize with it. And it's just much safer.
	mp := acquirem()
	span.ensureSwept()

	offset := uintptr(p) - span.base()

	lock(&span.speciallock)
	t := &span.specials
	for {
		s := *t
		if s == nil {
			break
		}
		// This function is used for finalizers only, so we don't check for
		// "interior" specials (p must be exactly equal to s->offset).
		if offset == uintptr(s.offset) && kind == s.kind {
			*t = s.next
			unlock(&span.speciallock)
			releasem(mp)
			return s
		}
		t = &s.next
	}
	unlock(&span.speciallock)
	releasem(mp)
	return nil
}
// The described object has a finalizer set for it.
//
// specialfinalizer is allocated from non-GC'd memory, so any heap
// pointers must be specially handled.
//
//go:notinheap
type specialfinalizer struct {
	special special
	fn      *funcval // May be a heap pointer.
	nret    uintptr
	fint    *_type   // May be a heap pointer, but always live.
	ot      *ptrtype // May be a heap pointer, but always live.
}
// Adds a finalizer to the object p. Returns true if it succeeded.
func addfinalizer(p unsafe.Pointer, f *funcval, nret uintptr, fint *_type, ot *ptrtype) bool {
	lock(&mheap_.speciallock)
	s := (*specialfinalizer)(mheap_.specialfinalizeralloc.alloc())
	unlock(&mheap_.speciallock)
	s.special.kind = _KindSpecialFinalizer
	s.fn = f
	s.nret = nret
	s.fint = fint
	s.ot = ot
	if addspecial(p, &s.special) {
		// This is responsible for maintaining the same
		// GC-related invariants as markrootSpans in any
		// situation where it's possible that markrootSpans
		// has already run but mark termination hasn't yet.
		if gcphase != _GCoff {
			_, base, _ := findObject(p)
			mp := acquirem()
			gcw := &mp.p.ptr().gcw
			// Mark everything reachable from the object
			// so it's retained for the finalizer.
			scanobject(uintptr(base), gcw)
			// Mark the finalizer itself, since the
			// special isn't part of the GC'd heap.
			scanblock(uintptr(unsafe.Pointer(&s.fn)), sys.PtrSize, &oneptrmask[0], gcw)
			if gcBlackenPromptly {
				gcw.dispose()
			}
			releasem(mp)
		}
		return true
	}

	// There was an old finalizer
	lock(&mheap_.speciallock)
	mheap_.specialfinalizeralloc.free(unsafe.Pointer(s))
	unlock(&mheap_.speciallock)
	return false
}
// Removes the finalizer (if any) from the object p.
func removefinalizer(p unsafe.Pointer) {
	s := (*specialfinalizer)(unsafe.Pointer(removespecial(p, _KindSpecialFinalizer)))
	if s == nil {
		return // there wasn't a finalizer to remove
	}
	lock(&mheap_.speciallock)
	mheap_.specialfinalizeralloc.free(unsafe.Pointer(s))
	unlock(&mheap_.speciallock)
}
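addfinalizer and removefinalizer are the machinery behind the public runtime.SetFinalizer API: setting a finalizer attaches a _KindSpecialFinalizer special to the object's span, and SetFinalizer(x, nil) removes it again. A minimal user-level illustration (runtime.SetFinalizer and runtime.GC are ordinary public APIs; the resource type is invented, and the finalizer is not guaranteed to run before the program exits):

package main

import (
	"fmt"
	"runtime"
	"time"
)

type resource struct {
	name string
}

func main() {
	r := &resource{name: "example"}

	// Registering a finalizer attaches a specialfinalizer record to
	// r's span via addspecial.
	runtime.SetFinalizer(r, func(r *resource) {
		fmt.Println("finalizing", r.name)
	})

	// runtime.SetFinalizer(r, nil) would call removefinalizer instead.

	r = nil
	runtime.GC()                       // r is unreachable; its finalizer is queued
	time.Sleep(100 * time.Millisecond) // give the finalizer goroutine a chance to run
}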
// The described object is being heap profiled.
//
//go:notinheap
type specialprofile struct {
	special special
	b       *bucket
}
// Set the heap profile bucket associated with addr to b.
func setprofilebucket(p unsafe.Pointer, b *bucket) {
	lock(&mheap_.speciallock)
	s := (*specialprofile)(mheap_.specialprofilealloc.alloc())
	unlock(&mheap_.speciallock)
	s.special.kind = _KindSpecialProfile
	s.b = b
	if !addspecial(p, &s.special) {
		throw("setprofilebucket: profile already set")
	}
}
// Do whatever cleanup needs to be done to deallocate s. It has
// already been unlinked from the MSpan specials list.
func freespecial(s *special, p unsafe.Pointer, size uintptr) {
	switch s.kind {
	case _KindSpecialFinalizer:
		sf := (*specialfinalizer)(unsafe.Pointer(s))
		queuefinalizer(p, sf.fn, sf.nret, sf.fint, sf.ot)
		lock(&mheap_.speciallock)
		mheap_.specialfinalizeralloc.free(unsafe.Pointer(sf))
		unlock(&mheap_.speciallock)
	case _KindSpecialProfile:
		sp := (*specialprofile)(unsafe.Pointer(s))
		mProf_Free(sp.b, size)
		lock(&mheap_.speciallock)
		mheap_.specialprofilealloc.free(unsafe.Pointer(sp))
		unlock(&mheap_.speciallock)
	default:
		throw("bad special kind")
		panic("not reached")
	}
}
// gcBits is an alloc/mark bitmap. This is always used as *gcBits.
//
//go:notinheap
type gcBits uint8

// bytep returns a pointer to the n'th byte of b.
func (b *gcBits) bytep(n uintptr) *uint8 {
	return addb((*uint8)(b), n)
}

// bitp returns a pointer to the byte containing bit n and a mask for
// selecting that bit from *bytep.
func (b *gcBits) bitp(n uintptr) (bytep *uint8, mask uint8) {
	return b.bytep(n / 8), 1 << (n % 8)
}
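To make the byte/mask split concrete: bit n lives in byte n/8 and is selected by the mask 1<<(n%8). A standalone sketch over an ordinary byte slice (not the runtime's gcBits type):

package main

import "fmt"

// setBit and isSet mirror the n/8 byte index and 1<<(n%8) mask used by bitp.
func setBit(bits []uint8, n uintptr) {
	bits[n/8] |= 1 << (n % 8)
}

func isSet(bits []uint8, n uintptr) bool {
	return bits[n/8]&(1<<(n%8)) != 0
}

func main() {
	bits := make([]uint8, 4) // room for 32 objects' mark bits
	setBit(bits, 10)         // byte 1, mask 0x4
	fmt.Printf("byte=%d mask=%#x set=%v\n", 10/8, uint8(1<<(10%8)), isSet(bits, 10))
}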
const gcBitsChunkBytes = uintptr(64 << 10)
const gcBitsHeaderBytes = unsafe.Sizeof(gcBitsHeader{})

type gcBitsHeader struct {
	free uintptr // free is the index into bits of the next free byte.
	next uintptr // *gcBits triggers recursive type bug. (issue 14620)
}

//go:notinheap
type gcBitsArena struct {
	// gcBitsHeader // side step recursive type bug (issue 14620) by including fields by hand.
	free uintptr // free is the index into bits of the next free byte; read/write atomically
	next *gcBitsArena
	bits [gcBitsChunkBytes - gcBitsHeaderBytes]gcBits
}

var gcBitsArenas struct {
	lock     mutex
	free     *gcBitsArena
	next     *gcBitsArena // Read atomically. Write atomically under lock.
	current  *gcBitsArena
	previous *gcBitsArena
}
// tryAlloc allocates from b or returns nil if b does not have enough room.
// This is safe to call concurrently.
func (b *gcBitsArena) tryAlloc(bytes uintptr) *gcBits {
	if b == nil || atomic.Loaduintptr(&b.free)+bytes > uintptr(len(b.bits)) {
		return nil
	}
	// Try to allocate from this block.
	end := atomic.Xadduintptr(&b.free, bytes)
	if end > uintptr(len(b.bits)) {
		return nil
	}
	// There was enough room.
	start := end - bytes
	return &b.bits[start]
}
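tryAlloc is a lock-free bump allocator: the optimistic load filters out arenas that are already full, and the atomic add both reserves the bytes and detects overflow. A standalone sketch of the same pattern using sync/atomic (the bumpArena type is invented for the example):

package main

import (
	"fmt"
	"sync/atomic"
)

type bumpArena struct {
	free uint64 // next free index, advanced atomically
	buf  [1 << 16]byte
}

// tryAlloc reserves n bytes or returns nil if the arena is full.
func (a *bumpArena) tryAlloc(n uint64) []byte {
	// Optimistic check so full arenas fail fast without bumping free.
	if a == nil || atomic.LoadUint64(&a.free)+n > uint64(len(a.buf)) {
		return nil
	}
	// The add reserves [end-n, end); if it overran the buffer the
	// reservation is simply abandoned (free only grows, so later
	// callers fail the optimistic check too).
	end := atomic.AddUint64(&a.free, n)
	if end > uint64(len(a.buf)) {
		return nil
	}
	return a.buf[end-n : end]
}

func main() {
	var a bumpArena
	p := a.tryAlloc(64)
	q := a.tryAlloc(64)
	fmt.Println(len(p), len(q), atomic.LoadUint64(&a.free)) // 64 64 128
}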
// newMarkBits returns a pointer to 8 byte aligned bytes
// to be used for a span's mark bits.
func newMarkBits(nelems uintptr) *gcBits {
	blocksNeeded := uintptr((nelems + 63) / 64)
	bytesNeeded := blocksNeeded * 8

	// Try directly allocating from the current head arena.
	head := (*gcBitsArena)(atomic.Loadp(unsafe.Pointer(&gcBitsArenas.next)))
	if p := head.tryAlloc(bytesNeeded); p != nil {
		return p
	}

	// There's not enough room in the head arena. We may need to
	// allocate a new arena.
	lock(&gcBitsArenas.lock)
	// Try the head arena again, since it may have changed. Now
	// that we hold the lock, the list head can't change, but its
	// free position still can.
	if p := gcBitsArenas.next.tryAlloc(bytesNeeded); p != nil {
		unlock(&gcBitsArenas.lock)
		return p
	}

	// Allocate a new arena. This may temporarily drop the lock.
	fresh := newArenaMayUnlock()
	// If newArenaMayUnlock dropped the lock, another thread may
	// have put a fresh arena on the "next" list. Try allocating
	// from next again.
	if p := gcBitsArenas.next.tryAlloc(bytesNeeded); p != nil {
		// Put fresh back on the free list.
		// TODO: Mark it "already zeroed"
		fresh.next = gcBitsArenas.free
		gcBitsArenas.free = fresh
		unlock(&gcBitsArenas.lock)
		return p
	}

	// Allocate from the fresh arena. We haven't linked it in yet, so
	// this cannot race and is guaranteed to succeed.
	p := fresh.tryAlloc(bytesNeeded)
	if p == nil {
		throw("markBits overflow")
	}

	// Add the fresh arena to the "next" list.
	fresh.next = gcBitsArenas.next
	atomic.StorepNoWB(unsafe.Pointer(&gcBitsArenas.next), unsafe.Pointer(fresh))

	unlock(&gcBitsArenas.lock)
	return p
}
// newAllocBits returns a pointer to 8 byte aligned bytes
// to be used for this span's alloc bits.
// newAllocBits is used to provide newly initialized spans
// allocation bits. For spans not being initialized the
// mark bits are repurposed as allocation bits when
// the span is swept.
func newAllocBits(nelems uintptr) *gcBits {
	return newMarkBits(nelems)
}
// nextMarkBitArenaEpoch establishes a new epoch for the arenas
// holding the mark bits. The arenas are named relative to the
// current GC cycle which is demarcated by the call to finishweep_m.
//
// All current spans have been swept.
// During that sweep each span allocated room for its gcmarkBits in
// the gcBitsArenas.next block. gcBitsArenas.next becomes gcBitsArenas.current
// where the GC will mark objects and after each span is swept these bits
// will be used to allocate objects.
// gcBitsArenas.current becomes gcBitsArenas.previous where the span's
// gcAllocBits live until all the spans have been swept during this GC cycle.
// The span's sweep extinguishes all the references to gcBitsArenas.previous
// by pointing gcAllocBits into the gcBitsArenas.current.
// The gcBitsArenas.previous is released to the gcBitsArenas.free list.
func nextMarkBitArenaEpoch() {
	lock(&gcBitsArenas.lock)
	if gcBitsArenas.previous != nil {
		if gcBitsArenas.free == nil {
			gcBitsArenas.free = gcBitsArenas.previous
		} else {
			// Find end of previous arenas.
			last := gcBitsArenas.previous
			for last = gcBitsArenas.previous; last.next != nil; last = last.next {
			}
			last.next = gcBitsArenas.free
			gcBitsArenas.free = gcBitsArenas.previous
		}
	}
	gcBitsArenas.previous = gcBitsArenas.current
	gcBitsArenas.current = gcBitsArenas.next
	atomic.StorepNoWB(unsafe.Pointer(&gcBitsArenas.next), nil) // newMarkBits calls newArena when needed
	unlock(&gcBitsArenas.lock)
}
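The comment above describes a three-generation rotation: next (filled during sweep) becomes current (used for marking), current becomes previous (still referenced as alloc bits until sweeping finishes), and previous is recycled onto the free list. A standalone sketch of that rotation on invented types (not runtime code):

package main

import "fmt"

type arena struct {
	id   int
	next *arena
}

type arenas struct {
	free, next, current, previous *arena
}

// rotate performs the epoch change described above: previous arenas are
// recycled, current becomes previous, next becomes current, and next is
// cleared so the following sweep starts filling fresh arenas.
func (a *arenas) rotate() {
	if a.previous != nil {
		// Prepend the whole previous chain onto the free list.
		last := a.previous
		for ; last.next != nil; last = last.next {
		}
		last.next = a.free
		a.free = a.previous
	}
	a.previous = a.current
	a.current = a.next
	a.next = nil
}

func main() {
	a := arenas{next: &arena{id: 1}}
	a.rotate() // epoch 1: arena 1 is now current
	a.next = &arena{id: 2}
	a.rotate() // epoch 2: arena 2 current, arena 1 previous
	a.rotate() // epoch 3: arena 1 recycled onto free
	fmt.Println(a.free.id, a.previous.id, a.current) // 1 2 <nil>
}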
// newArenaMayUnlock allocates and zeroes a gcBits arena.
// The caller must hold gcBitsArenas.lock. This may temporarily release it.
func newArenaMayUnlock() *gcBitsArena {
	var result *gcBitsArena
	if gcBitsArenas.free == nil {
		unlock(&gcBitsArenas.lock)
		result = (*gcBitsArena)(sysAlloc(gcBitsChunkBytes, &memstats.gc_sys))
		if result == nil {
			throw("runtime: cannot allocate memory")
		}
		lock(&gcBitsArenas.lock)
	} else {
		result = gcBitsArenas.free
		gcBitsArenas.free = gcBitsArenas.free.next
		memclrNoHeapPointers(unsafe.Pointer(result), gcBitsChunkBytes)
	}
	result.next = nil
	// If result.bits is not 8 byte aligned adjust index so
	// that &result.bits[result.free] is 8 byte aligned.
	if uintptr(unsafe.Offsetof(gcBitsArena{}.bits))&7 == 0 {
		result.free = 0
	} else {
		result.free = 8 - (uintptr(unsafe.Pointer(&result.bits[0])) & 7)
	}
	return result
}
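The final branch in newArenaMayUnlock nudges the starting index so the first byte handed out is 8-byte aligned. A small standalone sketch of that adjustment (the sample address is made up):

package main

import "fmt"

func main() {
	// Suppose the bits array happens to start 4 bytes past an
	// 8-byte boundary.
	addr := uintptr(0x1000c)

	pad := uintptr(0)
	if addr&7 != 0 {
		pad = 8 - (addr & 7) // skip ahead to the next 8-byte boundary
	}
	fmt.Printf("start at offset %d -> first byte at %#x (aligned: %v)\n",
		pad, addr+pad, (addr+pad)&7 == 0)
}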