Mirror of https://github.com/golang/go.git
runtime: use sparse mappings for the heap
This replaces the contiguous heap arena mapping with a potentially sparse mapping that can support heap mappings anywhere in the address space.

This has several advantages over the current approach:

* There is no longer any limit on the size of the Go heap. (Currently it's limited to 512GB.) Hence, this fixes #10460.

* It eliminates many failure modes of heap initialization and growing. In particular, it eliminates any possibility of panicking with an address space conflict. This can happen for many reasons and even causes a low but steady rate of TSAN test failures because of conflicts with the TSAN runtime. See #16936 and #11993.

* It eliminates the notion of "non-reserved" heap, which was added because creating huge address space reservations (particularly on 64-bit) led to huge process VSIZE. This was at best confusing and at worst conflicted badly with ulimit -v. However, the non-reserved heap logic is complicated, can race with other mappings in non-pure Go binaries (e.g., #18976), and requires that the entire heap be either reserved or non-reserved. We currently maintain the latter property, but it's quite difficult to convince yourself of that, and hence difficult to keep correct. This logic is still present, but will be removed in the next CL.

* It fixes problems on 32-bit where skipping over parts of the address space leads to mapping huge (and never-to-be-used) metadata structures. See #19831.

This also completely rewrites and significantly simplifies mheap.sysAlloc, which has been a source of many bugs. E.g., #21044, #20259, #18651, and #13143 (and maybe #23222).

This change also makes it possible to allocate individual objects larger than 512GB. As a result, a few tests that expected huge allocations to fail needed to be changed to make even larger allocations. However, at the moment attempting to allocate a humongous object may cause the program to freeze for several minutes on Linux as we fall back to probing every page with addrspace_free. That logic (and this failure mode) will be removed in the next CL.

Fixes #10460.
Fixes #22204 (since it rewrites the code involved).

This slightly slows down compilebench and the x/benchmarks garbage benchmark.
name        old time/op  new time/op  delta
Template    184ms ± 1%   185ms ± 1%     ~     (p=0.065 n=10+9)
Unicode     86.9ms ± 3%  86.3ms ± 1%    ~     (p=0.631 n=10+10)
GoTypes     599ms ± 0%   602ms ± 0%   +0.56%  (p=0.000 n=10+9)
Compiler    2.87s ± 1%   2.89s ± 1%   +0.51%  (p=0.002 n=9+10)
SSA         7.29s ± 1%   7.25s ± 1%     ~     (p=0.182 n=10+9)
Flate       118ms ± 2%   118ms ± 1%     ~     (p=0.113 n=9+9)
GoParser    147ms ± 1%   148ms ± 1%   +1.07%  (p=0.003 n=9+10)
Reflect     401ms ± 1%   404ms ± 1%   +0.71%  (p=0.003 n=10+9)
Tar         175ms ± 1%   175ms ± 1%     ~     (p=0.604 n=9+10)
XML         209ms ± 1%   210ms ± 1%     ~     (p=0.052 n=10+10)
(https://perf.golang.org/search?q=upload:20171231.4)

name                       old time/op  new time/op  delta
Garbage/benchmem-MB=64-12  2.23ms ± 1%  2.25ms ± 1%  +0.84%  (p=0.000 n=19+19)
(https://perf.golang.org/search?q=upload:20171231.3)

Relative to the start of the sparse heap changes (starting at and including "runtime: fix various contiguous bitmap assumptions"), overall slowdown is roughly 1% on GC-intensive benchmarks:

name        old time/op  new time/op  delta
Template    183ms ± 1%   185ms ± 1%   +1.32%  (p=0.000 n=9+9)
Unicode     84.9ms ± 2%  86.3ms ± 1%  +1.65%  (p=0.000 n=9+10)
GoTypes     595ms ± 1%   602ms ± 0%   +1.19%  (p=0.000 n=9+9)
Compiler    2.86s ± 0%   2.89s ± 1%   +0.91%  (p=0.000 n=9+10)
SSA         7.19s ± 0%   7.25s ± 1%   +0.75%  (p=0.000 n=8+9)
Flate       117ms ± 1%   118ms ± 1%   +1.10%  (p=0.000 n=10+9)
GoParser    146ms ± 2%   148ms ± 1%   +1.48%  (p=0.002 n=10+10)
Reflect     398ms ± 1%   404ms ± 1%   +1.51%  (p=0.000 n=10+9)
Tar         173ms ± 1%   175ms ± 1%   +1.17%  (p=0.000 n=10+10)
XML         208ms ± 1%   210ms ± 1%   +0.62%  (p=0.011 n=10+10)
[Geo mean]  369ms        373ms        +1.17%
(https://perf.golang.org/search?q=upload:20180101.2)

name                       old time/op  new time/op  delta
Garbage/benchmem-MB=64-12  2.22ms ± 1%  2.25ms ± 1%  +1.51%  (p=0.000 n=20+19)
(https://perf.golang.org/search?q=upload:20180101.3)

Change-Id: I5daf4cfec24b252e5a57001f0a6c03f22479d0f0
Reviewed-on: https://go-review.googlesource.com/85887
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Rick Hudson <rlh@golang.org>
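The core of the scheme described in the message above is a single flat index from address to per-arena metadata: any address maps to an arena slot by dividing by the arena size, and regions the heap never touches simply leave nil entries, so the heap can live anywhere in the address space. Below is a minimal, self-contained Go sketch of that idea. The constants, the heapArena placeholder, and the arenaIndex helper are illustrative stand-ins chosen to resemble 64-bit Linux, not the runtime's actual definitions (which also reserve the index lazily rather than declaring a plain array).

package main

import "fmt"

// Illustrative values only: assume a 64-bit platform where each heap
// arena frame is 64 MiB and the usable address space is 2^48 bytes.
const (
	heapArenaBytes = 64 << 20
	memLimit       = 1 << 48
)

// heapArena stands in for the runtime's per-arena metadata (bitmap, spans, ...).
type heapArena struct{ mapped bool }

// arenas models the sparse index: entries stay nil until the
// corresponding arena frame is actually backed by the Go heap.
var arenas [memLimit / heapArenaBytes]*heapArena

// arenaIndex maps an address to its slot in the index.
func arenaIndex(p uintptr) uintptr { return p / heapArenaBytes }

func main() {
	p := uintptr(0x00c000000000) // an address somewhere in the heap
	i := arenaIndex(p)
	arenas[i] = &heapArena{mapped: true} // "grow" the heap at p's arena
	fmt.Printf("address %#x -> arena index %d, metadata present: %v\n",
		p, i, arenas[i] != nil)
}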
parent 45ffeab549
commit 2b415549b8

10 changed files with 436 additions and 334 deletions
@@ -96,31 +96,13 @@ type mheap struct {
 	nlargefree  uint64                  // number of frees for large objects (>maxsmallsize)
 	nsmallfree  [_NumSizeClasses]uint64 // number of frees for small objects (<=maxsmallsize)
 
-	// range of addresses we might see in the heap
-
-	// The arena_* fields indicate the addresses of the Go heap.
-	//
-	// The maximum range of the Go heap is
-	// [arena_start, arena_start+_MaxMem+1).
-	//
-	// The range of the current Go heap is
-	// [arena_start, arena_used). Parts of this range may not be
-	// mapped, but the metadata structures are always mapped for
-	// the full range.
-	arena_start uintptr
-	arena_used  uintptr // Set with setArenaUsed.
-
-	// The heap is grown using a linear allocator that allocates
-	// from the block [arena_alloc, arena_end). arena_alloc is
-	// often, but *not always* equal to arena_used.
-	arena_alloc uintptr
-	arena_end   uintptr
-
-	// arena_reserved indicates that the memory [arena_alloc,
-	// arena_end) is reserved (e.g., mapped PROT_NONE). If this is
-	// false, we have to be careful not to clobber existing
-	// mappings here. If this is true, then we own the mapping
-	// here and *must* clobber it to use it.
-	//
-	// TODO(austin): Remove.
-	arena_reserved bool
 
 	// arenas is the heap arena index. arenas[va/heapArenaBytes]
@@ -138,7 +120,22 @@ type mheap struct {
 	// to probe any index.
 	arenas *[memLimit / heapArenaBytes]*heapArena
 
-	//_ uint32 // ensure 64-bit alignment of central
+	// heapArenaAlloc is pre-reserved space for allocating heapArena
+	// objects. This is only used on 32-bit, where we pre-reserve
+	// this space to avoid interleaving it with the heap itself.
+	heapArenaAlloc linearAlloc
+
+	// arenaHints is a list of addresses at which to attempt to
+	// add more heap arenas. This is initially populated with a
+	// set of general hint addresses, and grown with the bounds of
+	// actual heap arena ranges.
+	arenaHints *arenaHint
+
+	// arena is a pre-reserved space for allocating heap arenas
+	// (the actual arenas). This is only used on 32-bit.
+	arena linearAlloc
+
+	_ uint32 // ensure 64-bit alignment of central
 
 	// central free lists for small size classes.
 	// the padding makes sure that the MCentrals are
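The two linearAlloc fields added here are described as allocators over space that was pre-reserved up front (on 32-bit). The toy below sketches that bump-pointer pattern under assumed names (linearAllocSketch, next, end, alloc); it is not the runtime's linearAlloc API, just an illustration of how a pre-reserved region can be carved up without ever freeing.

package main

import "fmt"

// linearAllocSketch is a toy bump allocator over a pre-reserved address
// range, in the spirit of the linearAlloc fields above. All names here
// are assumptions for the sketch.
type linearAllocSketch struct {
	next uintptr // next free byte in the reserved region
	end  uintptr // end of the reserved region
}

// alloc returns an align-aligned block of size bytes, or 0 once the
// reservation is exhausted. Nothing is ever freed.
func (l *linearAllocSketch) alloc(size, align uintptr) uintptr {
	p := (l.next + align - 1) &^ (align - 1) // round up to alignment
	if p+size > l.end {
		return 0
	}
	l.next = p + size
	return p
}

func main() {
	// Pretend [0x10000, 0x30000) was reserved from the OS up front.
	l := linearAllocSketch{next: 0x10000, end: 0x30000}
	fmt.Printf("%#x %#x\n", l.alloc(0x800, 8), l.alloc(0x1000, 4096))
}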
@@ -156,6 +153,7 @@ type mheap struct {
 	specialfinalizeralloc fixalloc // allocator for specialfinalizer*
 	specialprofilealloc   fixalloc // allocator for specialprofile*
 	speciallock           mutex    // lock for special record allocators.
+	arenaHintAlloc        fixalloc // allocator for arenaHints
 
 	unused *specialfinalizer // never set, just here to force the specialfinalizer type into DWARF
 }
@@ -190,6 +188,16 @@ type heapArena struct {
 	spans [pagesPerArena]*mspan
 }
 
+// arenaHint is a hint for where to grow the heap arenas. See
+// mheap_.arenaHints.
+//
+//go:notinheap
+type arenaHint struct {
+	addr uintptr
+	down bool
+	next *arenaHint
+}
+
 // An MSpan is a run of pages.
 //
 // When a MSpan is in the heap free list, state == MSpanFree
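The arenaHint records added above carry an address, a growth direction, and a next pointer. The sketch below is a simplified model of how such a hint list could be consumed when reserving a new arena: try each hinted address in turn, then advance the hint past the successful reservation so the next arena lands adjacent to it. The tryHints function and the reserve callback are stand-ins for this illustration, not the runtime's mheap.sysAlloc logic.

package main

import "fmt"

// arenaHint mirrors the struct added in this diff.
type arenaHint struct {
	addr uintptr
	down bool
	next *arenaHint
}

// tryHints attempts an n-byte reservation at each hint, growing downward
// when down is set, and updates the hint so future arenas extend the same
// run of address space.
func tryHints(hints *arenaHint, n uintptr, reserve func(addr, n uintptr) bool) uintptr {
	for h := hints; h != nil; h = h.next {
		p := h.addr
		if h.down {
			p -= n
		}
		if reserve(p, n) {
			if h.down {
				h.addr = p // next reservation goes below this one
			} else {
				h.addr = p + n // next reservation goes above this one
			}
			return p
		}
	}
	return 0 // every hinted region was unavailable
}

func main() {
	hints := &arenaHint{addr: 0x00c000000000,
		next: &arenaHint{addr: 0x7f0000000000, down: true}}
	// Fake OS: refuse the first hinted address, accept anything else.
	reserve := func(addr, n uintptr) bool { return addr != 0x00c000000000 }
	fmt.Printf("reserved at %#x\n", tryHints(hints, 64<<20, reserve))
}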
@@ -458,8 +466,7 @@ func spanOf(p uintptr) *mspan {
 }
 
 // spanOfUnchecked is equivalent to spanOf, but the caller must ensure
-// that p points into the heap (that is, mheap_.arena_start <= p <
-// mheap_.arena_used).
+// that p points into an allocated heap arena.
 //
 // Must be nosplit because it has callers that are nosplit.
 //
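With the contiguous [arena_start, arena_used) range gone, a pointer lookup becomes a two-step walk: find the arena's metadata in the sparse index, then pick the span for the page within that arena (heapArena.spans holds one slot per page, as shown in the struct above). The sketch below shows that shape with illustrative constants, a trimmed-down mspan, and a map standing in for the sparse index; the real code is runtime.spanOf/spanOfUnchecked, not this function.

package main

import "fmt"

// Illustrative sizes: 8 KiB pages and 64 MiB arenas, so 8192 pages per arena.
const (
	pageSize       = 8 << 10
	heapArenaBytes = 64 << 20
	pagesPerArena  = heapArenaBytes / pageSize
)

type mspan struct{ base uintptr } // trimmed-down stand-in

// heapArena keeps one span pointer per page in the arena, as in the diff.
type heapArena struct {
	spans [pagesPerArena]*mspan
}

// arenas is a toy sparse index; a map keeps the example small.
var arenas = map[uintptr]*heapArena{}

// spanOfSketch: arena lookup first, then the per-page span slot.
func spanOfSketch(p uintptr) *mspan {
	ha := arenas[p/heapArenaBytes]
	if ha == nil {
		return nil // p is not in any allocated heap arena
	}
	return ha.spans[(p/pageSize)%pagesPerArena]
}

func main() {
	base := uintptr(0x00c000000000) // arena-aligned for the example
	ha := &heapArena{}
	arenas[base/heapArenaBytes] = ha
	ha.spans[0] = &mspan{base: base} // pretend page 0 belongs to a span
	fmt.Println(spanOfSketch(base) != nil, spanOfSketch(base+heapArenaBytes) == nil)
}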
@@ -491,6 +498,7 @@ func (h *mheap) init() {
 	h.cachealloc.init(unsafe.Sizeof(mcache{}), nil, nil, &memstats.mcache_sys)
 	h.specialfinalizeralloc.init(unsafe.Sizeof(specialfinalizer{}), nil, nil, &memstats.other_sys)
 	h.specialprofilealloc.init(unsafe.Sizeof(specialprofile{}), nil, nil, &memstats.other_sys)
+	h.arenaHintAlloc.init(unsafe.Sizeof(arenaHint{}), nil, nil, &memstats.other_sys)
 
 	// Don't zero mspan allocations. Background sweeping can
 	// inspect a span concurrently with allocating it, so it's
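The new arenaHintAlloc is initialized exactly like the other fixalloc allocators in this function: a fixed-size allocator dedicated to a single type. As a rough illustration of that pattern only (not the runtime's fixalloc, which works on raw persistentalloc'd memory), the sketch below keeps a free list of equally sized objects and otherwise allocates in batches; all names here are assumptions for the sketch.

package main

import "fmt"

// arenaHint as added in this diff; the toy allocator below manages these.
type arenaHint struct {
	addr uintptr
	down bool
	next *arenaHint
}

// fixallocSketch hands out fixed-size objects, reusing freed ones first.
type fixallocSketch struct {
	freeList []*arenaHint
	batch    int
}

func (f *fixallocSketch) alloc() *arenaHint {
	if n := len(f.freeList); n > 0 {
		h := f.freeList[n-1]
		f.freeList = f.freeList[:n-1]
		return h
	}
	// No freed object available: grow by a batch and hand out one slot.
	chunk := make([]arenaHint, f.batch)
	for i := 1; i < len(chunk); i++ {
		f.freeList = append(f.freeList, &chunk[i])
	}
	return &chunk[0]
}

func (f *fixallocSketch) free(h *arenaHint) {
	*h = arenaHint{} // clear before reuse
	f.freeList = append(f.freeList, h)
}

func main() {
	f := &fixallocSketch{batch: 16}
	h := f.alloc()
	h.addr = 0x00c000000000
	f.free(h)
	fmt.Println(f.alloc() == h) // the freed object is reused: true
}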
@@ -511,46 +519,6 @@ func (h *mheap) init() {
 	for i := range h.central {
 		h.central[i].mcentral.init(spanClass(i))
 	}
-
-	// Map metadata structures. But don't map race detector memory
-	// since we're not actually growing the arena here (and TSAN
-	// gets mad if you map 0 bytes).
-	h.setArenaUsed(h.arena_used, false)
-}
-
-// setArenaUsed extends the usable arena to address arena_used and
-// maps auxiliary VM regions for any newly usable arena space.
-//
-// racemap indicates that this memory should be managed by the race
-// detector. racemap should be true unless this is covering a VM hole.
-func (h *mheap) setArenaUsed(arena_used uintptr, racemap bool) {
-	// Map auxiliary structures *before* h.arena_used is updated.
-	// Waiting to update arena_used until after the memory has been mapped
-	// avoids faults when other threads try access these regions immediately
-	// after observing the change to arena_used.
-
-	// Allocate heap arena metadata.
-	for ri := h.arena_used / heapArenaBytes; ri < (arena_used+heapArenaBytes-1)/heapArenaBytes; ri++ {
-		if h.arenas[ri] != nil {
-			continue
-		}
-		r := (*heapArena)(persistentalloc(unsafe.Sizeof(heapArena{}), sys.PtrSize, &memstats.gc_sys))
-		if r == nil {
-			throw("runtime: out of memory allocating heap arena metadata")
-		}
-		// Store atomically just in case an object from the
-		// new heap arena becomes visible before the heap lock
-		// is released (which shouldn't happen, but there's
-		// little downside to this).
-		atomic.StorepNoWB(unsafe.Pointer(&h.arenas[ri]), unsafe.Pointer(r))
-	}
-
-	// Tell the race detector about the new heap memory.
-	if racemap && raceenabled {
-		racemapshadow(unsafe.Pointer(h.arena_used), arena_used-h.arena_used)
-	}
-
-	h.arena_used = arena_used
 }
 
 // Sweeps spans in list until reclaims at least npages into heap.
@@ -886,32 +854,17 @@ func (h *mheap) allocLarge(npage uintptr) *mspan {
 //
 // h must be locked.
 func (h *mheap) grow(npage uintptr) bool {
-	// Ask for a big chunk, to reduce the number of mappings
-	// the operating system needs to track; also amortizes
-	// the overhead of an operating system mapping.
-	// Allocate a multiple of 64kB.
-	npage = round(npage, (64<<10)/_PageSize)
 	ask := npage << _PageShift
-	if ask < _HeapAllocChunk {
-		ask = _HeapAllocChunk
-	}
-
-	v := h.sysAlloc(ask)
+	v, size := h.sysAlloc(ask)
 	if v == nil {
-		if ask > npage<<_PageShift {
-			ask = npage << _PageShift
-			v = h.sysAlloc(ask)
-		}
-		if v == nil {
-			print("runtime: out of memory: cannot allocate ", ask, "-byte block (", memstats.heap_sys, " in use)\n")
-			return false
-		}
+		print("runtime: out of memory: cannot allocate ", ask, "-byte block (", memstats.heap_sys, " in use)\n")
+		return false
 	}
 
 	// Create a fake "in use" span and free it, so that the
 	// right coalescing happens.
 	s := (*mspan)(h.spanalloc.alloc())
-	s.init(uintptr(v), ask>>_PageShift)
+	s.init(uintptr(v), size/pageSize)
 	h.setSpans(s.base(), s.npages, s)
 	atomic.Store(&s.sweepgen, h.sweepgen)
 	s.state = _MSpanInUse
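The reworked grow above drops the 64 kB rounding and the _HeapAllocChunk retry: it asks sysAlloc for exactly npage pages and sizes the resulting span from the (v, size) pair that sysAlloc now returns, so grow no longer assumes the reservation matches the request. The sketch below mirrors that control flow with a stand-in sysAlloc that always succeeds and rounds the request up to a whole arena; the rounding policy and all names are assumptions for the sketch, not taken from this diff.

package main

import "fmt"

// Illustrative sizes for the sketch.
const (
	pageSize       = 8 << 10
	heapArenaBytes = 64 << 20
)

// sysAllocSketch stands in for the new sysAlloc signature: it returns both
// a base address and the size actually reserved.
func sysAllocSketch(n uintptr) (base, size uintptr) {
	size = (n + heapArenaBytes - 1) &^ (heapArenaBytes - 1)
	return 0x00c000000000, size // pretend the OS gave us this reservation
}

// growSketch mirrors the simplified flow of the new grow: request exactly
// npage pages, then size the span from what sysAlloc really handed back.
func growSketch(npage uintptr) (spanPages uintptr, ok bool) {
	ask := npage * pageSize
	v, size := sysAllocSketch(ask)
	if v == 0 {
		return 0, false // out of memory
	}
	return size / pageSize, true
}

func main() {
	pages, ok := growSketch(100) // ask for 100 pages (800 KiB)
	fmt.Println(pages, ok)       // the span covers a full arena here: 8192 true
}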