Mirror of https://github.com/golang/go.git (synced 2025-12-08 06:10:04 +00:00)
runtime: reorganize memory code
Move code from malloc1.go, malloc2.go, mem.go, mgc0.go into appropriate locations.

Factor mgc.go into mgc.go, mgcmark.go, mgcsweep.go, mstats.go.

A lot of this code was in certain files because the right place was in a C file but it was written in Go, or vice versa. This is one step toward making things actually well-organized again.

Change-Id: I6741deb88a7cfb1c17ffe0bcca3989e10207968f
Reviewed-on: https://go-review.googlesource.com/5300
Reviewed-by: Austin Clements <austin@google.com>
Reviewed-by: Rick Hudson <rlh@golang.org>
parent d384545a45
commit 484f801ff4

20 changed files with 2567 additions and 2593 deletions
@@ -13,6 +13,24 @@ package runtime
|
|||
|
||||
import "unsafe"
|
||||
|
||||
//go:linkname runtime_debug_WriteHeapDump runtime/debug.WriteHeapDump
|
||||
func runtime_debug_WriteHeapDump(fd uintptr) {
|
||||
semacquire(&worldsema, false)
|
||||
gp := getg()
|
||||
gp.m.preemptoff = "write heap dump"
|
||||
systemstack(stoptheworld)
|
||||
|
||||
systemstack(func() {
|
||||
writeheapdump_m(fd)
|
||||
})
|
||||
|
||||
gp.m.preemptoff = ""
|
||||
gp.m.locks++
|
||||
semrelease(&worldsema)
|
||||
systemstack(starttheworld)
|
||||
gp.m.locks--
|
||||
}
|
||||
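For context, here is a minimal caller's view through the public wrapper that the linkname above backs, runtime/debug.WriteHeapDump. The surrounding program and the output path are hypothetical examples; only the runtime/debug and os calls are real API.

package main

import (
	"os"
	"runtime/debug"
)

func main() {
	f, err := os.Create("/tmp/heapdump") // example path
	if err != nil {
		panic(err)
	}
	defer f.Close()
	// Stops the world for the duration of the dump, as the runtime side above shows.
	debug.WriteHeapDump(f.Fd())
}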
|
||||
const (
|
||||
fieldKindEol = 0
|
||||
fieldKindPtr = 1
|
||||
|
|
|
|||
|
|
@@ -2,6 +2,84 @@
|
|||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Memory allocator, based on tcmalloc.
|
||||
// http://goog-perftools.sourceforge.net/doc/tcmalloc.html
|
||||
|
||||
// The main allocator works in runs of pages.
|
||||
// Small allocation sizes (up to and including 32 kB) are
|
||||
// rounded to one of about 100 size classes, each of which
|
||||
// has its own free list of objects of exactly that size.
|
||||
// Any free page of memory can be split into a set of objects
|
||||
// of one size class, which are then managed using free list
|
||||
// allocators.
|
||||
//
|
||||
// The allocator's data structures are:
|
||||
//
|
||||
// FixAlloc: a free-list allocator for fixed-size objects,
|
||||
// used to manage storage used by the allocator.
|
||||
// MHeap: the malloc heap, managed at page (4096-byte) granularity.
|
||||
// MSpan: a run of pages managed by the MHeap.
|
||||
// MCentral: a shared free list for a given size class.
|
||||
// MCache: a per-thread (in Go, per-P) cache for small objects.
|
||||
// MStats: allocation statistics.
|
||||
//
|
||||
// Allocating a small object proceeds up a hierarchy of caches:
|
||||
//
|
||||
// 1. Round the size up to one of the small size classes
|
||||
// and look in the corresponding MCache free list.
|
||||
// If the list is not empty, allocate an object from it.
|
||||
// This can all be done without acquiring a lock.
|
||||
//
|
||||
// 2. If the MCache free list is empty, replenish it by
|
||||
// taking a bunch of objects from the MCentral free list.
|
||||
// Moving a bunch amortizes the cost of acquiring the MCentral lock.
|
||||
//
|
||||
// 3. If the MCentral free list is empty, replenish it by
|
||||
// allocating a run of pages from the MHeap and then
|
||||
// chopping that memory into objects of the given size.
|
||||
// Allocating many objects amortizes the cost of locking
|
||||
// the heap.
|
||||
//
|
||||
// 4. If the MHeap is empty or has no page runs large enough,
|
||||
// allocate a new group of pages (at least 1MB) from the
|
||||
// operating system. Allocating a large run of pages
|
||||
// amortizes the cost of talking to the operating system.
|
||||
//
|
||||
// Freeing a small object proceeds up the same hierarchy:
|
||||
//
|
||||
// 1. Look up the size class for the object and add it to
|
||||
// the MCache free list.
|
||||
//
|
||||
// 2. If the MCache free list is too long or the MCache has
|
||||
// too much memory, return some to the MCentral free lists.
|
||||
//
|
||||
// 3. If all the objects in a given span have returned to
|
||||
// the MCentral list, return that span to the page heap.
|
||||
//
|
||||
// 4. If the heap has too much memory, return some to the
|
||||
// operating system.
|
||||
//
|
||||
// TODO(rsc): Step 4 is not implemented.
|
||||
//
|
||||
// Allocating and freeing a large object uses the page heap
|
||||
// directly, bypassing the MCache and MCentral free lists.
|
||||
//
|
||||
// The small objects on the MCache and MCentral free lists
|
||||
// may or may not be zeroed. They are zeroed if and only if
|
||||
// the second word of the object is zero. A span in the
|
||||
// page heap is zeroed unless s->needzero is set. When a span
|
||||
// is allocated to break into small objects, it is zeroed if needed
|
||||
// and s->needzero is set. There are two main benefits to delaying the
|
||||
// zeroing this way:
|
||||
//
|
||||
// 1. stack frames allocated from the small object lists
|
||||
// or the page heap can avoid zeroing altogether.
|
||||
// 2. the cost of zeroing when reusing a small object is
|
||||
// charged to the mutator, not the garbage collector.
|
||||
//
|
||||
// This code was written with an eye toward translating to Go
|
||||
// in the future. Methods have the form Type_Method(Type *t, ...).
|
||||
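To make the hierarchy concrete: a request for, say, 52 bytes is rounded up to a small size class (64 bytes under the classes that msize computes); step 1 pops an object off the current P's MCache free list for that class with no locking, step 2 refills that list in bulk from the class's MCentral only when it is empty, and step 3 carves a fresh span of pages out of the MHeap only when the MCentral is also empty.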
|
||||
package runtime
|
||||
|
||||
import "unsafe"
|
||||
|
|
@@ -25,29 +103,369 @@ const (
|
|||
concurrentSweep = _ConcurrentSweep
|
||||
)
|
||||
|
||||
const (
|
||||
_PageShift = 13
|
||||
_PageSize = 1 << _PageShift
|
||||
_PageMask = _PageSize - 1
|
||||
)
|
||||
|
||||
const (
|
||||
// _64bit = 1 on 64-bit systems, 0 on 32-bit systems
|
||||
_64bit = 1 << (^uintptr(0) >> 63) / 2
|
||||
|
||||
// Computed constant. The definition of MaxSmallSize and the
|
||||
// algorithm in msize.c produce some number of different allocation
|
||||
// size classes. NumSizeClasses is that number. It's needed here
|
||||
// because there are static arrays of this length; when msize runs its
|
||||
// size choosing algorithm it double-checks that NumSizeClasses agrees.
|
||||
_NumSizeClasses = 67
|
||||
|
||||
// Tunable constants.
|
||||
_MaxSmallSize = 32 << 10
|
||||
|
||||
// Tiny allocator parameters, see "Tiny allocator" comment in malloc.go.
|
||||
_TinySize = 16
|
||||
_TinySizeClass = 2
|
||||
|
||||
_FixAllocChunk = 16 << 10 // Chunk size for FixAlloc
|
||||
_MaxMHeapList = 1 << (20 - _PageShift) // Maximum page length for fixed-size list in MHeap.
|
||||
_HeapAllocChunk = 1 << 20 // Chunk size for heap growth
|
||||
|
||||
// Per-P, per order stack segment cache size.
|
||||
_StackCacheSize = 32 * 1024
|
||||
|
||||
// Number of orders that get caching. Order 0 is FixedStack
|
||||
// and each successive order is twice as large.
|
||||
// We want to cache 2KB, 4KB, 8KB, and 16KB stacks. Larger stacks
|
||||
// will be allocated directly.
|
||||
// Since FixedStack is different on different systems, we
|
||||
// must vary NumStackOrders to keep the same maximum cached size.
|
||||
// OS | FixedStack | NumStackOrders
|
||||
// -----------------+------------+---------------
|
||||
// linux/darwin/bsd | 2KB | 4
|
||||
// windows/32 | 4KB | 3
|
||||
// windows/64 | 8KB | 2
|
||||
// plan9 | 4KB | 3
|
||||
_NumStackOrders = 4 - ptrSize/4*goos_windows - 1*goos_plan9
|
||||
|
||||
// Number of bits in page to span calculations (4k pages).
|
||||
// On Windows 64-bit we limit the arena to 32GB or 35 bits.
|
||||
// Windows counts memory used by page table into committed memory
|
||||
// of the process, so we can't reserve too much memory.
|
||||
// See http://golang.org/issue/5402 and http://golang.org/issue/5236.
|
||||
// On other 64-bit platforms, we limit the arena to 128GB, or 37 bits.
|
||||
// On 32-bit, we don't bother limiting anything, so we use the full 32-bit address.
|
||||
_MHeapMap_TotalBits = (_64bit*goos_windows)*35 + (_64bit*(1-goos_windows))*37 + (1-_64bit)*32
|
||||
_MHeapMap_Bits = _MHeapMap_TotalBits - _PageShift
|
||||
|
||||
_MaxMem = uintptr(1<<_MHeapMap_TotalBits - 1)
|
||||
|
||||
// Max number of threads to run garbage collection.
|
||||
// 2, 3, and 4 are all plausible maximums depending
|
||||
// on the hardware details of the machine. The garbage
|
||||
// collector scales well to 32 cpus.
|
||||
_MaxGcproc = 32
|
||||
)
|
||||
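Working the two OS-dependent formulas above through by hand (ptrSize is 4 or 8; each goos_* constant is 1 on the matching system and 0 elsewhere): _NumStackOrders is 4 - 0 - 0 = 4 on linux/darwin/bsd, 4 - 4/4 - 0 = 3 on windows/32, 4 - 8/4 - 0 = 2 on windows/64, and 4 - 0 - 1 = 3 on plan9, matching the table, so the largest cached stack is 16KB in every case. _MHeapMap_TotalBits is 0 + 37 + 0 = 37 on 64-bit non-Windows systems (so _MaxMem is 2^37-1, the 128GB arena), 35 on 64-bit Windows (32GB), and 32 on 32-bit systems (the full 32-bit address space); with _PageShift = 13, _MHeapMap_Bits on 64-bit Linux is 37 - 13 = 24.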
|
||||
// Page number (address>>pageShift)
|
||||
type pageID uintptr
|
||||
|
||||
const _MaxArena32 = 2 << 30
|
||||
|
||||
// OS-defined helpers:
|
||||
//
|
||||
// sysAlloc obtains a large chunk of zeroed memory from the
|
||||
// operating system, typically on the order of a hundred kilobytes
|
||||
// or a megabyte.
|
||||
// NOTE: sysAlloc returns OS-aligned memory, but the heap allocator
|
||||
// may use larger alignment, so the caller must be careful to realign the
|
||||
// memory obtained by sysAlloc.
|
||||
//
|
||||
// SysUnused notifies the operating system that the contents
|
||||
// of the memory region are no longer needed and can be reused
|
||||
// for other purposes.
|
||||
// SysUsed notifies the operating system that the contents
|
||||
// of the memory region are needed again.
|
||||
//
|
||||
// SysFree returns the memory region to the operating system unconditionally; this is only used if
|
||||
// an out-of-memory error has been detected midway through
|
||||
// an allocation. It is okay if SysFree is a no-op.
|
||||
//
|
||||
// SysReserve reserves address space without allocating memory.
|
||||
// If the pointer passed to it is non-nil, the caller wants the
|
||||
// reservation there, but SysReserve can still choose another
|
||||
// location if that one is unavailable. On some systems and in some
|
||||
// cases SysReserve will simply check that the address space is
|
||||
// available and not actually reserve it. If SysReserve returns
|
||||
// non-nil, it sets *reserved to true if the address space is
|
||||
// reserved, false if it has merely been checked.
|
||||
// NOTE: SysReserve returns OS-aligned memory, but the heap allocator
|
||||
// may use larger alignment, so the caller must be careful to realign the
|
||||
// memory obtained by sysAlloc.
|
||||
//
|
||||
// SysMap maps previously reserved address space for use.
|
||||
// The reserved argument is true if the address space was really
|
||||
// reserved, not merely checked.
|
||||
//
|
||||
// SysFault marks a (already sysAlloc'd) region to fault
|
||||
// if accessed. Used only for debugging the runtime.
|
||||
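The rest of this file is the client of that interface: mallocinit reserves the arena up front with sysReserve, mHeap_SysAlloc later commits pieces of the reservation on demand with sysMap (passing h.arena_reserved through as the reserved argument), and on 32-bit it falls back to sysAlloc/sysFree once the reservation is exhausted.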
|
||||
func mallocinit() {
|
||||
initSizes()
|
||||
|
||||
if class_to_size[_TinySizeClass] != _TinySize {
|
||||
throw("bad TinySizeClass")
|
||||
}
|
||||
|
||||
var p, bitmapSize, spansSize, pSize, limit uintptr
|
||||
var reserved bool
|
||||
|
||||
// limit = runtime.memlimit();
|
||||
// See https://golang.org/issue/5049
|
||||
// TODO(rsc): Fix after 1.1.
|
||||
limit = 0
|
||||
|
||||
// Set up the allocation arena, a contiguous area of memory where
|
||||
// allocated data will be found. The arena begins with a bitmap large
|
||||
// enough to hold 4 bits per allocated word.
|
||||
if ptrSize == 8 && (limit == 0 || limit > 1<<30) {
|
||||
// On a 64-bit machine, allocate from a single contiguous reservation.
|
||||
// 128 GB (MaxMem) should be big enough for now.
|
||||
//
|
||||
// The code will work with the reservation at any address, but ask
|
||||
// SysReserve to use 0x0000XXc000000000 if possible (XX=00...7f).
|
||||
// Allocating a 128 GB region takes away 37 bits, and the amd64
|
||||
// doesn't let us choose the top 17 bits, so that leaves the 11 bits
|
||||
// in the middle of 0x00c0 for us to choose. Choosing 0x00c0 means
|
||||
// that the valid memory addresses will begin 0x00c0, 0x00c1, ..., 0x00df.
|
||||
// In little-endian, that's c0 00, c1 00, ..., df 00. None of those are valid
|
||||
// UTF-8 sequences, and they are otherwise as far away from
|
||||
// ff (likely a common byte) as possible. If that fails, we try other 0xXXc0
|
||||
// addresses. An earlier attempt to use 0x11f8 caused out of memory errors
|
||||
// on OS X during thread allocations. 0x00c0 causes conflicts with
|
||||
// AddressSanitizer which reserves all memory up to 0x0100.
|
||||
// These choices are both for debuggability and to reduce the
|
||||
// odds of the conservative garbage collector not collecting memory
|
||||
// because some non-pointer block of memory had a bit pattern
|
||||
// that matched a memory address.
|
||||
//
|
||||
// Actually we reserve 136 GB (because the bitmap ends up being 8 GB)
|
||||
// but it hardly matters: e0 00 is not valid UTF-8 either.
|
||||
//
|
||||
// If this fails we fall back to the 32 bit memory mechanism
|
||||
arenaSize := round(_MaxMem, _PageSize)
|
||||
bitmapSize = arenaSize / (ptrSize * 8 / 4)
|
||||
spansSize = arenaSize / _PageSize * ptrSize
|
||||
spansSize = round(spansSize, _PageSize)
|
||||
for i := 0; i <= 0x7f; i++ {
|
||||
p = uintptr(i)<<40 | uintptrMask&(0x00c0<<32)
|
||||
pSize = bitmapSize + spansSize + arenaSize + _PageSize
|
||||
p = uintptr(sysReserve(unsafe.Pointer(p), pSize, &reserved))
|
||||
if p != 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if p == 0 {
|
||||
// On a 32-bit machine, we can't typically get away
|
||||
// with a giant virtual address space reservation.
|
||||
// Instead we map the memory information bitmap
|
||||
// immediately after the data segment, large enough
|
||||
// to handle another 2GB of mappings (256 MB),
|
||||
// along with a reservation for an initial arena.
|
||||
// When that gets used up, we'll start asking the kernel
|
||||
// for any memory anywhere and hope it's in the 2GB
|
||||
// following the bitmap (presumably the executable begins
|
||||
// near the bottom of memory, so we'll have to use up
|
||||
// most of memory before the kernel resorts to giving out
|
||||
// memory before the beginning of the text segment).
|
||||
//
|
||||
// Alternatively we could reserve 512 MB bitmap, enough
|
||||
// for 4GB of mappings, and then accept any memory the
|
||||
// kernel threw at us, but normally that's a waste of 512 MB
|
||||
// of address space, which is probably too much in a 32-bit world.
|
||||
|
||||
// If we fail to allocate, try again with a smaller arena.
|
||||
// This is necessary on Android L where we share a process
|
||||
// with ART, which reserves virtual memory aggressively.
|
||||
arenaSizes := []uintptr{
|
||||
512 << 20,
|
||||
256 << 20,
|
||||
}
|
||||
|
||||
for _, arenaSize := range arenaSizes {
|
||||
bitmapSize = _MaxArena32 / (ptrSize * 8 / 4)
|
||||
spansSize = _MaxArena32 / _PageSize * ptrSize
|
||||
if limit > 0 && arenaSize+bitmapSize+spansSize > limit {
|
||||
bitmapSize = (limit / 9) &^ ((1 << _PageShift) - 1)
|
||||
arenaSize = bitmapSize * 8
|
||||
spansSize = arenaSize / _PageSize * ptrSize
|
||||
}
|
||||
spansSize = round(spansSize, _PageSize)
|
||||
|
||||
// SysReserve treats the address we ask for, end, as a hint,
|
||||
// not as an absolute requirement. If we ask for the end
|
||||
// of the data segment but the operating system requires
|
||||
// a little more space before we can start allocating, it will
|
||||
// give out a slightly higher pointer. Except QEMU, which
|
||||
// is buggy, as usual: it won't adjust the pointer upward.
|
||||
// So adjust it upward a little bit ourselves: 1/4 MB to get
|
||||
// away from the running binary image and then round up
|
||||
// to a MB boundary.
|
||||
p = round(uintptr(unsafe.Pointer(&end))+(1<<18), 1<<20)
|
||||
pSize = bitmapSize + spansSize + arenaSize + _PageSize
|
||||
p = uintptr(sysReserve(unsafe.Pointer(p), pSize, &reserved))
|
||||
if p != 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
if p == 0 {
|
||||
throw("runtime: cannot reserve arena virtual address space")
|
||||
}
|
||||
}
|
||||
|
||||
// PageSize can be larger than OS definition of page size,
|
||||
// so SysReserve can give us a PageSize-unaligned pointer.
|
||||
// To overcome this we ask for PageSize more and round up the pointer.
|
||||
p1 := round(p, _PageSize)
|
||||
|
||||
mheap_.spans = (**mspan)(unsafe.Pointer(p1))
|
||||
mheap_.bitmap = p1 + spansSize
|
||||
mheap_.arena_start = p1 + (spansSize + bitmapSize)
|
||||
mheap_.arena_used = mheap_.arena_start
|
||||
mheap_.arena_end = p + pSize
|
||||
mheap_.arena_reserved = reserved
|
||||
|
||||
if mheap_.arena_start&(_PageSize-1) != 0 {
|
||||
println("bad pagesize", hex(p), hex(p1), hex(spansSize), hex(bitmapSize), hex(_PageSize), "start", hex(mheap_.arena_start))
|
||||
throw("misrounded allocation in mallocinit")
|
||||
}
|
||||
|
||||
// Initialize the rest of the allocator.
|
||||
mHeap_Init(&mheap_, spansSize)
|
||||
_g_ := getg()
|
||||
_g_.m.mcache = allocmcache()
|
||||
}
|
||||
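As a sanity check of the sizes computed in the 64-bit branch above: arenaSize is 2^37 bytes (128 GB), bitmapSize is arenaSize/(ptrSize*8/4) = arenaSize/16 = 8 GB, and spansSize is arenaSize/8192*8 = 128 MB rounded to a page, so the whole reservation comes to a little over 136 GB, matching the "Actually we reserve 136 GB" comment above.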
|
||||
// sysReserveHigh reserves space somewhere high in the address space.
|
||||
// sysReserve doesn't actually reserve the full amount requested on
|
||||
// 64-bit systems, because of problems with ulimit. Instead it checks
|
||||
// that it can get the first 64 kB and assumes it can grab the rest as
|
||||
// needed. This doesn't work well with the "let the kernel pick an address"
|
||||
// mode, so don't do that. Pick a high address instead.
|
||||
func sysReserveHigh(n uintptr, reserved *bool) unsafe.Pointer {
|
||||
if ptrSize == 4 {
|
||||
return sysReserve(nil, n, reserved)
|
||||
}
|
||||
|
||||
for i := 0; i <= 0x7f; i++ {
|
||||
p := uintptr(i)<<40 | uintptrMask&(0x00c0<<32)
|
||||
*reserved = false
|
||||
p = uintptr(sysReserve(unsafe.Pointer(p), n, reserved))
|
||||
if p != 0 {
|
||||
return unsafe.Pointer(p)
|
||||
}
|
||||
}
|
||||
|
||||
return sysReserve(nil, n, reserved)
|
||||
}
|
||||
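The loop above walks the same family of addresses that mallocinit prefers: i<<40 supplies the XX byte and 0x00c0<<32 the fixed c0 byte, so the candidates are 0x00c000000000, 0x01c000000000, ..., 0x7fc000000000.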
|
||||
func mHeap_SysAlloc(h *mheap, n uintptr) unsafe.Pointer {
|
||||
if n > uintptr(h.arena_end)-uintptr(h.arena_used) {
|
||||
// We are in 32-bit mode, maybe we didn't use all possible address space yet.
|
||||
// Reserve some more space.
|
||||
p_size := round(n+_PageSize, 256<<20)
|
||||
new_end := h.arena_end + p_size
|
||||
if new_end <= h.arena_start+_MaxArena32 {
|
||||
// TODO: It would be bad if part of the arena
|
||||
// is reserved and part is not.
|
||||
var reserved bool
|
||||
p := uintptr(sysReserve((unsafe.Pointer)(h.arena_end), p_size, &reserved))
|
||||
if p == h.arena_end {
|
||||
h.arena_end = new_end
|
||||
h.arena_reserved = reserved
|
||||
} else if p+p_size <= h.arena_start+_MaxArena32 {
|
||||
// Keep everything page-aligned.
|
||||
// Our pages are bigger than hardware pages.
|
||||
h.arena_end = p + p_size
|
||||
h.arena_used = p + (-uintptr(p) & (_PageSize - 1))
|
||||
h.arena_reserved = reserved
|
||||
} else {
|
||||
var stat uint64
|
||||
sysFree((unsafe.Pointer)(p), p_size, &stat)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if n <= uintptr(h.arena_end)-uintptr(h.arena_used) {
|
||||
// Keep taking from our reservation.
|
||||
p := h.arena_used
|
||||
sysMap((unsafe.Pointer)(p), n, h.arena_reserved, &memstats.heap_sys)
|
||||
h.arena_used += n
|
||||
mHeap_MapBits(h)
|
||||
mHeap_MapSpans(h)
|
||||
if raceenabled {
|
||||
racemapshadow((unsafe.Pointer)(p), n)
|
||||
}
|
||||
if mheap_.shadow_enabled {
|
||||
sysMap(unsafe.Pointer(p+mheap_.shadow_heap), n, h.shadow_reserved, &memstats.other_sys)
|
||||
}
|
||||
|
||||
if uintptr(p)&(_PageSize-1) != 0 {
|
||||
throw("misrounded allocation in MHeap_SysAlloc")
|
||||
}
|
||||
return (unsafe.Pointer)(p)
|
||||
}
|
||||
|
||||
// If using 64-bit, our reservation is all we have.
|
||||
if uintptr(h.arena_end)-uintptr(h.arena_start) >= _MaxArena32 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// On 32-bit, once the reservation is gone we can
|
||||
// try to get memory at a location chosen by the OS
|
||||
// and hope that it is in the range we allocated bitmap for.
|
||||
p_size := round(n, _PageSize) + _PageSize
|
||||
p := uintptr(sysAlloc(p_size, &memstats.heap_sys))
|
||||
if p == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
if p < h.arena_start || uintptr(p)+p_size-uintptr(h.arena_start) >= _MaxArena32 {
|
||||
print("runtime: memory allocated by OS (", p, ") not in usable range [", hex(h.arena_start), ",", hex(h.arena_start+_MaxArena32), ")\n")
|
||||
sysFree((unsafe.Pointer)(p), p_size, &memstats.heap_sys)
|
||||
return nil
|
||||
}
|
||||
|
||||
p_end := p + p_size
|
||||
p += -p & (_PageSize - 1)
|
||||
if uintptr(p)+n > uintptr(h.arena_used) {
|
||||
h.arena_used = p + n
|
||||
if p_end > h.arena_end {
|
||||
h.arena_end = p_end
|
||||
}
|
||||
mHeap_MapBits(h)
|
||||
mHeap_MapSpans(h)
|
||||
if raceenabled {
|
||||
racemapshadow((unsafe.Pointer)(p), n)
|
||||
}
|
||||
}
|
||||
|
||||
if uintptr(p)&(_PageSize-1) != 0 {
|
||||
throw("misrounded allocation in MHeap_SysAlloc")
|
||||
}
|
||||
return (unsafe.Pointer)(p)
|
||||
}
|
||||
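Note that the 32-bit growth path above reserves in 256 MB steps: even when the current reservation is short by only a few bytes, round(n+_PageSize, 256<<20) asks for a full 256 MB chunk, which keeps the number of sysReserve calls small as the arena grows toward the 2 GB _MaxArena32 limit.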
|
||||
// base address for all 0-byte allocations
|
||||
var zerobase uintptr
|
||||
|
||||
// Trigger the concurrent GC when 1/triggerratio memory is available to allocate.
|
||||
// Adjust this ratio as part of a scheme to ensure that mutators have enough
|
||||
// memory to allocate during a concurrent GC cycle.
|
||||
var triggerratio = int64(8)
|
||||
|
||||
// Determine whether to initiate a GC.
|
||||
// If the GC is already working, there is no need to trigger another one.
|
||||
// This should establish a feedback loop where if the GC does not
|
||||
// have sufficient time to complete then more memory will be
|
||||
// requested from the OS, increasing the heap size and thus allowing future
|
||||
// GCs more time to complete.
|
||||
// memstats.heap_alloc and memstats.next_gc reads have benign races.
|
||||
// A false negative simply does not start a GC, a false positive
|
||||
// will start a GC needlessly. Neither has correctness issues.
|
||||
func shouldtriggergc() bool {
|
||||
return triggerratio*(int64(memstats.next_gc)-int64(memstats.heap_alloc)) <= int64(memstats.next_gc) && atomicloaduint(&bggc.working) == 0
|
||||
}
|
||||
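Concretely, with triggerratio = 8 the condition 8*(next_gc - heap_alloc) <= next_gc holds once the remaining headroom next_gc - heap_alloc has fallen to next_gc/8: for a next_gc of 4 MB, a background cycle is requested when heap_alloc reaches 3.5 MB, provided no background GC is already running (bggc.working == 0).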
const (
|
||||
// flags to malloc
|
||||
_FlagNoScan = 1 << 0 // GC doesn't have to scan object
|
||||
_FlagNoZero = 1 << 1 // don't zero memory
|
||||
)
|
||||
|
||||
// Allocate an object of size bytes.
|
||||
// Small objects are allocated from the per-P cache's free lists.
|
||||
|
|
@@ -250,6 +668,25 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
|
|||
return x
|
||||
}
|
||||
|
||||
func largeAlloc(size uintptr, flag uint32) *mspan {
|
||||
// print("largeAlloc size=", size, "\n")
|
||||
|
||||
if size+_PageSize < size {
|
||||
throw("out of memory")
|
||||
}
|
||||
npages := size >> _PageShift
|
||||
if size&_PageMask != 0 {
|
||||
npages++
|
||||
}
|
||||
s := mHeap_Alloc(&mheap_, npages, 0, true, flag&_FlagNoZero == 0)
|
||||
if s == nil {
|
||||
throw("out of memory")
|
||||
}
|
||||
s.limit = uintptr(s.start)<<_PageShift + size
|
||||
heapBitsForSpan(s.base()).initSpan(s.layout())
|
||||
return s
|
||||
}
|
||||
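The page math above rounds a large request up to whole 8 KB pages (_PageShift = 13): a 100000-byte allocation gives npages = 100000>>13 = 12 with a nonzero remainder in size&_PageMask, so 13 pages (106496 bytes) are taken from the heap. The size+_PageSize < size check at the top catches requests so large that this rounding would wrap around the address space.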
|
||||
// implementation of new builtin
|
||||
func newobject(typ *_type) unsafe.Pointer {
|
||||
flags := uint32(0)
|
||||
|
|
@@ -310,289 +747,6 @@ func profilealloc(mp *m, x unsafe.Pointer, size uintptr) {
|
|||
mProf_Malloc(x, size)
|
||||
}
|
||||
|
||||
// For now this must be bracketed with a stoptheworld and a starttheworld to ensure
|
||||
// all goroutines see the new barrier.
|
||||
//go:nowritebarrier
|
||||
func gcinstallmarkwb() {
|
||||
gcphase = _GCmark
|
||||
}
|
||||
|
||||
// force = 0 - start concurrent GC
|
||||
// force = 1 - do STW GC regardless of current heap usage
|
||||
// force = 2 - do STW GC and eager sweep
|
||||
func gogc(force int32) {
|
||||
// The gc is turned off (via enablegc) until the bootstrap has completed.
|
||||
// Also, malloc gets called in the guts of a number of libraries that might be
|
||||
// holding locks. To avoid deadlocks during stoptheworld, don't bother
|
||||
// trying to run gc while holding a lock. The next mallocgc without a lock
|
||||
// will do the gc instead.
|
||||
|
||||
mp := acquirem()
|
||||
if gp := getg(); gp == mp.g0 || mp.locks > 1 || !memstats.enablegc || panicking != 0 || gcpercent < 0 {
|
||||
releasem(mp)
|
||||
return
|
||||
}
|
||||
releasem(mp)
|
||||
mp = nil
|
||||
|
||||
if force == 0 {
|
||||
lock(&bggc.lock)
|
||||
if !bggc.started {
|
||||
bggc.working = 1
|
||||
bggc.started = true
|
||||
go backgroundgc()
|
||||
} else if bggc.working == 0 {
|
||||
bggc.working = 1
|
||||
ready(bggc.g)
|
||||
}
|
||||
unlock(&bggc.lock)
|
||||
} else {
|
||||
gcwork(force)
|
||||
}
|
||||
}
|
||||
|
||||
func gcwork(force int32) {
|
||||
|
||||
semacquire(&worldsema, false)
|
||||
|
||||
// Pick up the remaining unswept/not being swept spans concurrently
|
||||
for gosweepone() != ^uintptr(0) {
|
||||
sweep.nbgsweep++
|
||||
}
|
||||
|
||||
// Ok, we're doing it! Stop everybody else
|
||||
|
||||
mp := acquirem()
|
||||
mp.preemptoff = "gcing"
|
||||
releasem(mp)
|
||||
gctimer.count++
|
||||
if force == 0 {
|
||||
gctimer.cycle.sweepterm = nanotime()
|
||||
}
|
||||
|
||||
if trace.enabled {
|
||||
traceGoSched()
|
||||
traceGCStart()
|
||||
}
|
||||
|
||||
// Pick up the remaining unswept/not being swept spans before we STW
|
||||
for gosweepone() != ^uintptr(0) {
|
||||
sweep.nbgsweep++
|
||||
}
|
||||
systemstack(stoptheworld)
|
||||
systemstack(finishsweep_m) // finish sweep before we start concurrent scan.
|
||||
if force == 0 { // Do as much work concurrently as possible
|
||||
gcphase = _GCscan
|
||||
systemstack(starttheworld)
|
||||
gctimer.cycle.scan = nanotime()
|
||||
// Do a concurrent heap scan before we stop the world.
|
||||
systemstack(gcscan_m)
|
||||
gctimer.cycle.installmarkwb = nanotime()
|
||||
systemstack(stoptheworld)
|
||||
systemstack(gcinstallmarkwb)
|
||||
systemstack(harvestwbufs)
|
||||
systemstack(starttheworld)
|
||||
gctimer.cycle.mark = nanotime()
|
||||
systemstack(gcmark_m)
|
||||
gctimer.cycle.markterm = nanotime()
|
||||
systemstack(stoptheworld)
|
||||
systemstack(gcinstalloffwb_m)
|
||||
} else {
|
||||
// For non-concurrent GC (force != 0) g stack have not been scanned so
|
||||
// set gcscanvalid such that mark termination scans all stacks.
|
||||
// No races here since we are in a STW phase.
|
||||
for _, gp := range allgs {
|
||||
gp.gcworkdone = false // set to true in gcphasework
|
||||
gp.gcscanvalid = false // stack has not been scanned
|
||||
}
|
||||
}
|
||||
|
||||
startTime := nanotime()
|
||||
if mp != acquirem() {
|
||||
throw("gogc: rescheduled")
|
||||
}
|
||||
|
||||
clearpools()
|
||||
|
||||
// Run gc on the g0 stack. We do this so that the g stack
|
||||
// we're currently running on will no longer change. Cuts
|
||||
// the root set down a bit (g0 stacks are not scanned, and
|
||||
// we don't need to scan gc's internal state). We also
|
||||
// need to switch to g0 so we can shrink the stack.
|
||||
n := 1
|
||||
if debug.gctrace > 1 {
|
||||
n = 2
|
||||
}
|
||||
eagersweep := force >= 2
|
||||
for i := 0; i < n; i++ {
|
||||
if i > 0 {
|
||||
// refresh start time if doing a second GC
|
||||
startTime = nanotime()
|
||||
}
|
||||
// switch to g0, call gc, then switch back
|
||||
systemstack(func() {
|
||||
gc_m(startTime, eagersweep)
|
||||
})
|
||||
}
|
||||
|
||||
systemstack(func() {
|
||||
gccheckmark_m(startTime, eagersweep)
|
||||
})
|
||||
|
||||
if trace.enabled {
|
||||
traceGCDone()
|
||||
traceGoStart()
|
||||
}
|
||||
|
||||
// all done
|
||||
mp.preemptoff = ""
|
||||
|
||||
if force == 0 {
|
||||
gctimer.cycle.sweep = nanotime()
|
||||
}
|
||||
|
||||
semrelease(&worldsema)
|
||||
|
||||
if force == 0 {
|
||||
if gctimer.verbose > 1 {
|
||||
GCprinttimes()
|
||||
} else if gctimer.verbose > 0 {
|
||||
calctimes() // ignore result
|
||||
}
|
||||
}
|
||||
|
||||
systemstack(starttheworld)
|
||||
|
||||
releasem(mp)
|
||||
mp = nil
|
||||
|
||||
// now that gc is done, kick off finalizer thread if needed
|
||||
if !concurrentSweep {
|
||||
// give the queued finalizers, if any, a chance to run
|
||||
Gosched()
|
||||
}
|
||||
}
|
||||
|
||||
// gctimes records the time in nanoseconds of each phase of the concurrent GC.
|
||||
type gctimes struct {
|
||||
sweepterm int64 // stw
|
||||
scan int64
|
||||
installmarkwb int64 // stw
|
||||
mark int64
|
||||
markterm int64 // stw
|
||||
sweep int64
|
||||
}
|
||||
|
||||
// gcchronograph holds timer information related to GC phases
|
||||
// max records the maximum time spent in each GC phase since GCstarttimes.
|
||||
// total records the total time spent in each GC phase since GCstarttimes.
|
||||
// cycle records the absolute time (as returned by nanoseconds()) that each GC phase last started at.
|
||||
type gcchronograph struct {
|
||||
count int64
|
||||
verbose int64
|
||||
maxpause int64
|
||||
max gctimes
|
||||
total gctimes
|
||||
cycle gctimes
|
||||
}
|
||||
|
||||
var gctimer gcchronograph
|
||||
|
||||
// GCstarttimes initializes the gc times. All previous times are lost.
|
||||
func GCstarttimes(verbose int64) {
|
||||
gctimer = gcchronograph{verbose: verbose}
|
||||
}
|
||||
|
||||
// GCendtimes stops the gc timers.
|
||||
func GCendtimes() {
|
||||
gctimer.verbose = 0
|
||||
}
|
||||
|
||||
// calctimes converts gctimer.cycle into the elapsed times, updates gctimer.total
|
||||
// and updates gctimer.max with the max pause time.
|
||||
func calctimes() gctimes {
|
||||
var times gctimes
|
||||
|
||||
var max = func(a, b int64) int64 {
|
||||
if a > b {
|
||||
return a
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
times.sweepterm = gctimer.cycle.scan - gctimer.cycle.sweepterm
|
||||
gctimer.total.sweepterm += times.sweepterm
|
||||
gctimer.max.sweepterm = max(gctimer.max.sweepterm, times.sweepterm)
|
||||
gctimer.maxpause = max(gctimer.maxpause, gctimer.max.sweepterm)
|
||||
|
||||
times.scan = gctimer.cycle.installmarkwb - gctimer.cycle.scan
|
||||
gctimer.total.scan += times.scan
|
||||
gctimer.max.scan = max(gctimer.max.scan, times.scan)
|
||||
|
||||
times.installmarkwb = gctimer.cycle.mark - gctimer.cycle.installmarkwb
|
||||
gctimer.total.installmarkwb += times.installmarkwb
|
||||
gctimer.max.installmarkwb = max(gctimer.max.installmarkwb, times.installmarkwb)
|
||||
gctimer.maxpause = max(gctimer.maxpause, gctimer.max.installmarkwb)
|
||||
|
||||
times.mark = gctimer.cycle.markterm - gctimer.cycle.mark
|
||||
gctimer.total.mark += times.mark
|
||||
gctimer.max.mark = max(gctimer.max.mark, times.mark)
|
||||
|
||||
times.markterm = gctimer.cycle.sweep - gctimer.cycle.markterm
|
||||
gctimer.total.markterm += times.markterm
|
||||
gctimer.max.markterm = max(gctimer.max.markterm, times.markterm)
|
||||
gctimer.maxpause = max(gctimer.maxpause, gctimer.max.markterm)
|
||||
|
||||
return times
|
||||
}
|
||||
|
||||
// GCprinttimes prints latency information in nanoseconds about various
|
||||
// phases in the GC. The information for each phase includes the maximum pause
|
||||
// and total time since the most recent call to GCstarttimes as well as
|
||||
// the information from the most recent Concurrent GC cycle. Calls from the
|
||||
// application to runtime.GC() are ignored.
|
||||
func GCprinttimes() {
|
||||
if gctimer.verbose == 0 {
|
||||
println("GC timers not enabled")
|
||||
return
|
||||
}
|
||||
|
||||
// Explicitly put times on the heap so printPhase can use it.
|
||||
times := new(gctimes)
|
||||
*times = calctimes()
|
||||
cycletime := gctimer.cycle.sweep - gctimer.cycle.sweepterm
|
||||
pause := times.sweepterm + times.installmarkwb + times.markterm
|
||||
gomaxprocs := GOMAXPROCS(-1)
|
||||
|
||||
printlock()
|
||||
print("GC: #", gctimer.count, " ", cycletime, "ns @", gctimer.cycle.sweepterm, " pause=", pause, " maxpause=", gctimer.maxpause, " goroutines=", allglen, " gomaxprocs=", gomaxprocs, "\n")
|
||||
printPhase := func(label string, get func(*gctimes) int64, procs int) {
|
||||
print("GC: ", label, " ", get(times), "ns\tmax=", get(&gctimer.max), "\ttotal=", get(&gctimer.total), "\tprocs=", procs, "\n")
|
||||
}
|
||||
printPhase("sweep term:", func(t *gctimes) int64 { return t.sweepterm }, gomaxprocs)
|
||||
printPhase("scan: ", func(t *gctimes) int64 { return t.scan }, 1)
|
||||
printPhase("install wb:", func(t *gctimes) int64 { return t.installmarkwb }, gomaxprocs)
|
||||
printPhase("mark: ", func(t *gctimes) int64 { return t.mark }, 1)
|
||||
printPhase("mark term: ", func(t *gctimes) int64 { return t.markterm }, gomaxprocs)
|
||||
printunlock()
|
||||
}
|
||||
|
||||
// GC runs a garbage collection.
|
||||
func GC() {
|
||||
gogc(2)
|
||||
}
|
||||
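A minimal sketch of the caller's side: runtime.GC above maps to gogc(2), the stop-the-world, eager-sweep path. The program itself is hypothetical; only the public runtime calls are real API.

package main

import (
	"fmt"
	"runtime"
)

func main() {
	var before, after runtime.MemStats
	runtime.ReadMemStats(&before)
	runtime.GC() // gogc(2): STW collection with eager sweep
	runtime.ReadMemStats(&after)
	fmt.Println("heap before:", before.HeapAlloc, "after:", after.HeapAlloc)
}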
|
||||
// linker-provided
|
||||
var noptrdata struct{}
|
||||
var enoptrdata struct{}
|
||||
var noptrbss struct{}
|
||||
var enoptrbss struct{}
|
||||
|
||||
// round n up to a multiple of a. a must be a power of 2.
|
||||
func round(n, a uintptr) uintptr {
|
||||
return (n + a - 1) &^ (a - 1)
|
||||
}
|
||||
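A standalone check of round's bit trick, copying the one-liner so it can run outside the runtime and assuming, as the comment says, that a is a power of two:

package main

import "fmt"

func round(n, a uintptr) uintptr {
	return (n + a - 1) &^ (a - 1)
}

func main() {
	fmt.Println(round(5, 8))       // 8: 5+7 = 12, clearing the low three bits gives 8
	fmt.Println(round(16, 8))      // 16: values already on a boundary are unchanged
	fmt.Println(round(8191, 8192)) // 8192: rounding a size up to one 8 KB page
}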
|
||||
var persistent struct {
|
||||
lock mutex
|
||||
base unsafe.Pointer
|
||||
|
|
|
|||
|
|
@@ -1,358 +0,0 @@
|
|||
// Copyright 2009 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// See malloc.h for overview.
|
||||
//
|
||||
// TODO(rsc): double-check stats.
|
||||
|
||||
package runtime
|
||||
|
||||
import "unsafe"
|
||||
|
||||
const _MaxArena32 = 2 << 30
|
||||
|
||||
// For use by Go. If it were a C enum it would be made available automatically,
|
||||
// but the value of MaxMem is too large for enum.
|
||||
// XXX - uintptr runtime·maxmem = MaxMem;
|
||||
|
||||
func mlookup(v uintptr, base *uintptr, size *uintptr, sp **mspan) int32 {
|
||||
_g_ := getg()
|
||||
|
||||
_g_.m.mcache.local_nlookup++
|
||||
if ptrSize == 4 && _g_.m.mcache.local_nlookup >= 1<<30 {
|
||||
// purge cache stats to prevent overflow
|
||||
lock(&mheap_.lock)
|
||||
purgecachedstats(_g_.m.mcache)
|
||||
unlock(&mheap_.lock)
|
||||
}
|
||||
|
||||
s := mHeap_LookupMaybe(&mheap_, unsafe.Pointer(v))
|
||||
if sp != nil {
|
||||
*sp = s
|
||||
}
|
||||
if s == nil {
|
||||
if base != nil {
|
||||
*base = 0
|
||||
}
|
||||
if size != nil {
|
||||
*size = 0
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
p := uintptr(s.start) << _PageShift
|
||||
if s.sizeclass == 0 {
|
||||
// Large object.
|
||||
if base != nil {
|
||||
*base = p
|
||||
}
|
||||
if size != nil {
|
||||
*size = s.npages << _PageShift
|
||||
}
|
||||
return 1
|
||||
}
|
||||
|
||||
n := s.elemsize
|
||||
if base != nil {
|
||||
i := (uintptr(v) - uintptr(p)) / n
|
||||
*base = p + i*n
|
||||
}
|
||||
if size != nil {
|
||||
*size = n
|
||||
}
|
||||
|
||||
return 1
|
||||
}
|
||||
|
||||
//go:nosplit
|
||||
func purgecachedstats(c *mcache) {
|
||||
// Protected by either heap or GC lock.
|
||||
h := &mheap_
|
||||
memstats.heap_alloc += uint64(c.local_cachealloc)
|
||||
c.local_cachealloc = 0
|
||||
if trace.enabled {
|
||||
traceHeapAlloc()
|
||||
}
|
||||
memstats.tinyallocs += uint64(c.local_tinyallocs)
|
||||
c.local_tinyallocs = 0
|
||||
memstats.nlookup += uint64(c.local_nlookup)
|
||||
c.local_nlookup = 0
|
||||
h.largefree += uint64(c.local_largefree)
|
||||
c.local_largefree = 0
|
||||
h.nlargefree += uint64(c.local_nlargefree)
|
||||
c.local_nlargefree = 0
|
||||
for i := 0; i < len(c.local_nsmallfree); i++ {
|
||||
h.nsmallfree[i] += uint64(c.local_nsmallfree[i])
|
||||
c.local_nsmallfree[i] = 0
|
||||
}
|
||||
}
|
||||
|
||||
func mallocinit() {
|
||||
initSizes()
|
||||
|
||||
if class_to_size[_TinySizeClass] != _TinySize {
|
||||
throw("bad TinySizeClass")
|
||||
}
|
||||
|
||||
var p, bitmapSize, spansSize, pSize, limit uintptr
|
||||
var reserved bool
|
||||
|
||||
// limit = runtime.memlimit();
|
||||
// See https://golang.org/issue/5049
|
||||
// TODO(rsc): Fix after 1.1.
|
||||
limit = 0
|
||||
|
||||
// Set up the allocation arena, a contiguous area of memory where
|
||||
// allocated data will be found. The arena begins with a bitmap large
|
||||
// enough to hold 4 bits per allocated word.
|
||||
if ptrSize == 8 && (limit == 0 || limit > 1<<30) {
|
||||
// On a 64-bit machine, allocate from a single contiguous reservation.
|
||||
// 128 GB (MaxMem) should be big enough for now.
|
||||
//
|
||||
// The code will work with the reservation at any address, but ask
|
||||
// SysReserve to use 0x0000XXc000000000 if possible (XX=00...7f).
|
||||
// Allocating a 128 GB region takes away 37 bits, and the amd64
|
||||
// doesn't let us choose the top 17 bits, so that leaves the 11 bits
|
||||
// in the middle of 0x00c0 for us to choose. Choosing 0x00c0 means
|
||||
// that the valid memory addresses will begin 0x00c0, 0x00c1, ..., 0x00df.
|
||||
// In little-endian, that's c0 00, c1 00, ..., df 00. None of those are valid
|
||||
// UTF-8 sequences, and they are otherwise as far away from
|
||||
// ff (likely a common byte) as possible. If that fails, we try other 0xXXc0
|
||||
// addresses. An earlier attempt to use 0x11f8 caused out of memory errors
|
||||
// on OS X during thread allocations. 0x00c0 causes conflicts with
|
||||
// AddressSanitizer which reserves all memory up to 0x0100.
|
||||
// These choices are both for debuggability and to reduce the
|
||||
// odds of the conservative garbage collector not collecting memory
|
||||
// because some non-pointer block of memory had a bit pattern
|
||||
// that matched a memory address.
|
||||
//
|
||||
// Actually we reserve 136 GB (because the bitmap ends up being 8 GB)
|
||||
// but it hardly matters: e0 00 is not valid UTF-8 either.
|
||||
//
|
||||
// If this fails we fall back to the 32 bit memory mechanism
|
||||
arenaSize := round(_MaxMem, _PageSize)
|
||||
bitmapSize = arenaSize / (ptrSize * 8 / 4)
|
||||
spansSize = arenaSize / _PageSize * ptrSize
|
||||
spansSize = round(spansSize, _PageSize)
|
||||
for i := 0; i <= 0x7f; i++ {
|
||||
p = uintptr(i)<<40 | uintptrMask&(0x00c0<<32)
|
||||
pSize = bitmapSize + spansSize + arenaSize + _PageSize
|
||||
p = uintptr(sysReserve(unsafe.Pointer(p), pSize, &reserved))
|
||||
if p != 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if p == 0 {
|
||||
// On a 32-bit machine, we can't typically get away
|
||||
// with a giant virtual address space reservation.
|
||||
// Instead we map the memory information bitmap
|
||||
// immediately after the data segment, large enough
|
||||
// to handle another 2GB of mappings (256 MB),
|
||||
// along with a reservation for an initial arena.
|
||||
// When that gets used up, we'll start asking the kernel
|
||||
// for any memory anywhere and hope it's in the 2GB
|
||||
// following the bitmap (presumably the executable begins
|
||||
// near the bottom of memory, so we'll have to use up
|
||||
// most of memory before the kernel resorts to giving out
|
||||
// memory before the beginning of the text segment).
|
||||
//
|
||||
// Alternatively we could reserve 512 MB bitmap, enough
|
||||
// for 4GB of mappings, and then accept any memory the
|
||||
// kernel threw at us, but normally that's a waste of 512 MB
|
||||
// of address space, which is probably too much in a 32-bit world.
|
||||
|
||||
// If we fail to allocate, try again with a smaller arena.
|
||||
// This is necessary on Android L where we share a process
|
||||
// with ART, which reserves virtual memory aggressively.
|
||||
arenaSizes := []uintptr{
|
||||
512 << 20,
|
||||
256 << 20,
|
||||
}
|
||||
|
||||
for _, arenaSize := range arenaSizes {
|
||||
bitmapSize = _MaxArena32 / (ptrSize * 8 / 4)
|
||||
spansSize = _MaxArena32 / _PageSize * ptrSize
|
||||
if limit > 0 && arenaSize+bitmapSize+spansSize > limit {
|
||||
bitmapSize = (limit / 9) &^ ((1 << _PageShift) - 1)
|
||||
arenaSize = bitmapSize * 8
|
||||
spansSize = arenaSize / _PageSize * ptrSize
|
||||
}
|
||||
spansSize = round(spansSize, _PageSize)
|
||||
|
||||
// SysReserve treats the address we ask for, end, as a hint,
|
||||
// not as an absolute requirement. If we ask for the end
|
||||
// of the data segment but the operating system requires
|
||||
// a little more space before we can start allocating, it will
|
||||
// give out a slightly higher pointer. Except QEMU, which
|
||||
// is buggy, as usual: it won't adjust the pointer upward.
|
||||
// So adjust it upward a little bit ourselves: 1/4 MB to get
|
||||
// away from the running binary image and then round up
|
||||
// to a MB boundary.
|
||||
p = round(uintptr(unsafe.Pointer(&end))+(1<<18), 1<<20)
|
||||
pSize = bitmapSize + spansSize + arenaSize + _PageSize
|
||||
p = uintptr(sysReserve(unsafe.Pointer(p), pSize, &reserved))
|
||||
if p != 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
if p == 0 {
|
||||
throw("runtime: cannot reserve arena virtual address space")
|
||||
}
|
||||
}
|
||||
|
||||
// PageSize can be larger than OS definition of page size,
|
||||
// so SysReserve can give us a PageSize-unaligned pointer.
|
||||
// To overcome this we ask for PageSize more and round up the pointer.
|
||||
p1 := round(p, _PageSize)
|
||||
|
||||
mheap_.spans = (**mspan)(unsafe.Pointer(p1))
|
||||
mheap_.bitmap = p1 + spansSize
|
||||
mheap_.arena_start = p1 + (spansSize + bitmapSize)
|
||||
mheap_.arena_used = mheap_.arena_start
|
||||
mheap_.arena_end = p + pSize
|
||||
mheap_.arena_reserved = reserved
|
||||
|
||||
if mheap_.arena_start&(_PageSize-1) != 0 {
|
||||
println("bad pagesize", hex(p), hex(p1), hex(spansSize), hex(bitmapSize), hex(_PageSize), "start", hex(mheap_.arena_start))
|
||||
throw("misrounded allocation in mallocinit")
|
||||
}
|
||||
|
||||
// Initialize the rest of the allocator.
|
||||
mHeap_Init(&mheap_, spansSize)
|
||||
_g_ := getg()
|
||||
_g_.m.mcache = allocmcache()
|
||||
}
|
||||
|
||||
// sysReserveHigh reserves space somewhere high in the address space.
|
||||
// sysReserve doesn't actually reserve the full amount requested on
|
||||
// 64-bit systems, because of problems with ulimit. Instead it checks
|
||||
// that it can get the first 64 kB and assumes it can grab the rest as
|
||||
// needed. This doesn't work well with the "let the kernel pick an address"
|
||||
// mode, so don't do that. Pick a high address instead.
|
||||
func sysReserveHigh(n uintptr, reserved *bool) unsafe.Pointer {
|
||||
if ptrSize == 4 {
|
||||
return sysReserve(nil, n, reserved)
|
||||
}
|
||||
|
||||
for i := 0; i <= 0x7f; i++ {
|
||||
p := uintptr(i)<<40 | uintptrMask&(0x00c0<<32)
|
||||
*reserved = false
|
||||
p = uintptr(sysReserve(unsafe.Pointer(p), n, reserved))
|
||||
if p != 0 {
|
||||
return unsafe.Pointer(p)
|
||||
}
|
||||
}
|
||||
|
||||
return sysReserve(nil, n, reserved)
|
||||
}
|
||||
|
||||
func mHeap_SysAlloc(h *mheap, n uintptr) unsafe.Pointer {
|
||||
if n > uintptr(h.arena_end)-uintptr(h.arena_used) {
|
||||
// We are in 32-bit mode, maybe we didn't use all possible address space yet.
|
||||
// Reserve some more space.
|
||||
p_size := round(n+_PageSize, 256<<20)
|
||||
new_end := h.arena_end + p_size
|
||||
if new_end <= h.arena_start+_MaxArena32 {
|
||||
// TODO: It would be bad if part of the arena
|
||||
// is reserved and part is not.
|
||||
var reserved bool
|
||||
p := uintptr(sysReserve((unsafe.Pointer)(h.arena_end), p_size, &reserved))
|
||||
if p == h.arena_end {
|
||||
h.arena_end = new_end
|
||||
h.arena_reserved = reserved
|
||||
} else if p+p_size <= h.arena_start+_MaxArena32 {
|
||||
// Keep everything page-aligned.
|
||||
// Our pages are bigger than hardware pages.
|
||||
h.arena_end = p + p_size
|
||||
h.arena_used = p + (-uintptr(p) & (_PageSize - 1))
|
||||
h.arena_reserved = reserved
|
||||
} else {
|
||||
var stat uint64
|
||||
sysFree((unsafe.Pointer)(p), p_size, &stat)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if n <= uintptr(h.arena_end)-uintptr(h.arena_used) {
|
||||
// Keep taking from our reservation.
|
||||
p := h.arena_used
|
||||
sysMap((unsafe.Pointer)(p), n, h.arena_reserved, &memstats.heap_sys)
|
||||
h.arena_used += n
|
||||
mHeap_MapBits(h)
|
||||
mHeap_MapSpans(h)
|
||||
if raceenabled {
|
||||
racemapshadow((unsafe.Pointer)(p), n)
|
||||
}
|
||||
if mheap_.shadow_enabled {
|
||||
sysMap(unsafe.Pointer(p+mheap_.shadow_heap), n, h.shadow_reserved, &memstats.other_sys)
|
||||
}
|
||||
|
||||
if uintptr(p)&(_PageSize-1) != 0 {
|
||||
throw("misrounded allocation in MHeap_SysAlloc")
|
||||
}
|
||||
return (unsafe.Pointer)(p)
|
||||
}
|
||||
|
||||
// If using 64-bit, our reservation is all we have.
|
||||
if uintptr(h.arena_end)-uintptr(h.arena_start) >= _MaxArena32 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// On 32-bit, once the reservation is gone we can
|
||||
// try to get memory at a location chosen by the OS
|
||||
// and hope that it is in the range we allocated bitmap for.
|
||||
p_size := round(n, _PageSize) + _PageSize
|
||||
p := uintptr(sysAlloc(p_size, &memstats.heap_sys))
|
||||
if p == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
if p < h.arena_start || uintptr(p)+p_size-uintptr(h.arena_start) >= _MaxArena32 {
|
||||
print("runtime: memory allocated by OS (", p, ") not in usable range [", hex(h.arena_start), ",", hex(h.arena_start+_MaxArena32), ")\n")
|
||||
sysFree((unsafe.Pointer)(p), p_size, &memstats.heap_sys)
|
||||
return nil
|
||||
}
|
||||
|
||||
p_end := p + p_size
|
||||
p += -p & (_PageSize - 1)
|
||||
if uintptr(p)+n > uintptr(h.arena_used) {
|
||||
h.arena_used = p + n
|
||||
if p_end > h.arena_end {
|
||||
h.arena_end = p_end
|
||||
}
|
||||
mHeap_MapBits(h)
|
||||
mHeap_MapSpans(h)
|
||||
if raceenabled {
|
||||
racemapshadow((unsafe.Pointer)(p), n)
|
||||
}
|
||||
}
|
||||
|
||||
if uintptr(p)&(_PageSize-1) != 0 {
|
||||
throw("misrounded allocation in MHeap_SysAlloc")
|
||||
}
|
||||
return (unsafe.Pointer)(p)
|
||||
}
|
||||
|
||||
var end struct{}
|
||||
|
||||
func largeAlloc(size uintptr, flag uint32) *mspan {
|
||||
// print("largeAlloc size=", size, "\n")
|
||||
|
||||
if size+_PageSize < size {
|
||||
throw("out of memory")
|
||||
}
|
||||
npages := size >> _PageShift
|
||||
if size&_PageMask != 0 {
|
||||
npages++
|
||||
}
|
||||
s := mHeap_Alloc(&mheap_, npages, 0, true, flag&_FlagNoZero == 0)
|
||||
if s == nil {
|
||||
throw("out of memory")
|
||||
}
|
||||
s.limit = uintptr(s.start)<<_PageShift + size
|
||||
heapBitsForSpan(s.base()).initSpan(s.layout())
|
||||
return s
|
||||
}
|
||||
|
|
@@ -1,525 +0,0 @@
|
|||
// Copyright 2009 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package runtime
|
||||
|
||||
import "unsafe"
|
||||
|
||||
// Memory allocator, based on tcmalloc.
|
||||
// http://goog-perftools.sourceforge.net/doc/tcmalloc.html
|
||||
|
||||
// The main allocator works in runs of pages.
|
||||
// Small allocation sizes (up to and including 32 kB) are
|
||||
// rounded to one of about 100 size classes, each of which
|
||||
// has its own free list of objects of exactly that size.
|
||||
// Any free page of memory can be split into a set of objects
|
||||
// of one size class, which are then managed using free list
|
||||
// allocators.
|
||||
//
|
||||
// The allocator's data structures are:
|
||||
//
|
||||
// FixAlloc: a free-list allocator for fixed-size objects,
|
||||
// used to manage storage used by the allocator.
|
||||
// MHeap: the malloc heap, managed at page (4096-byte) granularity.
|
||||
// MSpan: a run of pages managed by the MHeap.
|
||||
// MCentral: a shared free list for a given size class.
|
||||
// MCache: a per-thread (in Go, per-P) cache for small objects.
|
||||
// MStats: allocation statistics.
|
||||
//
|
||||
// Allocating a small object proceeds up a hierarchy of caches:
|
||||
//
|
||||
// 1. Round the size up to one of the small size classes
|
||||
// and look in the corresponding MCache free list.
|
||||
// If the list is not empty, allocate an object from it.
|
||||
// This can all be done without acquiring a lock.
|
||||
//
|
||||
// 2. If the MCache free list is empty, replenish it by
|
||||
// taking a bunch of objects from the MCentral free list.
|
||||
// Moving a bunch amortizes the cost of acquiring the MCentral lock.
|
||||
//
|
||||
// 3. If the MCentral free list is empty, replenish it by
|
||||
// allocating a run of pages from the MHeap and then
|
||||
// chopping that memory into objects of the given size.
|
||||
// Allocating many objects amortizes the cost of locking
|
||||
// the heap.
|
||||
//
|
||||
// 4. If the MHeap is empty or has no page runs large enough,
|
||||
// allocate a new group of pages (at least 1MB) from the
|
||||
// operating system. Allocating a large run of pages
|
||||
// amortizes the cost of talking to the operating system.
|
||||
//
|
||||
// Freeing a small object proceeds up the same hierarchy:
|
||||
//
|
||||
// 1. Look up the size class for the object and add it to
|
||||
// the MCache free list.
|
||||
//
|
||||
// 2. If the MCache free list is too long or the MCache has
|
||||
// too much memory, return some to the MCentral free lists.
|
||||
//
|
||||
// 3. If all the objects in a given span have returned to
|
||||
// the MCentral list, return that span to the page heap.
|
||||
//
|
||||
// 4. If the heap has too much memory, return some to the
|
||||
// operating system.
|
||||
//
|
||||
// TODO(rsc): Step 4 is not implemented.
|
||||
//
|
||||
// Allocating and freeing a large object uses the page heap
|
||||
// directly, bypassing the MCache and MCentral free lists.
|
||||
//
|
||||
// The small objects on the MCache and MCentral free lists
|
||||
// may or may not be zeroed. They are zeroed if and only if
|
||||
// the second word of the object is zero. A span in the
|
||||
// page heap is zeroed unless s->needzero is set. When a span
|
||||
// is allocated to break into small objects, it is zeroed if needed
|
||||
// and s->needzero is set. There are two main benefits to delaying the
|
||||
// zeroing this way:
|
||||
//
|
||||
// 1. stack frames allocated from the small object lists
|
||||
// or the page heap can avoid zeroing altogether.
|
||||
// 2. the cost of zeroing when reusing a small object is
|
||||
// charged to the mutator, not the garbage collector.
|
||||
//
|
||||
// This C code was written with an eye toward translating to Go
|
||||
// in the future. Methods have the form Type_Method(Type *t, ...).
|
||||
|
||||
const (
|
||||
_PageShift = 13
|
||||
_PageSize = 1 << _PageShift
|
||||
_PageMask = _PageSize - 1
|
||||
)
|
||||
|
||||
const (
|
||||
// _64bit = 1 on 64-bit systems, 0 on 32-bit systems
|
||||
_64bit = 1 << (^uintptr(0) >> 63) / 2
|
||||
|
||||
// Computed constant. The definition of MaxSmallSize and the
|
||||
// algorithm in msize.c produce some number of different allocation
|
||||
// size classes. NumSizeClasses is that number. It's needed here
|
||||
// because there are static arrays of this length; when msize runs its
|
||||
// size choosing algorithm it double-checks that NumSizeClasses agrees.
|
||||
_NumSizeClasses = 67
|
||||
|
||||
// Tunable constants.
|
||||
_MaxSmallSize = 32 << 10
|
||||
|
||||
// Tiny allocator parameters, see "Tiny allocator" comment in malloc.go.
|
||||
_TinySize = 16
|
||||
_TinySizeClass = 2
|
||||
|
||||
_FixAllocChunk = 16 << 10 // Chunk size for FixAlloc
|
||||
_MaxMHeapList = 1 << (20 - _PageShift) // Maximum page length for fixed-size list in MHeap.
|
||||
_HeapAllocChunk = 1 << 20 // Chunk size for heap growth
|
||||
|
||||
// Per-P, per order stack segment cache size.
|
||||
_StackCacheSize = 32 * 1024
|
||||
|
||||
// Number of orders that get caching. Order 0 is FixedStack
|
||||
// and each successive order is twice as large.
|
||||
// We want to cache 2KB, 4KB, 8KB, and 16KB stacks. Larger stacks
|
||||
// will be allocated directly.
|
||||
// Since FixedStack is different on different systems, we
|
||||
// must vary NumStackOrders to keep the same maximum cached size.
|
||||
// OS | FixedStack | NumStackOrders
|
||||
// -----------------+------------+---------------
|
||||
// linux/darwin/bsd | 2KB | 4
|
||||
// windows/32 | 4KB | 3
|
||||
// windows/64 | 8KB | 2
|
||||
// plan9 | 4KB | 3
|
||||
_NumStackOrders = 4 - ptrSize/4*goos_windows - 1*goos_plan9
|
||||
|
||||
// Number of bits in page to span calculations (4k pages).
|
||||
// On Windows 64-bit we limit the arena to 32GB or 35 bits.
|
||||
// Windows counts memory used by page table into committed memory
|
||||
// of the process, so we can't reserve too much memory.
|
||||
// See http://golang.org/issue/5402 and http://golang.org/issue/5236.
|
||||
// On other 64-bit platforms, we limit the arena to 128GB, or 37 bits.
|
||||
// On 32-bit, we don't bother limiting anything, so we use the full 32-bit address.
|
||||
_MHeapMap_TotalBits = (_64bit*goos_windows)*35 + (_64bit*(1-goos_windows))*37 + (1-_64bit)*32
|
||||
_MHeapMap_Bits = _MHeapMap_TotalBits - _PageShift
|
||||
|
||||
_MaxMem = uintptr(1<<_MHeapMap_TotalBits - 1)
|
||||
|
||||
// Max number of threads to run garbage collection.
|
||||
// 2, 3, and 4 are all plausible maximums depending
|
||||
// on the hardware details of the machine. The garbage
|
||||
// collector scales well to 32 cpus.
|
||||
_MaxGcproc = 32
|
||||
)
|
||||
|
||||
// A generic linked list of blocks. (Typically the block is bigger than sizeof(MLink).)
|
||||
// Since assignments to mlink.next will result in a write barrier being performed
|
||||
// this can not be used by some of the internal GC structures. For example when
|
||||
// the sweeper is placing an unmarked object on the free list it does not want the
|
||||
// write barrier to be called since that could result in the object being reachable.
|
||||
type mlink struct {
|
||||
next *mlink
|
||||
}
|
||||
|
||||
// A gclink is a node in a linked list of blocks, like mlink,
|
||||
// but it is opaque to the garbage collector.
|
||||
// The GC does not trace the pointers during collection,
|
||||
// and the compiler does not emit write barriers for assignments
|
||||
// of gclinkptr values. Code should store references to gclinks
|
||||
// as gclinkptr, not as *gclink.
|
||||
type gclink struct {
|
||||
next gclinkptr
|
||||
}
|
||||
|
||||
// A gclinkptr is a pointer to a gclink, but it is opaque
|
||||
// to the garbage collector.
|
||||
type gclinkptr uintptr
|
||||
|
||||
// ptr returns the *gclink form of p.
|
||||
// The result should be used for accessing fields, not stored
|
||||
// in other data structures.
|
||||
func (p gclinkptr) ptr() *gclink {
|
||||
return (*gclink)(unsafe.Pointer(p))
|
||||
}
|
||||
|
||||
// sysAlloc obtains a large chunk of zeroed memory from the
|
||||
// operating system, typically on the order of a hundred kilobytes
|
||||
// or a megabyte.
|
||||
// NOTE: sysAlloc returns OS-aligned memory, but the heap allocator
|
||||
// may use larger alignment, so the caller must be careful to realign the
|
||||
// memory obtained by sysAlloc.
|
||||
//
|
||||
// SysUnused notifies the operating system that the contents
|
||||
// of the memory region are no longer needed and can be reused
|
||||
// for other purposes.
|
||||
// SysUsed notifies the operating system that the contents
|
||||
// of the memory region are needed again.
|
||||
//
|
||||
// SysFree returns the memory region to the operating system unconditionally; this is only used if
|
||||
// an out-of-memory error has been detected midway through
|
||||
// an allocation. It is okay if SysFree is a no-op.
|
||||
//
|
||||
// SysReserve reserves address space without allocating memory.
|
||||
// If the pointer passed to it is non-nil, the caller wants the
|
||||
// reservation there, but SysReserve can still choose another
|
||||
// location if that one is unavailable. On some systems and in some
|
||||
// cases SysReserve will simply check that the address space is
|
||||
// available and not actually reserve it. If SysReserve returns
|
||||
// non-nil, it sets *reserved to true if the address space is
|
||||
// reserved, false if it has merely been checked.
|
||||
// NOTE: SysReserve returns OS-aligned memory, but the heap allocator
|
||||
// may use larger alignment, so the caller must be careful to realign the
|
||||
// memory obtained by sysAlloc.
|
||||
//
|
||||
// SysMap maps previously reserved address space for use.
|
||||
// The reserved argument is true if the address space was really
|
||||
// reserved, not merely checked.
|
||||
//
|
||||
// SysFault marks a (already sysAlloc'd) region to fault
|
||||
// if accessed. Used only for debugging the runtime.
|
||||
|
||||
// FixAlloc is a simple free-list allocator for fixed size objects.
|
||||
// Malloc uses a FixAlloc wrapped around sysAlloc to manage its
|
||||
// MCache and MSpan objects.
|
||||
//
|
||||
// Memory returned by FixAlloc_Alloc is not zeroed.
|
||||
// The caller is responsible for locking around FixAlloc calls.
|
||||
// Callers can keep state in the object but the first word is
|
||||
// smashed by freeing and reallocating.
|
||||
type fixalloc struct {
|
||||
size uintptr
|
||||
first unsafe.Pointer // go func(unsafe.pointer, unsafe.pointer); f(arg, p) called first time p is returned
|
||||
arg unsafe.Pointer
|
||||
list *mlink
|
||||
chunk *byte
|
||||
nchunk uint32
|
||||
inuse uintptr // in-use bytes now
|
||||
stat *uint64
|
||||
}
|
||||
|
||||
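For orientation, a sketch of how the heap wires one of these up (fixAlloc_Init appears later in this change; fixAlloc_Alloc is assumed to be its allocation counterpart, and the nil first-use hook and the stat field chosen here are illustrative):

func newSpanSketch(h *mheap) *mspan {
	// One-time setup, normally done when the heap is initialized:
	// carve mspan-sized objects out of sysAlloc'd chunks.
	fixAlloc_Init(&h.spanalloc, unsafe.Sizeof(mspan{}), nil, nil, &memstats.mspan_sys)

	// Each call hands back one fixed-size object; FixAlloc does not zero it.
	span := (*mspan)(fixAlloc_Alloc(&h.spanalloc))
	*span = mspan{}
	return span
}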
// Statistics.
// Shared with Go: if you edit this structure, also edit type MemStats in mem.go.
type mstats struct {
	// General statistics.
	alloc uint64 // bytes allocated and still in use
	total_alloc uint64 // bytes allocated (even if freed)
	sys uint64 // bytes obtained from system (should be sum of xxx_sys below, no locking, approximate)
	nlookup uint64 // number of pointer lookups
	nmalloc uint64 // number of mallocs
	nfree uint64 // number of frees

	// Statistics about malloc heap.
	// protected by mheap.lock
	heap_alloc uint64 // bytes allocated and still in use
	heap_sys uint64 // bytes obtained from system
	heap_idle uint64 // bytes in idle spans
	heap_inuse uint64 // bytes in non-idle spans
	heap_released uint64 // bytes released to the os
	heap_objects uint64 // total number of allocated objects

	// Statistics about allocation of low-level fixed-size structures.
	// Protected by FixAlloc locks.
	stacks_inuse uint64 // this number is included in heap_inuse above
	stacks_sys uint64 // always 0 in mstats
	mspan_inuse uint64 // mspan structures
	mspan_sys uint64
	mcache_inuse uint64 // mcache structures
	mcache_sys uint64
	buckhash_sys uint64 // profiling bucket hash table
	gc_sys uint64
	other_sys uint64

	// Statistics about garbage collector.
	// Protected by mheap or stopping the world during GC.
	next_gc uint64 // next gc (in heap_alloc time)
	last_gc uint64 // last gc (in absolute time)
	pause_total_ns uint64
	pause_ns [256]uint64 // circular buffer of recent gc pause lengths
	pause_end [256]uint64 // circular buffer of recent gc end times (nanoseconds since 1970)
	numgc uint32
	enablegc bool
	debuggc bool

	// Statistics about allocation size classes.

	by_size [_NumSizeClasses]struct {
		size uint32
		nmalloc uint64
		nfree uint64
	}

	tinyallocs uint64 // number of tiny allocations that didn't cause actual allocation; not exported to go directly
}

var memstats mstats
// Size classes. Computed and initialized by InitSizes.
//
// SizeToClass(0 <= n <= MaxSmallSize) returns the size class,
// 1 <= sizeclass < NumSizeClasses, for n.
// Size class 0 is reserved to mean "not small".
//
// class_to_size[i] = largest size in class i
// class_to_allocnpages[i] = number of pages to allocate when
// making new objects in class i

var class_to_size [_NumSizeClasses]int32
var class_to_allocnpages [_NumSizeClasses]int32
var size_to_class8 [1024/8 + 1]int8
var size_to_class128 [(_MaxSmallSize-1024)/128 + 1]int8
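The two size_to_class tables above drive the size-to-class mapping described in the comment; a sketch of the lookup they support (the runtime's SizeToClass, presumably in msize.go, has this shape, shown here under that assumption):

func sizeToClassSketch(size uintptr) int32 {
	if size > _MaxSmallSize {
		throw("sizeToClassSketch: size too large")
	}
	if size > 1024-8 {
		// 128-byte granularity above 1 kB.
		return int32(size_to_class128[(size-1024+127)>>7])
	}
	// 8-byte granularity for small sizes.
	return int32(size_to_class8[(size+7)>>3])
}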
type mcachelist struct {
	list *mlink
	nlist uint32
}

type stackfreelist struct {
	list gclinkptr // linked list of free stacks
	size uintptr // total size of stacks in list
}
// Per-thread (in Go, per-P) cache for small objects.
// No locking needed because it is per-thread (per-P).
type mcache struct {
	// The following members are accessed on every malloc,
	// so they are grouped here for better caching.
	next_sample int32 // trigger heap sample after allocating this many bytes
	local_cachealloc intptr // bytes allocated (or freed) from cache since last lock of heap
	// Allocator cache for tiny objects w/o pointers.
	// See "Tiny allocator" comment in malloc.go.
	tiny unsafe.Pointer
	tinyoffset uintptr
	local_tinyallocs uintptr // number of tiny allocs not counted in other stats

	// The rest is not accessed on every malloc.
	alloc [_NumSizeClasses]*mspan // spans to allocate from

	stackcache [_NumStackOrders]stackfreelist

	sudogcache *sudog

	// Local allocator stats, flushed during GC.
	local_nlookup uintptr // number of pointer lookups
	local_largefree uintptr // bytes freed for large objects (>maxsmallsize)
	local_nlargefree uintptr // number of frees for large objects (>maxsmallsize)
	local_nsmallfree [_NumSizeClasses]uintptr // number of frees for small objects (<=maxsmallsize)
}
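The tiny/tinyoffset pair above is a bump allocator for small pointer-free objects; a simplified sketch of how mallocgc could use it (alignment and block-refill handling omitted, and 16 is assumed here as the tiny block size):

func tinyAllocSketch(c *mcache, size uintptr) unsafe.Pointer {
	const tinySize = 16 // assumed block size for the tiny allocator
	off := c.tinyoffset
	if c.tiny != nil && off+size <= tinySize {
		p := add(c.tiny, off) // bump-allocate inside the cached block
		c.tinyoffset = off + size
		c.local_tinyallocs++
		return p
	}
	return nil // caller falls back to a regular size-class allocation
}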
const (
	_KindSpecialFinalizer = 1
	_KindSpecialProfile = 2
	// Note: The finalizer special must be first because if we're freeing
	// an object, a finalizer special will cause the freeing operation
	// to abort, and we want to keep the other special records around
	// if that happens.
)

type special struct {
	next *special // linked list in span
	offset uint16 // span offset of object
	kind byte // kind of special
}

// The described object has a finalizer set for it.
type specialfinalizer struct {
	special special
	fn *funcval
	nret uintptr
	fint *_type
	ot *ptrtype
}

// The described object is being heap profiled.
type specialprofile struct {
	special special
	b *bucket
}
// An MSpan is a run of pages.
const (
	_MSpanInUse = iota // allocated for garbage collected heap
	_MSpanStack // allocated for use by stack allocator
	_MSpanFree
	_MSpanListHead
	_MSpanDead
)

type mspan struct {
	next *mspan // in a span linked list
	prev *mspan // in a span linked list
	start pageID // starting page number
	npages uintptr // number of pages in span
	freelist gclinkptr // list of free objects
	// sweep generation:
	// if sweepgen == h->sweepgen - 2, the span needs sweeping
	// if sweepgen == h->sweepgen - 1, the span is currently being swept
	// if sweepgen == h->sweepgen, the span is swept and ready to use
	// h->sweepgen is incremented by 2 after every GC
	sweepgen uint32
	ref uint16 // capacity - number of objects in freelist
	sizeclass uint8 // size class
	incache bool // being used by an mcache
	state uint8 // mspaninuse etc
	needzero uint8 // needs to be zeroed before allocation
	elemsize uintptr // computed from sizeclass or from npages
	unusedsince int64 // first time spotted by gc in mspanfree state
	npreleased uintptr // number of pages released to the os
	limit uintptr // end of data in span
	speciallock mutex // guards specials list
	specials *special // linked list of special records sorted by offset.
}

func (s *mspan) base() uintptr {
	return uintptr(s.start << _PageShift)
}

func (s *mspan) layout() (size, n, total uintptr) {
	total = s.npages << _PageShift
	size = s.elemsize
	if size > 0 {
		n = total / size
	}
	return
}
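The sweepgen rules in the comment above, written out as a small helper (illustrative only; the real checks live in the sweeper and in the mcentral allocation path):

func spanSweepStateSketch(h *mheap, s *mspan) string {
	switch s.sweepgen {
	case h.sweepgen - 2:
		return "needs sweeping"
	case h.sweepgen - 1:
		return "being swept right now"
	case h.sweepgen:
		return "swept and ready to use"
	}
	return "left over from an older GC cycle"
}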
// Every MSpan is in one doubly-linked list,
// either one of the MHeap's free lists or one of the
// MCentral's span lists. We use empty MSpan structures as list heads.

// Central list of free objects of a given size.
type mcentral struct {
	lock mutex
	sizeclass int32
	nonempty mspan // list of spans with a free object
	empty mspan // list of spans with no free objects (or cached in an mcache)
}
// Main malloc heap.
// The heap itself is the "free[]" and "large" arrays,
// but all the other global data is here too.
type mheap struct {
	lock mutex
	free [_MaxMHeapList]mspan // free lists of given length
	freelarge mspan // free lists length >= _MaxMHeapList
	busy [_MaxMHeapList]mspan // busy lists of large objects of given length
	busylarge mspan // busy lists of large objects length >= _MaxMHeapList
	allspans **mspan // all spans out there
	gcspans **mspan // copy of allspans referenced by gc marker or sweeper
	nspan uint32
	sweepgen uint32 // sweep generation, see comment in mspan
	sweepdone uint32 // all spans are swept

	// span lookup
	spans **mspan
	spans_mapped uintptr

	// range of addresses we might see in the heap
	bitmap uintptr
	bitmap_mapped uintptr
	arena_start uintptr
	arena_used uintptr
	arena_end uintptr
	arena_reserved bool

	// write barrier shadow data+heap.
	// 64-bit systems only, enabled by GODEBUG=wbshadow=1.
	shadow_enabled bool // shadow should be updated and checked
	shadow_reserved bool // shadow memory is reserved
	shadow_heap uintptr // heap-addr + shadow_heap = shadow heap addr
	shadow_data uintptr // data-addr + shadow_data = shadow data addr
	data_start uintptr // start of shadowed data addresses
	data_end uintptr // end of shadowed data addresses

	// central free lists for small size classes.
	// the padding makes sure that the MCentrals are
	// spaced CacheLineSize bytes apart, so that each MCentral.lock
	// gets its own cache line.
	central [_NumSizeClasses]struct {
		mcentral mcentral
		pad [_CacheLineSize]byte
	}

	spanalloc fixalloc // allocator for span*
	cachealloc fixalloc // allocator for mcache*
	specialfinalizeralloc fixalloc // allocator for specialfinalizer*
	specialprofilealloc fixalloc // allocator for specialprofile*
	speciallock mutex // lock for special record allocators.

	// Malloc stats.
	largefree uint64 // bytes freed for large objects (>maxsmallsize)
	nlargefree uint64 // number of frees for large objects (>maxsmallsize)
	nsmallfree [_NumSizeClasses]uint64 // number of frees for small objects (<=maxsmallsize)
}
var mheap_ mheap

const (
	// flags to malloc
	_FlagNoScan = 1 << 0 // GC doesn't have to scan object
	_FlagNoZero = 1 << 1 // don't zero memory
)

// NOTE: Layout known to queuefinalizer.
type finalizer struct {
	fn *funcval // function to call
	arg unsafe.Pointer // ptr to object
	nret uintptr // bytes of return values from fn
	fint *_type // type of first argument of fn
	ot *ptrtype // type of ptr to object
}

type finblock struct {
	alllink *finblock
	next *finblock
	cnt int32
	_ int32
	fin [(_FinBlockSize - 2*ptrSize - 2*4) / unsafe.Sizeof(finalizer{})]finalizer
}

// Information from the compiler about the layout of stack frames.
type bitvector struct {
	n int32 // # of bits
	bytedata *uint8
}

type stackmap struct {
	n int32 // number of bitmaps
	nbit int32 // number of bits in each bitmap
	bytedata [1]byte // bitmaps, each starting on a 32-bit boundary
}
@@ -82,6 +82,12 @@ const (
	typeShift = 2
)

// Information from the compiler about the layout of stack frames.
type bitvector struct {
	n int32 // # of bits
	bytedata *uint8
}

// addb returns the byte pointer p+n.
//go:nowritebarrier
func addb(p *byte, n uintptr) *byte {
@@ -2,14 +2,63 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Per-P malloc cache for small objects.
//
// See malloc.h for an overview.

package runtime

import "unsafe"

// Per-thread (in Go, per-P) cache for small objects.
// No locking needed because it is per-thread (per-P).
type mcache struct {
	// The following members are accessed on every malloc,
	// so they are grouped here for better caching.
	next_sample int32 // trigger heap sample after allocating this many bytes
	local_cachealloc intptr // bytes allocated (or freed) from cache since last lock of heap
	// Allocator cache for tiny objects w/o pointers.
	// See "Tiny allocator" comment in malloc.go.
	tiny unsafe.Pointer
	tinyoffset uintptr
	local_tinyallocs uintptr // number of tiny allocs not counted in other stats

	// The rest is not accessed on every malloc.
	alloc [_NumSizeClasses]*mspan // spans to allocate from

	stackcache [_NumStackOrders]stackfreelist

	sudogcache *sudog

	// Local allocator stats, flushed during GC.
	local_nlookup uintptr // number of pointer lookups
	local_largefree uintptr // bytes freed for large objects (>maxsmallsize)
	local_nlargefree uintptr // number of frees for large objects (>maxsmallsize)
	local_nsmallfree [_NumSizeClasses]uintptr // number of frees for small objects (<=maxsmallsize)
}

// A gclink is a node in a linked list of blocks, like mlink,
// but it is opaque to the garbage collector.
// The GC does not trace the pointers during collection,
// and the compiler does not emit write barriers for assignments
// of gclinkptr values. Code should store references to gclinks
// as gclinkptr, not as *gclink.
type gclink struct {
	next gclinkptr
}

// A gclinkptr is a pointer to a gclink, but it is opaque
// to the garbage collector.
type gclinkptr uintptr

// ptr returns the *gclink form of p.
// The result should be used for accessing fields, not stored
// in other data structures.
func (p gclinkptr) ptr() *gclink {
	return (*gclink)(unsafe.Pointer(p))
}

type stackfreelist struct {
	list gclinkptr // linked list of free stacks
	size uintptr // total size of stacks in list
}

// dummy MSpan that contains no free objects.
var emptymspan mspan
@@ -12,6 +12,14 @@

package runtime

// Central list of free objects of a given size.
type mcentral struct {
	lock mutex
	sizeclass int32
	nonempty mspan // list of spans with a free object
	empty mspan // list of spans with no free objects (or cached in an mcache)
}

// Initialize a single central free list.
func mCentral_Init(c *mcentral, sizeclass int32) {
	c.sizeclass = sizeclass
@@ -1,114 +0,0 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package runtime

import "unsafe"

// Note: the MemStats struct should be kept in sync with
// struct MStats in malloc.h

// A MemStats records statistics about the memory allocator.
type MemStats struct {
	// General statistics.
	Alloc uint64 // bytes allocated and still in use
	TotalAlloc uint64 // bytes allocated (even if freed)
	Sys uint64 // bytes obtained from system (sum of XxxSys below)
	Lookups uint64 // number of pointer lookups
	Mallocs uint64 // number of mallocs
	Frees uint64 // number of frees

	// Main allocation heap statistics.
	HeapAlloc uint64 // bytes allocated and still in use
	HeapSys uint64 // bytes obtained from system
	HeapIdle uint64 // bytes in idle spans
	HeapInuse uint64 // bytes in non-idle span
	HeapReleased uint64 // bytes released to the OS
	HeapObjects uint64 // total number of allocated objects

	// Low-level fixed-size structure allocator statistics.
	// Inuse is bytes used now.
	// Sys is bytes obtained from system.
	StackInuse uint64 // bytes used by stack allocator
	StackSys uint64
	MSpanInuse uint64 // mspan structures
	MSpanSys uint64
	MCacheInuse uint64 // mcache structures
	MCacheSys uint64
	BuckHashSys uint64 // profiling bucket hash table
	GCSys uint64 // GC metadata
	OtherSys uint64 // other system allocations

	// Garbage collector statistics.
	NextGC uint64 // next collection will happen when HeapAlloc ≥ this amount
	LastGC uint64 // end time of last collection (nanoseconds since 1970)
	PauseTotalNs uint64
	PauseNs [256]uint64 // circular buffer of recent GC pause durations, most recent at [(NumGC+255)%256]
	PauseEnd [256]uint64 // circular buffer of recent GC pause end times
	NumGC uint32
	EnableGC bool
	DebugGC bool

	// Per-size allocation statistics.
	// 61 is NumSizeClasses in the C code.
	BySize [61]struct {
		Size uint32
		Mallocs uint64
		Frees uint64
	}
}

// Size of the trailing by_size array differs between Go and C,
// and all data after by_size is local to runtime, not exported.
// NumSizeClasses was changed, but we can not change Go struct because of backward compatibility.
// sizeof_C_MStats is what C thinks about size of Go struct.
var sizeof_C_MStats = unsafe.Offsetof(memstats.by_size) + 61*unsafe.Sizeof(memstats.by_size[0])

func init() {
	var memStats MemStats
	if sizeof_C_MStats != unsafe.Sizeof(memStats) {
		println(sizeof_C_MStats, unsafe.Sizeof(memStats))
		throw("MStats vs MemStatsType size mismatch")
	}
}

// ReadMemStats populates m with memory allocator statistics.
func ReadMemStats(m *MemStats) {
	// Have to acquire worldsema to stop the world,
	// because stoptheworld can only be used by
	// one goroutine at a time, and there might be
	// a pending garbage collection already calling it.
	semacquire(&worldsema, false)
	gp := getg()
	gp.m.preemptoff = "read mem stats"
	systemstack(stoptheworld)

	systemstack(func() {
		readmemstats_m(m)
	})

	gp.m.preemptoff = ""
	gp.m.locks++
	semrelease(&worldsema)
	systemstack(starttheworld)
	gp.m.locks--
}

//go:linkname runtime_debug_WriteHeapDump runtime/debug.WriteHeapDump
func runtime_debug_WriteHeapDump(fd uintptr) {
	semacquire(&worldsema, false)
	gp := getg()
	gp.m.preemptoff = "write heap dump"
	systemstack(stoptheworld)

	systemstack(func() {
		writeheapdump_m(fd)
	})

	gp.m.preemptoff = ""
	gp.m.locks++
	semrelease(&worldsema)
	systemstack(starttheworld)
	gp.m.locks--
}
@@ -8,6 +8,14 @@ package runtime

import "unsafe"

type finblock struct {
	alllink *finblock
	next *finblock
	cnt int32
	_ int32
	fin [(_FinBlockSize - 2*ptrSize - 2*4) / unsafe.Sizeof(finalizer{})]finalizer
}

var finlock mutex // protects the following variables
var fing *g // goroutine that runs finalizers
var finq *finblock // list of finalizers that are to be executed

@@ -17,6 +25,15 @@ var fingwait bool
var fingwake bool
var allfin *finblock // list of all blocks

// NOTE: Layout known to queuefinalizer.
type finalizer struct {
	fn *funcval // function to call
	arg unsafe.Pointer // ptr to object
	nret uintptr // bytes of return values from fn
	fint *_type // type of first argument of fn
	ot *ptrtype // type of ptr to object
}

var finalizer1 = [...]byte{
	// Each Finalizer is 5 words, ptr ptr uintptr ptr ptr.
	// Each byte describes 4 words.
@@ -10,6 +10,34 @@ package runtime

import "unsafe"

// FixAlloc is a simple free-list allocator for fixed size objects.
// Malloc uses a FixAlloc wrapped around sysAlloc to manage its
// MCache and MSpan objects.
//
// Memory returned by FixAlloc_Alloc is not zeroed.
// The caller is responsible for locking around FixAlloc calls.
// Callers can keep state in the object but the first word is
// smashed by freeing and reallocating.
type fixalloc struct {
	size uintptr
	first unsafe.Pointer // go func(unsafe.pointer, unsafe.pointer); f(arg, p) called first time p is returned
	arg unsafe.Pointer
	list *mlink
	chunk *byte
	nchunk uint32
	inuse uintptr // in-use bytes now
	stat *uint64
}

// A generic linked list of blocks. (Typically the block is bigger than sizeof(MLink).)
// Since assignments to mlink.next will result in a write barrier being performed,
// this cannot be used by some of the internal GC structures. For example, when
// the sweeper is placing an unmarked object on the free list it does not want the
// write barrier to be called since that could result in the object being reachable.
type mlink struct {
	next *mlink
}

// Initialize f to allocate objects of the given size,
// using the allocator to obtain chunks of memory.
func fixAlloc_Init(f *fixalloc, size uintptr, first func(unsafe.Pointer, unsafe.Pointer), arg unsafe.Pointer, stat *uint64) {
src/runtime/mgc.go (1590 lines changed): file diff suppressed because it is too large.
@@ -1,91 +0,0 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package runtime

import _ "unsafe" // for go:linkname

//go:linkname runtime_debug_freeOSMemory runtime/debug.freeOSMemory
func runtime_debug_freeOSMemory() {
	gogc(2) // force GC and do eager sweep
	systemstack(scavenge_m)
}

var poolcleanup func()

//go:linkname sync_runtime_registerPoolCleanup sync.runtime_registerPoolCleanup
func sync_runtime_registerPoolCleanup(f func()) {
	poolcleanup = f
}

func clearpools() {
	// clear sync.Pools
	if poolcleanup != nil {
		poolcleanup()
	}

	for _, p := range &allp {
		if p == nil {
			break
		}
		// clear tinyalloc pool
		if c := p.mcache; c != nil {
			c.tiny = nil
			c.tinyoffset = 0

			// disconnect cached list before dropping it on the floor,
			// so that a dangling ref to one entry does not pin all of them.
			var sg, sgnext *sudog
			for sg = c.sudogcache; sg != nil; sg = sgnext {
				sgnext = sg.next
				sg.next = nil
			}
			c.sudogcache = nil
		}

		// clear defer pools
		for i := range p.deferpool {
			// disconnect cached list before dropping it on the floor,
			// so that a dangling ref to one entry does not pin all of them.
			var d, dlink *_defer
			for d = p.deferpool[i]; d != nil; d = dlink {
				dlink = d.link
				d.link = nil
			}
			p.deferpool[i] = nil
		}
	}
}

// backgroundgc is running in a goroutine and does the concurrent GC work.
// bggc holds the state of the backgroundgc.
func backgroundgc() {
	bggc.g = getg()
	for {
		gcwork(0)
		lock(&bggc.lock)
		bggc.working = 0
		goparkunlock(&bggc.lock, "Concurrent GC wait", traceEvGoBlock)
	}
}

func bgsweep() {
	sweep.g = getg()
	for {
		for gosweepone() != ^uintptr(0) {
			sweep.nbgsweep++
			Gosched()
		}
		lock(&gclock)
		if !gosweepdone() {
			// This can happen if a GC runs between
			// gosweepone returning ^0 above
			// and the lock being acquired.
			unlock(&gclock)
			continue
		}
		sweep.parked = true
		goparkunlock(&gclock, "GC sweep wait", traceEvGoBlock)
	}
}
src/runtime/mgcmark.go (new file, 659 lines)
@@ -0,0 +1,659 @@
|
|||
// Copyright 2009 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Garbage collector: marking and scanning
|
||||
|
||||
package runtime
|
||||
|
||||
import "unsafe"
|
||||
|
||||
// Scan all of the stacks, greying (or graying if in America) the referents
|
||||
// but not blackening them since the mark write barrier isn't installed.
|
||||
//go:nowritebarrier
|
||||
func gcscan_m() {
|
||||
_g_ := getg()
|
||||
|
||||
// Grab the g that called us and potentially allow rescheduling.
|
||||
// This allows it to be scanned like other goroutines.
|
||||
mastergp := _g_.m.curg
|
||||
casgstatus(mastergp, _Grunning, _Gwaiting)
|
||||
mastergp.waitreason = "garbage collection scan"
|
||||
|
||||
// Span sweeping has been done by finishsweep_m.
|
||||
// Long term we will want to make this goroutine runnable
|
||||
// by placing it onto a scanenqueue state and then calling
|
||||
// runtime·restartg(mastergp) to make it Grunnable.
|
||||
// At the bottom we will want to return this p back to the scheduler.
|
||||
|
||||
// Prepare flag indicating that the scan has not been completed.
|
||||
lock(&allglock)
|
||||
local_allglen := allglen
|
||||
for i := uintptr(0); i < local_allglen; i++ {
|
||||
gp := allgs[i]
|
||||
gp.gcworkdone = false // set to true in gcphasework
|
||||
gp.gcscanvalid = false // stack has not been scanned
|
||||
}
|
||||
unlock(&allglock)
|
||||
|
||||
work.nwait = 0
|
||||
work.ndone = 0
|
||||
work.nproc = 1 // For now do not do this in parallel.
|
||||
// ackgcphase is not needed since we are not scanning running goroutines.
|
||||
parforsetup(work.markfor, work.nproc, uint32(_RootCount+local_allglen), false, markroot)
|
||||
parfordo(work.markfor)
|
||||
|
||||
lock(&allglock)
|
||||
// Check that gc work is done.
|
||||
for i := uintptr(0); i < local_allglen; i++ {
|
||||
gp := allgs[i]
|
||||
if !gp.gcworkdone {
|
||||
throw("scan missed a g")
|
||||
}
|
||||
}
|
||||
unlock(&allglock)
|
||||
|
||||
casgstatus(mastergp, _Gwaiting, _Grunning)
|
||||
// Let the g that called us continue to run.
|
||||
}
|
||||
|
||||
// ptrmask for an allocation containing a single pointer.
|
||||
var oneptr = [...]uint8{typePointer}
|
||||
|
||||
//go:nowritebarrier
|
||||
func markroot(desc *parfor, i uint32) {
|
||||
var gcw gcWorkProducer
|
||||
gcw.initFromCache()
|
||||
|
||||
// Note: if you add a case here, please also update heapdump.c:dumproots.
|
||||
switch i {
|
||||
case _RootData:
|
||||
scanblock(uintptr(unsafe.Pointer(&data)), uintptr(unsafe.Pointer(&edata))-uintptr(unsafe.Pointer(&data)), gcdatamask.bytedata, &gcw)
|
||||
|
||||
case _RootBss:
|
||||
scanblock(uintptr(unsafe.Pointer(&bss)), uintptr(unsafe.Pointer(&ebss))-uintptr(unsafe.Pointer(&bss)), gcbssmask.bytedata, &gcw)
|
||||
|
||||
case _RootFinalizers:
|
||||
for fb := allfin; fb != nil; fb = fb.alllink {
|
||||
scanblock(uintptr(unsafe.Pointer(&fb.fin[0])), uintptr(fb.cnt)*unsafe.Sizeof(fb.fin[0]), &finptrmask[0], &gcw)
|
||||
}
|
||||
|
||||
case _RootSpans:
|
||||
// mark MSpan.specials
|
||||
sg := mheap_.sweepgen
|
||||
for spanidx := uint32(0); spanidx < uint32(len(work.spans)); spanidx++ {
|
||||
s := work.spans[spanidx]
|
||||
if s.state != mSpanInUse {
|
||||
continue
|
||||
}
|
||||
if !checkmarkphase && s.sweepgen != sg {
|
||||
// sweepgen was updated (+2) during non-checkmark GC pass
|
||||
print("sweep ", s.sweepgen, " ", sg, "\n")
|
||||
throw("gc: unswept span")
|
||||
}
|
||||
for sp := s.specials; sp != nil; sp = sp.next {
|
||||
if sp.kind != _KindSpecialFinalizer {
|
||||
continue
|
||||
}
|
||||
// don't mark finalized object, but scan it so we
|
||||
// retain everything it points to.
|
||||
spf := (*specialfinalizer)(unsafe.Pointer(sp))
|
||||
// A finalizer can be set for an inner byte of an object, find object beginning.
|
||||
p := uintptr(s.start<<_PageShift) + uintptr(spf.special.offset)/s.elemsize*s.elemsize
|
||||
if gcphase != _GCscan {
|
||||
scanblock(p, s.elemsize, nil, &gcw) // scanned during mark phase
|
||||
}
|
||||
scanblock(uintptr(unsafe.Pointer(&spf.fn)), ptrSize, &oneptr[0], &gcw)
|
||||
}
|
||||
}
|
||||
|
||||
case _RootFlushCaches:
|
||||
if gcphase != _GCscan { // Do not flush mcaches during GCscan phase.
|
||||
flushallmcaches()
|
||||
}
|
||||
|
||||
default:
|
||||
// the rest is scanning goroutine stacks
|
||||
if uintptr(i-_RootCount) >= allglen {
|
||||
throw("markroot: bad index")
|
||||
}
|
||||
gp := allgs[i-_RootCount]
|
||||
|
||||
// remember when we've first observed the G blocked
|
||||
// needed only to output in traceback
|
||||
status := readgstatus(gp) // We are not in a scan state
|
||||
if (status == _Gwaiting || status == _Gsyscall) && gp.waitsince == 0 {
|
||||
gp.waitsince = work.tstart
|
||||
}
|
||||
|
||||
// Shrink a stack if not much of it is being used but not in the scan phase.
|
||||
if gcphase == _GCmarktermination {
|
||||
// Shrink during STW GCmarktermination phase thus avoiding
|
||||
// complications introduced by shrinking during
|
||||
// non-STW phases.
|
||||
shrinkstack(gp)
|
||||
}
|
||||
if readgstatus(gp) == _Gdead {
|
||||
gp.gcworkdone = true
|
||||
} else {
|
||||
gp.gcworkdone = false
|
||||
}
|
||||
restart := stopg(gp)
|
||||
|
||||
// goroutine will scan its own stack when it stops running.
|
||||
// Wait until it has.
|
||||
for readgstatus(gp) == _Grunning && !gp.gcworkdone {
|
||||
}
|
||||
|
||||
// scanstack(gp) is done as part of gcphasework
|
||||
// But to make sure we finished we need to make sure that
|
||||
// the stack traps have all responded so drop into
|
||||
// this while loop until they respond.
|
||||
for !gp.gcworkdone {
|
||||
status = readgstatus(gp)
|
||||
if status == _Gdead {
|
||||
gp.gcworkdone = true // scan is a noop
|
||||
break
|
||||
}
|
||||
if status == _Gwaiting || status == _Grunnable {
|
||||
restart = stopg(gp)
|
||||
}
|
||||
}
|
||||
if restart {
|
||||
restartg(gp)
|
||||
}
|
||||
}
|
||||
gcw.dispose()
|
||||
}
|
||||
|
||||
// gchelpwork does a small bounded amount of gc work. The purpose is to
|
||||
// shorten the time (as measured by allocations) spent doing a concurrent GC.
|
||||
// The number of mutator calls is roughly proportional to the number of allocations
|
||||
// made by that mutator. This slows down the allocation while speeding up the GC.
|
||||
//go:nowritebarrier
|
||||
func gchelpwork() {
|
||||
switch gcphase {
|
||||
default:
|
||||
throw("gcphasework in bad gcphase")
|
||||
case _GCoff, _GCquiesce, _GCstw:
|
||||
// No work.
|
||||
case _GCsweep:
|
||||
// We could help by calling sweepone to sweep a single span.
|
||||
// _ = sweepone()
|
||||
case _GCscan:
|
||||
// scan the stack, mark the objects, put pointers in work buffers
|
||||
// hanging off the P where this is being run.
|
||||
// scanstack(gp)
|
||||
case _GCmark:
|
||||
// Get a full work buffer and empty it.
|
||||
// drain your own currentwbuf first in the hopes that it will
|
||||
// be more cache friendly.
|
||||
var gcw gcWork
|
||||
gcw.initFromCache()
|
||||
const n = len(workbuf{}.obj)
|
||||
gcDrainN(&gcw, n) // drain up to one buffer's worth of objects
|
||||
gcw.dispose()
|
||||
case _GCmarktermination:
|
||||
// We should never be here since the world is stopped.
|
||||
// All available mark work will be emptied before returning.
|
||||
throw("gcphasework in bad gcphase")
|
||||
}
|
||||
}
|
||||
|
||||
// The gp has been moved to a GC safepoint. GC phase specific
|
||||
// work is done here.
|
||||
//go:nowritebarrier
|
||||
func gcphasework(gp *g) {
|
||||
switch gcphase {
|
||||
default:
|
||||
throw("gcphasework in bad gcphase")
|
||||
case _GCoff, _GCquiesce, _GCstw, _GCsweep:
|
||||
// No work.
|
||||
case _GCscan:
|
||||
// scan the stack, mark the objects, put pointers in work buffers
|
||||
// hanging off the P where this is being run.
|
||||
// Indicate that the scan is valid until the goroutine runs again
|
||||
scanstack(gp)
|
||||
case _GCmark:
|
||||
// No work.
|
||||
case _GCmarktermination:
|
||||
scanstack(gp)
|
||||
// All available mark work will be emptied before returning.
|
||||
}
|
||||
gp.gcworkdone = true
|
||||
}
|
||||
|
||||
//go:nowritebarrier
|
||||
func scanstack(gp *g) {
|
||||
if gp.gcscanvalid {
|
||||
return
|
||||
}
|
||||
|
||||
if readgstatus(gp)&_Gscan == 0 {
|
||||
print("runtime:scanstack: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", hex(readgstatus(gp)), "\n")
|
||||
throw("scanstack - bad status")
|
||||
}
|
||||
|
||||
switch readgstatus(gp) &^ _Gscan {
|
||||
default:
|
||||
print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
|
||||
throw("mark - bad status")
|
||||
case _Gdead:
|
||||
return
|
||||
case _Grunning:
|
||||
print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
|
||||
throw("scanstack: goroutine not stopped")
|
||||
case _Grunnable, _Gsyscall, _Gwaiting:
|
||||
// ok
|
||||
}
|
||||
|
||||
if gp == getg() {
|
||||
throw("can't scan our own stack")
|
||||
}
|
||||
mp := gp.m
|
||||
if mp != nil && mp.helpgc != 0 {
|
||||
throw("can't scan gchelper stack")
|
||||
}
|
||||
|
||||
var gcw gcWorkProducer
|
||||
gcw.initFromCache()
|
||||
scanframe := func(frame *stkframe, unused unsafe.Pointer) bool {
|
||||
// Pick up gcw as free variable so gentraceback and friends can
|
||||
// keep the same signature.
|
||||
scanframeworker(frame, unused, &gcw)
|
||||
return true
|
||||
}
|
||||
gentraceback(^uintptr(0), ^uintptr(0), 0, gp, 0, nil, 0x7fffffff, scanframe, nil, 0)
|
||||
tracebackdefers(gp, scanframe, nil)
|
||||
gcw.disposeToCache()
|
||||
gp.gcscanvalid = true
|
||||
}
|
||||
|
||||
// Scan a stack frame: local variables and function arguments/results.
|
||||
//go:nowritebarrier
|
||||
func scanframeworker(frame *stkframe, unused unsafe.Pointer, gcw *gcWorkProducer) {
|
||||
|
||||
f := frame.fn
|
||||
targetpc := frame.continpc
|
||||
if targetpc == 0 {
|
||||
// Frame is dead.
|
||||
return
|
||||
}
|
||||
if _DebugGC > 1 {
|
||||
print("scanframe ", funcname(f), "\n")
|
||||
}
|
||||
if targetpc != f.entry {
|
||||
targetpc--
|
||||
}
|
||||
pcdata := pcdatavalue(f, _PCDATA_StackMapIndex, targetpc)
|
||||
if pcdata == -1 {
|
||||
// We do not have a valid pcdata value but there might be a
|
||||
// stackmap for this function. It is likely that we are looking
|
||||
// at the function prologue, assume so and hope for the best.
|
||||
pcdata = 0
|
||||
}
|
||||
|
||||
// Scan local variables if stack frame has been allocated.
|
||||
size := frame.varp - frame.sp
|
||||
var minsize uintptr
|
||||
if thechar != '6' && thechar != '8' {
|
||||
minsize = ptrSize
|
||||
} else {
|
||||
minsize = 0
|
||||
}
|
||||
if size > minsize {
|
||||
stkmap := (*stackmap)(funcdata(f, _FUNCDATA_LocalsPointerMaps))
|
||||
if stkmap == nil || stkmap.n <= 0 {
|
||||
print("runtime: frame ", funcname(f), " untyped locals ", hex(frame.varp-size), "+", hex(size), "\n")
|
||||
throw("missing stackmap")
|
||||
}
|
||||
|
||||
// Locals bitmap information, scan just the pointers in locals.
|
||||
if pcdata < 0 || pcdata >= stkmap.n {
|
||||
// don't know where we are
|
||||
print("runtime: pcdata is ", pcdata, " and ", stkmap.n, " locals stack map entries for ", funcname(f), " (targetpc=", targetpc, ")\n")
|
||||
throw("scanframe: bad symbol table")
|
||||
}
|
||||
bv := stackmapdata(stkmap, pcdata)
|
||||
size = (uintptr(bv.n) / typeBitsWidth) * ptrSize
|
||||
scanblock(frame.varp-size, size, bv.bytedata, gcw)
|
||||
}
|
||||
|
||||
// Scan arguments.
|
||||
if frame.arglen > 0 {
|
||||
var bv bitvector
|
||||
if frame.argmap != nil {
|
||||
bv = *frame.argmap
|
||||
} else {
|
||||
stkmap := (*stackmap)(funcdata(f, _FUNCDATA_ArgsPointerMaps))
|
||||
if stkmap == nil || stkmap.n <= 0 {
|
||||
print("runtime: frame ", funcname(f), " untyped args ", hex(frame.argp), "+", hex(frame.arglen), "\n")
|
||||
throw("missing stackmap")
|
||||
}
|
||||
if pcdata < 0 || pcdata >= stkmap.n {
|
||||
// don't know where we are
|
||||
print("runtime: pcdata is ", pcdata, " and ", stkmap.n, " args stack map entries for ", funcname(f), " (targetpc=", targetpc, ")\n")
|
||||
throw("scanframe: bad symbol table")
|
||||
}
|
||||
bv = stackmapdata(stkmap, pcdata)
|
||||
}
|
||||
scanblock(frame.argp, uintptr(bv.n)/typeBitsWidth*ptrSize, bv.bytedata, gcw)
|
||||
}
|
||||
}
|
||||
|
||||
// gcDrain scans objects in work buffers (starting with wbuf), blackening grey
|
||||
// objects until all work buffers have been drained.
|
||||
//go:nowritebarrier
|
||||
func gcDrain(gcw *gcWork) {
|
||||
if gcphase != _GCmark && gcphase != _GCmarktermination {
|
||||
throw("scanblock phase incorrect")
|
||||
}
|
||||
|
||||
for {
|
||||
// If another proc wants a pointer, give it some.
|
||||
if work.nwait > 0 && work.full == 0 {
|
||||
gcw.balance()
|
||||
}
|
||||
|
||||
b := gcw.get()
|
||||
if b == 0 {
|
||||
// work barrier reached
|
||||
break
|
||||
}
|
||||
// If the current wbuf is filled by the scan a new wbuf might be
|
||||
// returned that could possibly hold only a single object. This
|
||||
// could result in each iteration draining only a single object
|
||||
// out of the wbuf passed in + a single object placed
|
||||
// into an empty wbuf in scanobject so there could be
|
||||
// a performance hit as we keep fetching fresh wbufs.
|
||||
scanobject(b, 0, nil, &gcw.gcWorkProducer)
|
||||
}
|
||||
checknocurrentwbuf()
|
||||
}
|
||||
|
||||
// gcDrainN scans n objects, blackening grey objects.
|
||||
//go:nowritebarrier
|
||||
func gcDrainN(gcw *gcWork, n int) {
|
||||
checknocurrentwbuf()
|
||||
for i := 0; i < n; i++ {
|
||||
// This might be a good place to add prefetch code...
|
||||
// if(wbuf.nobj > 4) {
|
||||
// PREFETCH(wbuf->obj[wbuf.nobj - 3];
|
||||
// }
|
||||
b := gcw.tryGet()
|
||||
if b == 0 {
|
||||
return
|
||||
}
|
||||
scanobject(b, 0, nil, &gcw.gcWorkProducer)
|
||||
}
|
||||
}
|
||||
|
||||
// scanblock scans b as scanobject would.
|
||||
// If the gcphase is GCscan, scanblock performs additional checks.
|
||||
//go:nowritebarrier
|
||||
func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWorkProducer) {
|
||||
// Use local copies of original parameters, so that a stack trace
|
||||
// due to one of the throws below shows the original block
|
||||
// base and extent.
|
||||
b := b0
|
||||
n := n0
|
||||
|
||||
// ptrmask can have 2 possible values:
|
||||
// 1. nil - obtain pointer mask from GC bitmap.
|
||||
// 2. pointer to a compact mask (for stacks and data).
|
||||
|
||||
scanobject(b, n, ptrmask, gcw)
|
||||
if gcphase == _GCscan {
|
||||
if inheap(b) && ptrmask == nil {
|
||||
// b is in heap, we are in GCscan so there should be a ptrmask.
|
||||
throw("scanblock: In GCscan phase and inheap is true.")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Scan the object b of size n, adding pointers to wbuf.
|
||||
// Return possibly new wbuf to use.
|
||||
// If ptrmask != nil, it specifies where pointers are in b.
|
||||
// If ptrmask == nil, the GC bitmap should be consulted.
|
||||
// In this case, n may be an overestimate of the size; the GC bitmap
|
||||
// must also be used to make sure the scan stops at the end of b.
|
||||
//go:nowritebarrier
|
||||
func scanobject(b, n uintptr, ptrmask *uint8, gcw *gcWorkProducer) {
|
||||
arena_start := mheap_.arena_start
|
||||
arena_used := mheap_.arena_used
|
||||
|
||||
// Find bits of the beginning of the object.
|
||||
var hbits heapBits
|
||||
if ptrmask == nil {
|
||||
b, hbits = heapBitsForObject(b)
|
||||
if b == 0 {
|
||||
return
|
||||
}
|
||||
if n == 0 {
|
||||
n = mheap_.arena_used - b
|
||||
}
|
||||
}
|
||||
for i := uintptr(0); i < n; i += ptrSize {
|
||||
// Find bits for this word.
|
||||
var bits uintptr
|
||||
if ptrmask != nil {
|
||||
// dense mask (stack or data)
|
||||
bits = (uintptr(*(*byte)(add(unsafe.Pointer(ptrmask), (i/ptrSize)/4))) >> (((i / ptrSize) % 4) * typeBitsWidth)) & typeMask
|
||||
} else {
|
||||
// Check if we have reached end of span.
|
||||
// n is an overestimate of the size of the object.
|
||||
if (b+i)%_PageSize == 0 && h_spans[(b-arena_start)>>_PageShift] != h_spans[(b+i-arena_start)>>_PageShift] {
|
||||
break
|
||||
}
|
||||
|
||||
bits = uintptr(hbits.typeBits())
|
||||
if i > 0 && (hbits.isBoundary() || bits == typeDead) {
|
||||
break // reached beginning of the next object
|
||||
}
|
||||
hbits = hbits.next()
|
||||
}
|
||||
|
||||
if bits <= typeScalar { // typeScalar, typeDead, typeScalarMarked
|
||||
continue
|
||||
}
|
||||
|
||||
if bits&typePointer != typePointer {
|
||||
print("gc checkmarkphase=", checkmarkphase, " b=", hex(b), " ptrmask=", ptrmask, "\n")
|
||||
throw("unexpected garbage collection bits")
|
||||
}
|
||||
|
||||
obj := *(*uintptr)(unsafe.Pointer(b + i))
|
||||
|
||||
// At this point we have extracted the next potential pointer.
|
||||
// Check if it points into heap.
|
||||
if obj == 0 || obj < arena_start || obj >= arena_used {
|
||||
continue
|
||||
}
|
||||
|
||||
if mheap_.shadow_enabled && debug.wbshadow >= 2 && debug.gccheckmark > 0 && checkmarkphase {
|
||||
checkwbshadow((*uintptr)(unsafe.Pointer(b + i)))
|
||||
}
|
||||
|
||||
// Mark the object.
|
||||
if obj, hbits := heapBitsForObject(obj); obj != 0 {
|
||||
greyobject(obj, b, i, hbits, gcw)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Shade the object if it isn't already.
|
||||
// The object is not nil and known to be in the heap.
|
||||
//go:nowritebarrier
|
||||
func shade(b uintptr) {
|
||||
if !inheap(b) {
|
||||
throw("shade: passed an address not in the heap")
|
||||
}
|
||||
if obj, hbits := heapBitsForObject(b); obj != 0 {
|
||||
// TODO: this would be a great place to put a check to see
|
||||
// if we are harvesting and if we are then we should
|
||||
// figure out why there is a call to shade when the
|
||||
// harvester thinks we are in a STW.
|
||||
// if atomicload(&harvestingwbufs) == uint32(1) {
|
||||
// // Throw here to discover write barriers
|
||||
// // being executed during a STW.
|
||||
// throw("shade during harvest")
|
||||
// }
|
||||
|
||||
var gcw gcWorkProducer
|
||||
greyobject(obj, 0, 0, hbits, &gcw)
|
||||
// This is part of the write barrier so put the wbuf back.
|
||||
if gcphase == _GCmarktermination {
|
||||
gcw.dispose()
|
||||
} else {
|
||||
// If we added any pointers to the gcw, then
|
||||
// currentwbuf must be nil because 1)
|
||||
// greyobject got its wbuf from currentwbuf
|
||||
// and 2) shade runs on the systemstack, so
|
||||
// we're still on the same M. If either of
|
||||
// these becomes no longer true, we need to
|
||||
// rethink this.
|
||||
gcw.disposeToCache()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// obj is the start of an object with mark mbits.
|
||||
// If it isn't already marked, mark it and enqueue into workbuf.
|
||||
// Return possibly new workbuf to use.
|
||||
// base and off are for debugging only and could be removed.
|
||||
//go:nowritebarrier
|
||||
func greyobject(obj, base, off uintptr, hbits heapBits, gcw *gcWorkProducer) {
|
||||
// obj should be start of allocation, and so must be at least pointer-aligned.
|
||||
if obj&(ptrSize-1) != 0 {
|
||||
throw("greyobject: obj not pointer-aligned")
|
||||
}
|
||||
|
||||
if checkmarkphase {
|
||||
if !hbits.isMarked() {
|
||||
print("runtime:greyobject: checkmarks finds unexpected unmarked object obj=", hex(obj), "\n")
|
||||
print("runtime: found obj at *(", hex(base), "+", hex(off), ")\n")
|
||||
|
||||
// Dump the source (base) object
|
||||
|
||||
kb := base >> _PageShift
|
||||
xb := kb
|
||||
xb -= mheap_.arena_start >> _PageShift
|
||||
sb := h_spans[xb]
|
||||
printlock()
|
||||
print("runtime:greyobject Span: base=", hex(base), " kb=", hex(kb))
|
||||
if sb == nil {
|
||||
print(" sb=nil\n")
|
||||
} else {
|
||||
print(" sb.start*_PageSize=", hex(sb.start*_PageSize), " sb.limit=", hex(sb.limit), " sb.sizeclass=", sb.sizeclass, " sb.elemsize=", sb.elemsize, "\n")
|
||||
// base is (a pointer to) the source object holding the reference to object. Create a pointer to each of the fields
|
||||
// fields in base and print them out as hex values.
|
||||
for i := 0; i < int(sb.elemsize/ptrSize); i++ {
|
||||
print(" *(base+", i*ptrSize, ") = ", hex(*(*uintptr)(unsafe.Pointer(base + uintptr(i)*ptrSize))), "\n")
|
||||
}
|
||||
}
|
||||
|
||||
// Dump the object
|
||||
|
||||
k := obj >> _PageShift
|
||||
x := k
|
||||
x -= mheap_.arena_start >> _PageShift
|
||||
s := h_spans[x]
|
||||
print("runtime:greyobject Span: obj=", hex(obj), " k=", hex(k))
|
||||
if s == nil {
|
||||
print(" s=nil\n")
|
||||
} else {
|
||||
print(" s.start=", hex(s.start*_PageSize), " s.limit=", hex(s.limit), " s.sizeclass=", s.sizeclass, " s.elemsize=", s.elemsize, "\n")
|
||||
// NOTE(rsc): This code is using s.sizeclass as an approximation of the
|
||||
// number of pointer-sized words in an object. Perhaps not what was intended.
|
||||
for i := 0; i < int(s.sizeclass); i++ {
|
||||
print(" *(obj+", i*ptrSize, ") = ", hex(*(*uintptr)(unsafe.Pointer(obj + uintptr(i)*ptrSize))), "\n")
|
||||
}
|
||||
}
|
||||
throw("checkmark found unmarked object")
|
||||
}
|
||||
if !hbits.isCheckmarked() {
|
||||
return
|
||||
}
|
||||
hbits.setCheckmarked()
|
||||
if !hbits.isCheckmarked() {
|
||||
throw("setCheckmarked and isCheckmarked disagree")
|
||||
}
|
||||
} else {
|
||||
// If marked we have nothing to do.
|
||||
if hbits.isMarked() {
|
||||
return
|
||||
}
|
||||
|
||||
// Each byte of GC bitmap holds info for two words.
|
||||
// Might be racing with other updates, so use atomic update always.
|
||||
// We used to be clever here and use a non-atomic update in certain
|
||||
// cases, but it's not worth the risk.
|
||||
hbits.setMarked()
|
||||
}
|
||||
|
||||
if !checkmarkphase && hbits.typeBits() == typeDead {
|
||||
return // noscan object
|
||||
}
|
||||
|
||||
// Queue the obj for scanning. The PREFETCH(obj) logic has been removed but
|
||||
// seems like a nice optimization that can be added back in.
|
||||
// There needs to be time between the PREFETCH and the use.
|
||||
// Previously we put the obj in an 8 element buffer that is drained at a rate
|
||||
// to give the PREFETCH time to do its work.
|
||||
// Use of PREFETCHNTA might be more appropriate than PREFETCH
|
||||
|
||||
gcw.put(obj)
|
||||
}
|
||||
|
||||
// When in GCmarkterminate phase we allocate black.
|
||||
//go:nowritebarrier
|
||||
func gcmarknewobject_m(obj uintptr) {
|
||||
if gcphase != _GCmarktermination {
|
||||
throw("marking new object while not in mark termination phase")
|
||||
}
|
||||
if checkmarkphase { // The world should be stopped so this should not happen.
|
||||
throw("gcmarknewobject called while doing checkmark")
|
||||
}
|
||||
|
||||
heapBitsForAddr(obj).setMarked()
|
||||
}
|
||||
|
||||
// Checkmarking
|
||||
|
||||
// To help debug the concurrent GC we remark with the world
|
||||
// stopped ensuring that any object encountered has their normal
|
||||
// mark bit set. To do this we use an orthogonal bit
|
||||
// pattern to indicate the object is marked. The following pattern
|
||||
// uses the upper two bits in the object's boundary nibble.
|
||||
// 01: scalar not marked
|
||||
// 10: pointer not marked
|
||||
// 11: pointer marked
|
||||
// 00: scalar marked
|
||||
// Xoring with 01 will flip the pattern from marked to unmarked and vice versa.
|
||||
// The higher bit is 1 for pointers and 0 for scalars, whether the object
|
||||
// is marked or not.
|
||||
// The first nibble no longer holds the typeDead pattern indicating that
|
||||
// there are no more pointers in the object. This information is held
|
||||
// in the second nibble.
|
||||
|
||||
// When marking an object if the bool checkmarkphase is true one uses the above
|
||||
// encoding, otherwise one uses the bitMarked bit in the lower two bits
|
||||
// of the nibble.
|
||||
var checkmarkphase = false
|
||||
|
||||
//go:nowritebarrier
|
||||
func initCheckmarks() {
|
||||
for _, s := range work.spans {
|
||||
if s.state == _MSpanInUse {
|
||||
heapBitsForSpan(s.base()).initCheckmarkSpan(s.layout())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func clearCheckmarks() {
|
||||
for _, s := range work.spans {
|
||||
if s.state == _MSpanInUse {
|
||||
heapBitsForSpan(s.base()).clearCheckmarkSpan(s.layout())
|
||||
}
|
||||
}
|
||||
}
|
||||
src/runtime/mgcsweep.go (new file, 312 lines)
@@ -0,0 +1,312 @@
|
|||
// Copyright 2009 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Garbage collector: sweeping
|
||||
|
||||
package runtime
|
||||
|
||||
import "unsafe"
|
||||
|
||||
var sweep sweepdata
|
||||
|
||||
// State of background sweep.
|
||||
// Protected by gclock.
|
||||
type sweepdata struct {
|
||||
g *g
|
||||
parked bool
|
||||
started bool
|
||||
|
||||
spanidx uint32 // background sweeper position
|
||||
|
||||
nbgsweep uint32
|
||||
npausesweep uint32
|
||||
}
|
||||
|
||||
var gclock mutex
|
||||
|
||||
//go:nowritebarrier
|
||||
func finishsweep_m() {
|
||||
// The world is stopped so we should be able to complete the sweeps
|
||||
// quickly.
|
||||
for sweepone() != ^uintptr(0) {
|
||||
sweep.npausesweep++
|
||||
}
|
||||
|
||||
// There may be some other spans being swept concurrently that
|
||||
// we need to wait for. If finishsweep_m is done with the world stopped
|
||||
// this code is not required.
|
||||
sg := mheap_.sweepgen
|
||||
for _, s := range work.spans {
|
||||
if s.sweepgen != sg && s.state == _MSpanInUse {
|
||||
mSpan_EnsureSwept(s)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func bgsweep() {
|
||||
sweep.g = getg()
|
||||
for {
|
||||
for gosweepone() != ^uintptr(0) {
|
||||
sweep.nbgsweep++
|
||||
Gosched()
|
||||
}
|
||||
lock(&gclock)
|
||||
if !gosweepdone() {
|
||||
// This can happen if a GC runs between
|
||||
// gosweepone returning ^0 above
|
||||
// and the lock being acquired.
|
||||
unlock(&gclock)
|
||||
continue
|
||||
}
|
||||
sweep.parked = true
|
||||
goparkunlock(&gclock, "GC sweep wait", traceEvGoBlock)
|
||||
}
|
||||
}
|
||||
|
||||
// sweeps one span
|
||||
// returns number of pages returned to heap, or ^uintptr(0) if there is nothing to sweep
|
||||
//go:nowritebarrier
|
||||
func sweepone() uintptr {
|
||||
_g_ := getg()
|
||||
|
||||
// increment locks to ensure that the goroutine is not preempted
|
||||
// in the middle of sweep thus leaving the span in an inconsistent state for next GC
|
||||
_g_.m.locks++
|
||||
sg := mheap_.sweepgen
|
||||
for {
|
||||
idx := xadd(&sweep.spanidx, 1) - 1
|
||||
if idx >= uint32(len(work.spans)) {
|
||||
mheap_.sweepdone = 1
|
||||
_g_.m.locks--
|
||||
return ^uintptr(0)
|
||||
}
|
||||
s := work.spans[idx]
|
||||
if s.state != mSpanInUse {
|
||||
s.sweepgen = sg
|
||||
continue
|
||||
}
|
||||
if s.sweepgen != sg-2 || !cas(&s.sweepgen, sg-2, sg-1) {
|
||||
continue
|
||||
}
|
||||
npages := s.npages
|
||||
if !mSpan_Sweep(s, false) {
|
||||
npages = 0
|
||||
}
|
||||
_g_.m.locks--
|
||||
return npages
|
||||
}
|
||||
}
|
||||
|
||||
//go:nowritebarrier
|
||||
func gosweepone() uintptr {
|
||||
var ret uintptr
|
||||
systemstack(func() {
|
||||
ret = sweepone()
|
||||
})
|
||||
return ret
|
||||
}
|
||||
|
||||
//go:nowritebarrier
|
||||
func gosweepdone() bool {
|
||||
return mheap_.sweepdone != 0
|
||||
}
|
||||
|
||||
// Returns only when span s has been swept.
|
||||
//go:nowritebarrier
|
||||
func mSpan_EnsureSwept(s *mspan) {
|
||||
// Caller must disable preemption.
|
||||
// Otherwise when this function returns the span can become unswept again
|
||||
// (if GC is triggered on another goroutine).
|
||||
_g_ := getg()
|
||||
if _g_.m.locks == 0 && _g_.m.mallocing == 0 && _g_ != _g_.m.g0 {
|
||||
throw("MSpan_EnsureSwept: m is not locked")
|
||||
}
|
||||
|
||||
sg := mheap_.sweepgen
|
||||
if atomicload(&s.sweepgen) == sg {
|
||||
return
|
||||
}
|
||||
// The caller must be sure that the span is a MSpanInUse span.
|
||||
if cas(&s.sweepgen, sg-2, sg-1) {
|
||||
mSpan_Sweep(s, false)
|
||||
return
|
||||
}
|
||||
// unfortunate condition, and we don't have efficient means to wait
|
||||
for atomicload(&s.sweepgen) != sg {
|
||||
osyield()
|
||||
}
|
||||
}
|
||||
|
||||
// Sweep frees or collects finalizers for blocks not marked in the mark phase.
|
||||
// It clears the mark bits in preparation for the next GC round.
|
||||
// Returns true if the span was returned to heap.
|
||||
// If preserve=true, don't return it to heap nor relink in MCentral lists;
|
||||
// caller takes care of it.
|
||||
//TODO go:nowritebarrier
|
||||
func mSpan_Sweep(s *mspan, preserve bool) bool {
|
||||
if checkmarkphase {
|
||||
throw("MSpan_Sweep: checkmark only runs in STW and after the sweep")
|
||||
}
|
||||
|
||||
// It's critical that we enter this function with preemption disabled,
|
||||
// GC must not start while we are in the middle of this function.
|
||||
_g_ := getg()
|
||||
if _g_.m.locks == 0 && _g_.m.mallocing == 0 && _g_ != _g_.m.g0 {
|
||||
throw("MSpan_Sweep: m is not locked")
|
||||
}
|
||||
sweepgen := mheap_.sweepgen
|
||||
if s.state != mSpanInUse || s.sweepgen != sweepgen-1 {
|
||||
print("MSpan_Sweep: state=", s.state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n")
|
||||
throw("MSpan_Sweep: bad span state")
|
||||
}
|
||||
|
||||
if trace.enabled {
|
||||
traceGCSweepStart()
|
||||
}
|
||||
|
||||
cl := s.sizeclass
|
||||
size := s.elemsize
|
||||
res := false
|
||||
nfree := 0
|
||||
|
||||
var head, end gclinkptr
|
||||
|
||||
c := _g_.m.mcache
|
||||
sweepgenset := false
|
||||
|
||||
// Mark any free objects in this span so we don't collect them.
|
||||
for link := s.freelist; link.ptr() != nil; link = link.ptr().next {
|
||||
heapBitsForAddr(uintptr(link)).setMarkedNonAtomic()
|
||||
}
|
||||
|
||||
// Unlink & free special records for any objects we're about to free.
|
||||
specialp := &s.specials
|
||||
special := *specialp
|
||||
for special != nil {
|
||||
// A finalizer can be set for an inner byte of an object, find object beginning.
|
||||
p := uintptr(s.start<<_PageShift) + uintptr(special.offset)/size*size
|
||||
hbits := heapBitsForAddr(p)
|
||||
if !hbits.isMarked() {
|
||||
// Find the exact byte for which the special was setup
|
||||
// (as opposed to object beginning).
|
||||
p := uintptr(s.start<<_PageShift) + uintptr(special.offset)
|
||||
// about to free object: splice out special record
|
||||
y := special
|
||||
special = special.next
|
||||
*specialp = special
|
||||
if !freespecial(y, unsafe.Pointer(p), size, false) {
|
||||
// stop freeing of object if it has a finalizer
|
||||
hbits.setMarkedNonAtomic()
|
||||
}
|
||||
} else {
|
||||
// object is still live: keep special record
|
||||
specialp = &special.next
|
||||
special = *specialp
|
||||
}
|
||||
}
|
||||
|
||||
// Sweep through n objects of given size starting at p.
|
||||
// This thread owns the span now, so it can manipulate
|
||||
// the block bitmap without atomic operations.
|
||||
|
||||
size, n, _ := s.layout()
|
||||
heapBitsSweepSpan(s.base(), size, n, func(p uintptr) {
|
||||
// At this point we know that we are looking at garbage object
|
||||
// that needs to be collected.
|
||||
if debug.allocfreetrace != 0 {
|
||||
tracefree(unsafe.Pointer(p), size)
|
||||
}
|
||||
|
||||
// Reset to allocated+noscan.
|
||||
if cl == 0 {
|
||||
// Free large span.
|
||||
if preserve {
|
||||
throw("can't preserve large span")
|
||||
}
|
||||
heapBitsForSpan(p).clearSpan(s.layout())
|
||||
s.needzero = 1
|
||||
|
||||
// important to set sweepgen before returning it to heap
|
||||
atomicstore(&s.sweepgen, sweepgen)
|
||||
sweepgenset = true
|
||||
|
||||
// NOTE(rsc,dvyukov): The original implementation of efence
|
||||
// in CL 22060046 used SysFree instead of SysFault, so that
|
||||
// the operating system would eventually give the memory
|
||||
// back to us again, so that an efence program could run
|
||||
// longer without running out of memory. Unfortunately,
|
||||
// calling SysFree here without any kind of adjustment of the
|
||||
// heap data structures means that when the memory does
|
||||
// come back to us, we have the wrong metadata for it, either in
|
||||
// the MSpan structures or in the garbage collection bitmap.
|
||||
// Using SysFault here means that the program will run out of
|
||||
// memory fairly quickly in efence mode, but at least it won't
|
||||
// have mysterious crashes due to confused memory reuse.
|
||||
// It should be possible to switch back to SysFree if we also
|
||||
// implement and then call some kind of MHeap_DeleteSpan.
|
||||
if debug.efence > 0 {
|
||||
s.limit = 0 // prevent mlookup from finding this span
|
||||
sysFault(unsafe.Pointer(p), size)
|
||||
} else {
|
||||
mHeap_Free(&mheap_, s, 1)
|
||||
}
|
||||
c.local_nlargefree++
|
||||
c.local_largefree += size
|
||||
reduction := int64(size) * int64(gcpercent+100) / 100
|
||||
if int64(memstats.next_gc)-reduction > int64(heapminimum) {
|
||||
xadd64(&memstats.next_gc, -reduction)
|
||||
} else {
|
||||
atomicstore64(&memstats.next_gc, heapminimum)
|
||||
}
|
||||
res = true
|
||||
} else {
|
||||
// Free small object.
|
||||
if size > 2*ptrSize {
|
||||
*(*uintptr)(unsafe.Pointer(p + ptrSize)) = uintptrMask & 0xdeaddeaddeaddead // mark as "needs to be zeroed"
|
||||
} else if size > ptrSize {
|
||||
*(*uintptr)(unsafe.Pointer(p + ptrSize)) = 0
|
||||
}
|
||||
if head.ptr() == nil {
|
||||
head = gclinkptr(p)
|
||||
} else {
|
||||
end.ptr().next = gclinkptr(p)
|
||||
}
|
||||
end = gclinkptr(p)
|
||||
end.ptr().next = gclinkptr(0x0bade5)
|
||||
nfree++
|
||||
}
|
||||
})
|
||||
|
||||
// We need to set s.sweepgen = h.sweepgen only when all blocks are swept,
|
||||
// because of the potential for a concurrent free/SetFinalizer.
|
||||
// But we need to set it before we make the span available for allocation
|
||||
// (return it to heap or mcentral), because allocation code assumes that a
|
||||
// span is already swept if available for allocation.
|
||||
if !sweepgenset && nfree == 0 {
|
||||
// The span must be in our exclusive ownership until we update sweepgen,
|
||||
// check for potential races.
|
||||
if s.state != mSpanInUse || s.sweepgen != sweepgen-1 {
|
||||
print("MSpan_Sweep: state=", s.state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n")
|
||||
throw("MSpan_Sweep: bad span state after sweep")
|
||||
}
|
||||
atomicstore(&s.sweepgen, sweepgen)
|
||||
}
|
||||
if nfree > 0 {
|
||||
c.local_nsmallfree[cl] += uintptr(nfree)
|
||||
c.local_cachealloc -= intptr(uintptr(nfree) * size)
|
||||
reduction := int64(nfree) * int64(size) * int64(gcpercent+100) / 100
|
||||
if int64(memstats.next_gc)-reduction > int64(heapminimum) {
|
||||
xadd64(&memstats.next_gc, -reduction)
|
||||
} else {
|
||||
atomicstore64(&memstats.next_gc, heapminimum)
|
||||
}
|
||||
res = mCentral_FreeSpan(&mheap_.central[cl].mcentral, s, int32(nfree), head, end, preserve)
|
||||
// MCentral_FreeSpan updates sweepgen
|
||||
}
|
||||
if trace.enabled {
|
||||
traceGCSweepDone()
|
||||
traceNextGC()
|
||||
}
|
||||
return res
|
||||
}
|
||||
|
|
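
Both the large-object and small-object paths above lower the GC trigger by the freed bytes scaled by GOGC, clamped at heapminimum. A standalone sketch of that adjustment (the function name and plain-value signature are illustrative, not part of the runtime):

        func reduceNextGC(nextGC, heapMin uint64, freedBytes, gcpercent int64) uint64 {
                // freedBytes*(gcpercent+100)/100 mirrors the "reduction" computed above.
                reduction := freedBytes * (gcpercent + 100) / 100
                if int64(nextGC)-reduction > int64(heapMin) {
                        return uint64(int64(nextGC) - reduction)
                }
                return heapMin
        }
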
@ -4,7 +4,72 @@

// Page heap.
//
// See malloc.h for overview.
// See malloc.go for overview.

package runtime

import "unsafe"

// Main malloc heap.
// The heap itself is the "free[]" and "large" arrays,
// but all the other global data is here too.
type mheap struct {
lock mutex
free [_MaxMHeapList]mspan // free lists of given length
freelarge mspan // free lists length >= _MaxMHeapList
busy [_MaxMHeapList]mspan // busy lists of large objects of given length
busylarge mspan // busy lists of large objects length >= _MaxMHeapList
allspans **mspan // all spans out there
gcspans **mspan // copy of allspans referenced by gc marker or sweeper
nspan uint32
sweepgen uint32 // sweep generation, see comment in mspan
sweepdone uint32 // all spans are swept

// span lookup
spans **mspan
spans_mapped uintptr

// range of addresses we might see in the heap
bitmap uintptr
bitmap_mapped uintptr
arena_start uintptr
arena_used uintptr
arena_end uintptr
arena_reserved bool

// write barrier shadow data+heap.
// 64-bit systems only, enabled by GODEBUG=wbshadow=1.
shadow_enabled bool // shadow should be updated and checked
shadow_reserved bool // shadow memory is reserved
shadow_heap uintptr // heap-addr + shadow_heap = shadow heap addr
shadow_data uintptr // data-addr + shadow_data = shadow data addr
data_start uintptr // start of shadowed data addresses
data_end uintptr // end of shadowed data addresses

// central free lists for small size classes.
// the padding makes sure that the MCentrals are
// spaced CacheLineSize bytes apart, so that each MCentral.lock
// gets its own cache line.
central [_NumSizeClasses]struct {
mcentral mcentral
pad [_CacheLineSize]byte
}

spanalloc fixalloc // allocator for span*
cachealloc fixalloc // allocator for mcache*
specialfinalizeralloc fixalloc // allocator for specialfinalizer*
specialprofilealloc fixalloc // allocator for specialprofile*
speciallock mutex // lock for special record allocators.

// Malloc stats.
largefree uint64 // bytes freed for large objects (>maxsmallsize)
nlargefree uint64 // number of frees for large objects (>maxsmallsize)
nsmallfree [_NumSizeClasses]uint64 // number of frees for small objects (<=maxsmallsize)
}

var mheap_ mheap
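
As a rough sketch of how the free/freelarge split above is organized (illustrative helper, not a function in this file): spans of fewer than _MaxMHeapList pages sit on the length-indexed free lists, longer runs all share freelarge.

        func freeListFor(h *mheap, npage uintptr) *mspan {
                if npage < _MaxMHeapList {
                        return &h.free[npage] // list head for spans of exactly npage pages
                }
                return &h.freelarge // single list for all longer runs
        }
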
// An MSpan is a run of pages.
//
// When a MSpan is in the heap free list, state == MSpanFree
// and heapmap(s->start) == span, heapmap(s->start+s->npages-1) == span.

@ -12,9 +77,55 @@

// When a MSpan is allocated, state == MSpanInUse or MSpanStack
// and heapmap(i) == span for all s->start <= i < s->start+s->npages.

package runtime
// Every MSpan is in one doubly-linked list,
// either one of the MHeap's free lists or one of the
// MCentral's span lists. We use empty MSpan structures as list heads.

import "unsafe"
const (
_MSpanInUse = iota // allocated for garbage collected heap
_MSpanStack // allocated for use by stack allocator
_MSpanFree
_MSpanListHead
_MSpanDead
)

type mspan struct {
next *mspan // in a span linked list
prev *mspan // in a span linked list
start pageID // starting page number
npages uintptr // number of pages in span
freelist gclinkptr // list of free objects
// sweep generation:
// if sweepgen == h->sweepgen - 2, the span needs sweeping
// if sweepgen == h->sweepgen - 1, the span is currently being swept
// if sweepgen == h->sweepgen, the span is swept and ready to use
// h->sweepgen is incremented by 2 after every GC
sweepgen uint32
ref uint16 // capacity - number of objects in freelist
sizeclass uint8 // size class
incache bool // being used by an mcache
state uint8 // mspaninuse etc
needzero uint8 // needs to be zeroed before allocation
elemsize uintptr // computed from sizeclass or from npages
unusedsince int64 // first time spotted by gc in mspanfree state
npreleased uintptr // number of pages released to the os
limit uintptr // end of data in span
speciallock mutex // guards specials list
specials *special // linked list of special records sorted by offset.
}

func (s *mspan) base() uintptr {
return uintptr(s.start << _PageShift)
}

func (s *mspan) layout() (size, n, total uintptr) {
total = s.npages << _PageShift
size = s.elemsize
if size > 0 {
n = total / size
}
return
}

var h_allspans []*mspan // TODO: make this h.allspans once mheap can be defined in Go
var h_spans []*mspan // TODO: make this h.spans once mheap can be defined in Go
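
The sweepgen rules quoted in the struct comment amount to a three-way state check; the helper below is illustrative only (it is not part of the runtime) and simply restates those cases:

        func spanSweepState(s *mspan, h *mheap) string {
                switch s.sweepgen {
                case h.sweepgen - 2:
                        return "needs sweeping"
                case h.sweepgen - 1:
                        return "being swept"
                case h.sweepgen:
                        return "swept and ready to use"
                }
                return "bad sweepgen"
        }
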
@ -50,6 +161,73 @@ func recordspan(vh unsafe.Pointer, p unsafe.Pointer) {

h.nspan = uint32(len(h_allspans))
}

// inheap reports whether b is a pointer into a (potentially dead) heap object.
// It returns false for pointers into stack spans.
//go:nowritebarrier
func inheap(b uintptr) bool {
if b == 0 || b < mheap_.arena_start || b >= mheap_.arena_used {
return false
}
// Not a beginning of a block, consult span table to find the block beginning.
k := b >> _PageShift
x := k
x -= mheap_.arena_start >> _PageShift
s := h_spans[x]
if s == nil || pageID(k) < s.start || b >= s.limit || s.state != mSpanInUse {
return false
}
return true
}

func mlookup(v uintptr, base *uintptr, size *uintptr, sp **mspan) int32 {
_g_ := getg()

_g_.m.mcache.local_nlookup++
if ptrSize == 4 && _g_.m.mcache.local_nlookup >= 1<<30 {
// purge cache stats to prevent overflow
lock(&mheap_.lock)
purgecachedstats(_g_.m.mcache)
unlock(&mheap_.lock)
}

s := mHeap_LookupMaybe(&mheap_, unsafe.Pointer(v))
if sp != nil {
*sp = s
}
if s == nil {
if base != nil {
*base = 0
}
if size != nil {
*size = 0
}
return 0
}

p := uintptr(s.start) << _PageShift
if s.sizeclass == 0 {
// Large object.
if base != nil {
*base = p
}
if size != nil {
*size = s.npages << _PageShift
}
return 1
}

n := s.elemsize
if base != nil {
i := (uintptr(v) - uintptr(p)) / n
*base = p + i*n
}
if size != nil {
*size = n
}

return 1
}
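
For a small-object span, mlookup rounds an interior pointer down to its object's base using the element size; the same arithmetic in isolation (hypothetical helper, arguments assumed already validated):

        func objectBase(v, spanBase, elemsize uintptr) uintptr {
                i := (v - spanBase) / elemsize // index of the object containing v
                return spanBase + i*elemsize   // start of that object
        }
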
// Initialize the heap.
func mHeap_Init(h *mheap, spans_size uintptr) {
fixAlloc_Init(&h.spanalloc, unsafe.Sizeof(mspan{}), recordspan, unsafe.Pointer(h), &memstats.mspan_sys)

@ -635,6 +813,21 @@ func mSpanList_InsertBack(list *mspan, span *mspan) {

span.prev.next = span
}

const (
_KindSpecialFinalizer = 1
_KindSpecialProfile = 2
// Note: The finalizer special must be first because if we're freeing
// an object, a finalizer special will cause the freeing operation
// to abort, and we want to keep the other special records around
// if that happens.
)

type special struct {
next *special // linked list in span
offset uint16 // span offset of object
kind byte // kind of special
}

// Adds the special record s to the list of special records for
// the object p. All fields of s should be filled in except for
// offset & next, which this routine will fill in.
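
The hunk cuts off before the body of addspecial; since mspan.specials is kept sorted by offset, the insertion has to walk to the right position. A minimal sketch of that kind of splice (illustrative only; the real routine also takes speciallock and rejects duplicates):

        func insertSpecialSorted(head **special, s *special) {
                t := head
                for *t != nil && (*t).offset < s.offset {
                        t = &(*t).next
                }
                s.next = *t
                *t = s
        }
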
@ -723,6 +916,15 @@ func removespecial(p unsafe.Pointer, kind uint8) *special {

return nil
}

// The described object has a finalizer set for it.
type specialfinalizer struct {
special special
fn *funcval
nret uintptr
fint *_type
ot *ptrtype
}

// Adds a finalizer to the object p. Returns true if it succeeded.
func addfinalizer(p unsafe.Pointer, f *funcval, nret uintptr, fint *_type, ot *ptrtype) bool {
lock(&mheap_.speciallock)

@ -755,6 +957,12 @@ func removefinalizer(p unsafe.Pointer) {

unlock(&mheap_.speciallock)
}

// The described object is being heap profiled.
type specialprofile struct {
special special
b *bucket
}

// Set the heap profile bucket associated with addr to b.
func setprofilebucket(p unsafe.Pointer, b *bucket) {
lock(&mheap_.speciallock)
@ -27,8 +27,15 @@

package runtime

//var class_to_size [_NumSizeClasses]int32
//var class_to_allocnpages [_NumSizeClasses]int32
// Size classes. Computed and initialized by InitSizes.
//
// SizeToClass(0 <= n <= MaxSmallSize) returns the size class,
// 1 <= sizeclass < NumSizeClasses, for n.
// Size class 0 is reserved to mean "not small".
//
// class_to_size[i] = largest size in class i
// class_to_allocnpages[i] = number of pages to allocate when
// making new objects in class i

// The SizeToClass lookup is implemented using two arrays,
// one mapping sizes <= 1024 to their class and one mapping

@ -38,8 +45,11 @@ package runtime

// are 128-aligned, so the second array is indexed by the
// size divided by 128 (rounded up). The arrays are filled in
// by InitSizes.
//var size_to_class8 [1024/8 + 1]int8
//var size_to_class128 [(_MaxSmallSize-1024)/128 + 1]int8

var class_to_size [_NumSizeClasses]int32
var class_to_allocnpages [_NumSizeClasses]int32
var size_to_class8 [1024/8 + 1]int8
var size_to_class128 [(_MaxSmallSize-1024)/128 + 1]int8

func sizeToClass(size int32) int32 {
if size > _MaxSmallSize {
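
The hunk above is cut off inside sizeToClass. As a sketch of the two-array lookup the comments describe, rounding the size up to the next 8- or 128-byte boundary before indexing (the exact boundary test in the real function may differ slightly):

        func sizeToClassSketch(size int32) int32 {
                if size <= 1024 {
                        return int32(size_to_class8[(size+7)>>3])
                }
                return int32(size_to_class128[(size-1024+127)>>7])
        }
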
src/runtime/mstats.go (new file, 340 lines)
@ -0,0 +1,340 @@

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Memory statistics

package runtime

import "unsafe"

// Statistics.
// Shared with Go: if you edit this structure, also edit type MemStats in mem.go.
type mstats struct {
// General statistics.
alloc uint64 // bytes allocated and still in use
total_alloc uint64 // bytes allocated (even if freed)
sys uint64 // bytes obtained from system (should be sum of xxx_sys below, no locking, approximate)
nlookup uint64 // number of pointer lookups
nmalloc uint64 // number of mallocs
nfree uint64 // number of frees

// Statistics about malloc heap.
// protected by mheap.lock
heap_alloc uint64 // bytes allocated and still in use
heap_sys uint64 // bytes obtained from system
heap_idle uint64 // bytes in idle spans
heap_inuse uint64 // bytes in non-idle spans
heap_released uint64 // bytes released to the os
heap_objects uint64 // total number of allocated objects

// Statistics about allocation of low-level fixed-size structures.
// Protected by FixAlloc locks.
stacks_inuse uint64 // this number is included in heap_inuse above
stacks_sys uint64 // always 0 in mstats
mspan_inuse uint64 // mspan structures
mspan_sys uint64
mcache_inuse uint64 // mcache structures
mcache_sys uint64
buckhash_sys uint64 // profiling bucket hash table
gc_sys uint64
other_sys uint64

// Statistics about garbage collector.
// Protected by mheap or stopping the world during GC.
next_gc uint64 // next gc (in heap_alloc time)
last_gc uint64 // last gc (in absolute time)
pause_total_ns uint64
pause_ns [256]uint64 // circular buffer of recent gc pause lengths
pause_end [256]uint64 // circular buffer of recent gc end times (nanoseconds since 1970)
numgc uint32
enablegc bool
debuggc bool

// Statistics about allocation size classes.

by_size [_NumSizeClasses]struct {
size uint32
nmalloc uint64
nfree uint64
}

tinyallocs uint64 // number of tiny allocations that didn't cause actual allocation; not exported to go directly
}

var memstats mstats

// Note: the MemStats struct should be kept in sync with
// struct MStats in malloc.h

// A MemStats records statistics about the memory allocator.
type MemStats struct {
// General statistics.
Alloc uint64 // bytes allocated and still in use
TotalAlloc uint64 // bytes allocated (even if freed)
Sys uint64 // bytes obtained from system (sum of XxxSys below)
Lookups uint64 // number of pointer lookups
Mallocs uint64 // number of mallocs
Frees uint64 // number of frees

// Main allocation heap statistics.
HeapAlloc uint64 // bytes allocated and still in use
HeapSys uint64 // bytes obtained from system
HeapIdle uint64 // bytes in idle spans
HeapInuse uint64 // bytes in non-idle span
HeapReleased uint64 // bytes released to the OS
HeapObjects uint64 // total number of allocated objects

// Low-level fixed-size structure allocator statistics.
// Inuse is bytes used now.
// Sys is bytes obtained from system.
StackInuse uint64 // bytes used by stack allocator
StackSys uint64
MSpanInuse uint64 // mspan structures
MSpanSys uint64
MCacheInuse uint64 // mcache structures
MCacheSys uint64
BuckHashSys uint64 // profiling bucket hash table
GCSys uint64 // GC metadata
OtherSys uint64 // other system allocations

// Garbage collector statistics.
NextGC uint64 // next collection will happen when HeapAlloc ≥ this amount
LastGC uint64 // end time of last collection (nanoseconds since 1970)
PauseTotalNs uint64
PauseNs [256]uint64 // circular buffer of recent GC pause durations, most recent at [(NumGC+255)%256]
PauseEnd [256]uint64 // circular buffer of recent GC pause end times
NumGC uint32
EnableGC bool
DebugGC bool

// Per-size allocation statistics.
// 61 is NumSizeClasses in the C code.
BySize [61]struct {
Size uint32
Mallocs uint64
Frees uint64
}
}

// Size of the trailing by_size array differs between Go and C,
// and all data after by_size is local to runtime, not exported.
// NumSizeClasses was changed, but we can not change Go struct because of backward compatibility.
// sizeof_C_MStats is what C thinks about size of Go struct.
var sizeof_C_MStats = unsafe.Offsetof(memstats.by_size) + 61*unsafe.Sizeof(memstats.by_size[0])

func init() {
var memStats MemStats
if sizeof_C_MStats != unsafe.Sizeof(memStats) {
println(sizeof_C_MStats, unsafe.Sizeof(memStats))
throw("MStats vs MemStatsType size mismatch")
}
}

// ReadMemStats populates m with memory allocator statistics.
func ReadMemStats(m *MemStats) {
// Have to acquire worldsema to stop the world,
// because stoptheworld can only be used by
// one goroutine at a time, and there might be
// a pending garbage collection already calling it.
semacquire(&worldsema, false)
gp := getg()
gp.m.preemptoff = "read mem stats"
systemstack(stoptheworld)

systemstack(func() {
readmemstats_m(m)
})

gp.m.preemptoff = ""
gp.m.locks++
semrelease(&worldsema)
systemstack(starttheworld)
gp.m.locks--
}
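
For reference, a minimal user-level caller of the API wired up here:

        package main

        import (
                "fmt"
                "runtime"
        )

        func main() {
                var m runtime.MemStats
                runtime.ReadMemStats(&m)
                fmt.Printf("live heap %d KiB, next GC at %d KiB, %d collections\n",
                        m.HeapAlloc>>10, m.NextGC>>10, m.NumGC)
        }
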
func readmemstats_m(stats *MemStats) {
updatememstats(nil)

// Size of the trailing by_size array differs between Go and C,
// NumSizeClasses was changed, but we can not change Go struct because of backward compatibility.
memmove(unsafe.Pointer(stats), unsafe.Pointer(&memstats), sizeof_C_MStats)

// Stack numbers are part of the heap numbers, separate those out for user consumption
stats.StackSys = stats.StackInuse
stats.HeapInuse -= stats.StackInuse
stats.HeapSys -= stats.StackInuse
}

//go:linkname readGCStats runtime/debug.readGCStats
func readGCStats(pauses *[]uint64) {
systemstack(func() {
readGCStats_m(pauses)
})
}

func readGCStats_m(pauses *[]uint64) {
p := *pauses
// Calling code in runtime/debug should make the slice large enough.
if cap(p) < len(memstats.pause_ns)+3 {
throw("short slice passed to readGCStats")
}

// Pass back: pauses, pause ends, last gc (absolute time), number of gc, total pause ns.
lock(&mheap_.lock)

n := memstats.numgc
if n > uint32(len(memstats.pause_ns)) {
n = uint32(len(memstats.pause_ns))
}

// The pause buffer is circular. The most recent pause is at
// pause_ns[(numgc-1)%len(pause_ns)], and then backward
// from there to go back farther in time. We deliver the times
// most recent first (in p[0]).
p = p[:cap(p)]
for i := uint32(0); i < n; i++ {
j := (memstats.numgc - 1 - i) % uint32(len(memstats.pause_ns))
p[i] = memstats.pause_ns[j]
p[n+i] = memstats.pause_end[j]
}

p[n+n] = memstats.last_gc
p[n+n+1] = uint64(memstats.numgc)
p[n+n+2] = memstats.pause_total_ns
unlock(&mheap_.lock)
*pauses = p[:n+n+3]
}
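
The same circular-buffer layout is visible to users through MemStats.PauseNs; a small illustrative helper (in a user program, assuming `import "runtime"`) that pulls out the most recent pause:

        func lastPauseNs(m *runtime.MemStats) uint64 {
                if m.NumGC == 0 {
                        return 0 // no collections yet
                }
                return m.PauseNs[(m.NumGC+255)%256] // most recent entry, per the field comment
        }
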
//go:nowritebarrier
func updatememstats(stats *gcstats) {
if stats != nil {
*stats = gcstats{}
}
for mp := allm; mp != nil; mp = mp.alllink {
if stats != nil {
src := (*[unsafe.Sizeof(gcstats{}) / 8]uint64)(unsafe.Pointer(&mp.gcstats))
dst := (*[unsafe.Sizeof(gcstats{}) / 8]uint64)(unsafe.Pointer(stats))
for i, v := range src {
dst[i] += v
}
mp.gcstats = gcstats{}
}
}

memstats.mcache_inuse = uint64(mheap_.cachealloc.inuse)
memstats.mspan_inuse = uint64(mheap_.spanalloc.inuse)
memstats.sys = memstats.heap_sys + memstats.stacks_sys + memstats.mspan_sys +
memstats.mcache_sys + memstats.buckhash_sys + memstats.gc_sys + memstats.other_sys

// Calculate memory allocator stats.
// During program execution we only count number of frees and amount of freed memory.
// Current number of alive objects in the heap and amount of alive heap memory
// are calculated by scanning all spans.
// Total number of mallocs is calculated as number of frees plus number of alive objects.
// Similarly, total amount of allocated memory is calculated as amount of freed memory
// plus amount of alive heap memory.
memstats.alloc = 0
memstats.total_alloc = 0
memstats.nmalloc = 0
memstats.nfree = 0
for i := 0; i < len(memstats.by_size); i++ {
memstats.by_size[i].nmalloc = 0
memstats.by_size[i].nfree = 0
}

// Flush MCache's to MCentral.
systemstack(flushallmcaches)

// Aggregate local stats.
cachestats()

// Scan all spans and count number of alive objects.
lock(&mheap_.lock)
for i := uint32(0); i < mheap_.nspan; i++ {
s := h_allspans[i]
if s.state != mSpanInUse {
continue
}
if s.sizeclass == 0 {
memstats.nmalloc++
memstats.alloc += uint64(s.elemsize)
} else {
memstats.nmalloc += uint64(s.ref)
memstats.by_size[s.sizeclass].nmalloc += uint64(s.ref)
memstats.alloc += uint64(s.ref) * uint64(s.elemsize)
}
}
unlock(&mheap_.lock)

// Aggregate by size class.
smallfree := uint64(0)
memstats.nfree = mheap_.nlargefree
for i := 0; i < len(memstats.by_size); i++ {
memstats.nfree += mheap_.nsmallfree[i]
memstats.by_size[i].nfree = mheap_.nsmallfree[i]
memstats.by_size[i].nmalloc += mheap_.nsmallfree[i]
smallfree += uint64(mheap_.nsmallfree[i]) * uint64(class_to_size[i])
}
memstats.nfree += memstats.tinyallocs
memstats.nmalloc += memstats.nfree

// Calculate derived stats.
memstats.total_alloc = uint64(memstats.alloc) + uint64(mheap_.largefree) + smallfree
memstats.heap_alloc = memstats.alloc
memstats.heap_objects = memstats.nmalloc - memstats.nfree
}
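
A worked example of the accounting described in updatememstats, with made-up numbers (illustrative only):

        func derivedStatsExample() (nmalloc, totalAlloc uint64) {
                const (
                        nfree      = 1000     // frees recorded while sweeping
                        freedBytes = 64 << 10 // bytes freed
                        nlive      = 250      // live objects found by scanning spans
                        liveBytes  = 16 << 10 // bytes of live heap memory
                )
                nmalloc = nfree + nlive             // mallocs = frees + live objects
                totalAlloc = freedBytes + liveBytes // total allocated = freed + live bytes
                return
        }
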
//go:nowritebarrier
func cachestats() {
for i := 0; ; i++ {
p := allp[i]
if p == nil {
break
}
c := p.mcache
if c == nil {
continue
}
purgecachedstats(c)
}
}

//go:nowritebarrier
func flushallmcaches() {
for i := 0; ; i++ {
p := allp[i]
if p == nil {
break
}
c := p.mcache
if c == nil {
continue
}
mCache_ReleaseAll(c)
stackcache_clear(c)
}
}

//go:nosplit
func purgecachedstats(c *mcache) {
// Protected by either heap or GC lock.
h := &mheap_
memstats.heap_alloc += uint64(c.local_cachealloc)
c.local_cachealloc = 0
if trace.enabled {
traceHeapAlloc()
}
memstats.tinyallocs += uint64(c.local_tinyallocs)
c.local_tinyallocs = 0
memstats.nlookup += uint64(c.local_nlookup)
c.local_nlookup = 0
h.largefree += uint64(c.local_largefree)
c.local_largefree = 0
h.nlargefree += uint64(c.local_nlargefree)
c.local_nlargefree = 0
for i := 0; i < len(c.local_nsmallfree); i++ {
h.nsmallfree[i] += uint64(c.local_nsmallfree[i])
c.local_nsmallfree[i] = 0
}
}
@ -528,6 +528,21 @@ func quiesce(mastergp *g) {

mcall(mquiesce)
}

// Holding worldsema grants an M the right to try to stop the world.
// The procedure is:
//
// semacquire(&worldsema);
// m.preemptoff = "reason";
// stoptheworld();
//
// ... do stuff ...
//
// m.preemptoff = "";
// semrelease(&worldsema);
// starttheworld();
//
var worldsema uint32 = 1

// This is used by the GC as well as the routines that do stack dumps. In the case
// of GC all the routines can be reliably stopped. This is not always the case
// when the system is in panic or being exited.
@ -239,3 +239,13 @@ func prefetcht0(addr uintptr)

func prefetcht1(addr uintptr)
func prefetcht2(addr uintptr)
func prefetchnta(addr uintptr)

func unixnanotime() int64 {
sec, nsec := time_now()
return sec*1e9 + int64(nsec)
}

// round n up to a multiple of a. a must be a power of 2.
func round(n, a uintptr) uintptr {
return (n + a - 1) &^ (a - 1)
}
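
A quick check of the rounding identity, with the results noted in comments (illustrative only):

        func roundExamples() {
                _ = round(13, 8)      // (13+7)&^7 = 16
                _ = round(16, 8)      // 16, already a multiple of 8
                _ = round(4097, 4096) // rounds up to 8192
        }
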
@ -299,3 +299,13 @@ func readvarint(p []byte) (newp []byte, val uint32) {

}
return p, v
}

type stackmap struct {
n int32 // number of bitmaps
nbit int32 // number of bits in each bitmap
bytedata [1]byte // bitmaps, each starting on a 32-bit boundary
}

//go:nowritebarrier
func stackmapdata(stkmap *stackmap, n int32) bitvector {
if n < 0 || n >= stkmap.n {
throw("stackmapdata: index out of range")
}
return bitvector{stkmap.nbit, (*byte)(add(unsafe.Pointer(&stkmap.bytedata), uintptr(n*((stkmap.nbit+31)/32*4))))}
}