[dev.garbage] runtime: allocate directly from GC mark bits

Instead of building a freelist from the mark bits generated
by the GC, this CL allocates directly from the mark bits.

The approach moves the mark bits from the pointer/no pointer
heap structures into their own per span data structures. The
mark/allocation vectors consist of a single mark bit per
object. Two vectors are maintained, one for allocation and
one for the GC's mark phase. During the GC cycle's sweep
phase the interpretation of the vectors is swapped. The
mark vector becomes the allocation vector and the old
allocation vector is cleared and becomes the mark vector that
the next GC cycle will use.
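
A minimal sketch of the scheme, using illustrative types
rather than the runtime's actual span layout (the field and
helper names below are assumptions for exposition only):

    // span keeps two per-object bit vectors. allocBits records which
    // slots are in use; gcMarkBits collects marks during the GC cycle.
    type span struct {
            nelems     uintptr // number of object slots in the span
            allocBits  []uint8 // set bit => slot is allocated (not free)
            gcMarkBits []uint8 // set bit => object was marked by the GC
            freeindex  uintptr // slots below this index have been allocated from
    }

    // sweep swaps the interpretation of the two vectors: the bits the
    // GC just marked become the allocation bits, and a freshly cleared
    // vector becomes the mark bits the next GC cycle will use.
    func (s *span) sweep() {
            s.allocBits = s.gcMarkBits
            s.gcMarkBits = make([]uint8, (s.nelems+7)/8)
            s.freeindex = 0 // restart the allocation scan at the span start
    }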

Marked entries in the allocation vector indicate that the
object is not free. Each allocation vector maintains a boundary
between areas of the span already allocated from and areas
not yet allocated from. As objects are allocated this boundary
is moved until it reaches the end of the span. At this point
further allocations will be done from another span.
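
Continuing the sketch above, allocation scans allocBits from
freeindex for the first clear bit and moves the boundary past
it (a hypothetical nextFreeIndex, not the runtime's routine):

    // nextFreeIndex returns the index of the next free slot at or after
    // freeindex, or nelems if the span is exhausted. Advancing freeindex
    // past a slot is what records it as allocated; the bit itself only
    // says whether the GC found the object live last cycle.
    func (s *span) nextFreeIndex() uintptr {
            for i := s.freeindex; i < s.nelems; i++ {
                    if s.allocBits[i/8]&(1<<(i%8)) == 0 {
                            s.freeindex = i + 1 // advance the allocated/unallocated boundary
                            return i
                    }
            }
            s.freeindex = s.nelems
            return s.nelems // span is full; allocate from another span
    }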

Since we no longer sweep a span inspecting each freed
object, the responsibility for maintaining the pointer/scalar
bits in the heapBitMap now falls to the routines doing the
actual allocation.
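
A rough sketch of that shift in responsibility, with
setHeapBits standing in for the runtime's heapBitsSetType-style
helpers (the names here are hypothetical):

    // allocate picks a slot and immediately records the pointer/scalar
    // metadata for the new object, since sweep no longer visits freed
    // objects and cannot do it for us.
    func (s *span) allocate(hasPointers bool) (index uintptr, ok bool) {
            i := s.nextFreeIndex()
            if i == s.nelems {
                    return 0, false // span exhausted; caller refills from another span
            }
            setHeapBits(i, hasPointers) // write heap bitmap bits at allocation time
            return i, true
    }

    // setHeapBits is a stand-in for heapBitsSetType / heapBitsSetTypeNoScan:
    // it would record which words of slot i hold pointers.
    func setHeapBits(i uintptr, hasPointers bool) {}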

This CL is functionally complete and ready for performance
tuning.

Change-Id: I336e0fc21eef1066e0b68c7067cc71b9f3d50e04
Reviewed-on: https://go-review.googlesource.com/19470
Reviewed-by: Austin Clements <austin@google.com>
Rick Hudson 2016-02-11 13:57:58 -05:00
parent dc65a82eff
commit 3479b065d4
9 changed files with 282 additions and 219 deletions

@@ -502,23 +502,34 @@ const (
// weight allocation. If it is a heavy weight allocation the caller must
// determine whether a new GC cycle needs to be started or if the GC is active
// whether this goroutine needs to assist the GC.
// https://golang.org/cl/5350 motivates why this routine should perform a
// prefetch.
func (c *mcache) nextFree(sizeclass int8) (v gclinkptr, shouldhelpgc bool) {
s := c.alloc[sizeclass]
v = s.freelist
if v.ptr() == nil {
shouldhelpgc = false
freeIndex := s.nextFreeIndex(s.freeindex)
if freeIndex == s.nelems {
// The span is full.
if uintptr(s.ref) != s.nelems {
throw("s.ref != s.nelems && freeIndex == s.nelems")
}
systemstack(func() {
c.refill(int32(sizeclass))
})
shouldhelpgc = true
s = c.alloc[sizeclass]
v = s.freelist
freeIndex = s.nextFreeIndex(s.freeindex)
}
s.freelist = v.ptr().next
if freeIndex >= s.nelems {
throw("freeIndex is not valid")
}
v = gclinkptr(freeIndex*s.elemsize + s.base())
// Advance the freeIndex.
s.freeindex = freeIndex + 1
s.ref++
// prefetchnta offers best performance, see change list message.
prefetchnta(uintptr(v.ptr().next))
if uintptr(s.ref) > s.nelems {
throw("s.ref > s.nelems")
}
return
}
@@ -655,10 +666,8 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
v, shouldhelpgc = c.nextFree(sizeclass)
x = unsafe.Pointer(v)
if flags&flagNoZero == 0 {
v.ptr().next = 0
if size > 2*sys.PtrSize && ((*[2]uintptr)(x))[1] != 0 {
memclr(unsafe.Pointer(v), size)
}
memclr(unsafe.Pointer(v), size)
// TODO:(rlh) Only clear if object is not known to be zeroed.
}
}
} else {
@@ -667,12 +676,13 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
systemstack(func() {
s = largeAlloc(size, flags)
})
s.freeindex = 1
x = unsafe.Pointer(uintptr(s.start << pageShift))
size = s.elemsize
}
if flags&flagNoScan != 0 {
// All objects are pre-marked as noscan. Nothing to do.
heapBitsSetTypeNoScan(uintptr(x), size)
} else {
// If allocating a defer+arg block, now that we've picked a malloc size
// large enough to hold everything, cut the "asked for" size down to