Mirror of https://github.com/golang/go.git
runtime: keep objects in free lists marked as allocated.
Restore https://golang.org/cl/41040043 after GC rewrite. Original description:

On the plus side, we don't need to change the bits on malloc and free.
On the downside, we need to mark objects in the free lists during GC.
But the free lists are small at GC time, so it should be a net win.

benchmark            old ns/op    new ns/op    delta
BenchmarkMalloc8     21.9         20.4         -6.85%
BenchmarkMalloc16    31.1         29.6         -4.82%

LGTM=khr
R=khr
CC=golang-codereviews, rlh, rsc
https://golang.org/cl/122280043
commit 187d0f6720
parent aa549ce449
4 changed files with 136 additions and 133 deletions
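The whole change fits in one invariant: an object on a span's free list keeps the same bitmap state as an allocated object, so malloc and free touch no bits, and the sweeper compensates by marking every free-list entry before it collects unmarked objects. Below is a toy Go model of that invariant, purely illustrative (the span type, its fields, and the slot indexes are made up; the real code manipulates packed per-word bitmaps in C):

package main

import "fmt"

// Toy model of a span: fixed-size object slots, a free list of slot
// indexes, and one mark bit per slot.
type span struct {
	nobj   int
	free   []int  // slots currently on the free list
	marked []bool // mark bits set during the GC mark phase
}

// sweep collects every unmarked slot. Free slots look exactly like
// allocated ones, so the sweeper first marks everything on the free
// list to keep it from being treated as garbage.
func (s *span) sweep() (collected []int) {
	for _, i := range s.free {
		s.marked[i] = true // the new step: mark objects in free lists
	}
	for i := 0; i < s.nobj; i++ {
		if s.marked[i] {
			s.marked[i] = false // reset mark for the next cycle
			continue
		}
		collected = append(collected, i)
		s.free = append(s.free, i) // garbage goes back on the free list
	}
	return collected
}

func main() {
	s := &span{nobj: 4, free: []int{2}, marked: make([]bool, 4)}
	s.marked[0] = true     // slot 0 was reached during marking
	fmt.Println(s.sweep()) // [1 3]: the live slot and the free slot survive
}

The trade matches the description above: sweep does a little extra work over free lists that are short at GC time, and allocation does less.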
src/pkg/runtime/heapdump.c

@@ -525,11 +525,17 @@ dumproots(void)
 	runtime·iterate_finq(finq_callback);
 }
 
+// Bit vector of free marks.
+// Needs to be as big as the largest number of objects per span.
+#pragma dataflag NOPTR
+static byte free[PageSize/8];
+
 static void
 dumpobjs(void)
 {
-	uintptr i, j, size, n, off, shift, *bitp, bits;
+	uintptr i, j, size, n;
 	MSpan *s;
+	MLink *l;
 	byte *p;
 
 	for(i = 0; i < runtime·mheap.nspan; i++) {
@@ -539,13 +545,15 @@ dumpobjs(void)
 		p = (byte*)(s->start << PageShift);
 		size = s->elemsize;
 		n = (s->npages << PageShift) / size;
+		if(n > nelem(free))
+			runtime·throw("free array doesn't have enough entries");
+		for(l = s->freelist; l != nil; l = l->next)
+			free[((byte*)l - p) / size] = true;
 		for(j = 0; j < n; j++, p += size) {
-			off = (uintptr*)p - (uintptr*)runtime·mheap.arena_start;
-			bitp = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
-			shift = (off % wordsPerBitmapWord) * gcBits;
-			bits = (*bitp >> shift) & bitMask;
-			if(bits != bitAllocated)
+			if(free[j]) {
+				free[j] = false;
 				continue;
+			}
 			dumpobj(p, size, makeheapobjbv(p, size));
 		}
 	}
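Note how dumpobjs compensates for the lost bitAllocated state: it can no longer tell free objects apart by their bitmap bits, so it builds a byte-per-object table of free marks from the span's free list. The array is sized PageSize/8 because the smallest size class is 8 bytes, so one page holds at most PageSize/8 objects. A minimal Go sketch of the slot-index computation the new loop relies on (spanBase, elemSize, and the free-list addresses are invented values):

package main

import "fmt"

// objIndex mirrors free[((byte*)l - p) / size] in dumpobjs: an
// object's slot number is its offset from the span base divided by
// the span's element size.
func objIndex(obj, spanBase, elemSize uintptr) uintptr {
	return (obj - spanBase) / elemSize
}

func main() {
	const spanBase, elemSize = 0x1000, 32
	freeList := []uintptr{0x1040, 0x10c0} // two free slots (invented)

	free := make([]bool, 8) // one entry per object in the span
	for _, l := range freeList {
		free[objIndex(l, spanBase, elemSize)] = true
	}
	fmt.Println(free) // slots 2 and 6 are free, so the dumper skips them
}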
src/pkg/runtime/malloc.go

@@ -41,11 +41,9 @@ const (
 	bitsDead    = 0
 	bitsPointer = 2
 
-	bitMiddle    = 0
 	bitBoundary = 1
-	bitAllocated = 2
-	bitMarked    = 3
-	bitMask      = bitMiddle | bitBoundary | bitAllocated | bitMarked
+	bitMarked   = 2
+	bitMask     = bitBoundary | bitMarked
 )
 
 // All zero-sized allocations return a pointer to this byte.
@@ -185,31 +183,28 @@ func gomallocgc(size uintptr, typ *_type, flags int) unsafe.Pointer {
 		size = uintptr(s.elemsize)
 	}
 
+	if flags&flagNoScan != 0 {
+		// All objects are pre-marked as noscan.
+		goto marked
+	}
+
 	// From here till marked label marking the object as allocated
 	// and storing type info in the GC bitmap.
 	{
 		arena_start := uintptr(unsafe.Pointer(mheap_.arena_start))
 		off := (uintptr(x) - arena_start) / ptrSize
 		xbits := (*uintptr)(unsafe.Pointer(arena_start - off/wordsPerBitmapWord*ptrSize - ptrSize))
 		shift := (off % wordsPerBitmapWord) * gcBits
-		if debugMalloc && (((*xbits)>>shift)&bitMask) != bitBoundary {
+		if debugMalloc && ((*xbits>>shift)&(bitMask|bitPtrMask)) != bitBoundary {
+			println("runtime: bits =", (*xbits>>shift)&(bitMask|bitPtrMask))
 			gothrow("bad bits in markallocated")
 		}
 
 		var ti, te uintptr
 		var ptrmask *uint8
-		if flags&flagNoScan != 0 {
-			// bitsDead in the first quadruple means don't scan.
-			if size == ptrSize {
-				*xbits = (*xbits & ^((bitBoundary | bitPtrMask) << shift)) | ((bitAllocated + (bitsDead << 2)) << shift)
-			} else {
-				xbitsb := (*uint8)(add(unsafe.Pointer(xbits), shift/8))
-				*xbitsb = bitAllocated + (bitsDead << 2)
-			}
-			goto marked
-		}
 		if size == ptrSize {
 			// It's one word and it has pointers, it must be a pointer.
-			*xbits = (*xbits & ^((bitBoundary | bitPtrMask) << shift)) | ((bitAllocated | (bitsPointer << 2)) << shift)
+			*xbits |= (bitsPointer << 2) << shift
 			goto marked
 		}
 		if typ != nil && (uintptr(typ.gc[0])|uintptr(typ.gc[1])) != 0 && uintptr(typ.size) > ptrSize {
@@ -245,7 +240,7 @@ func gomallocgc(size uintptr, typ *_type, flags int) unsafe.Pointer {
 		}
 		if size == 2*ptrSize {
 			xbitsb := (*uint8)(add(unsafe.Pointer(xbits), shift/8))
-			*xbitsb = *ptrmask | bitAllocated
+			*xbitsb = *ptrmask | bitBoundary
 			goto marked
 		}
 		te = uintptr(typ.size) / ptrSize
@@ -256,7 +251,7 @@ func gomallocgc(size uintptr, typ *_type, flags int) unsafe.Pointer {
 		}
 		if size == 2*ptrSize {
 			xbitsb := (*uint8)(add(unsafe.Pointer(xbits), shift/8))
-			*xbitsb = (bitsPointer << 2) | (bitsPointer << 6) | bitAllocated
+			*xbitsb = (bitsPointer << 2) | (bitsPointer << 6) | bitBoundary
 			goto marked
 		}
 		// Copy pointer bitmask into the bitmap.
@@ -270,10 +265,10 @@ func gomallocgc(size uintptr, typ *_type, flags int) unsafe.Pointer {
 			}
 		}
 		if i == 0 {
-			v |= bitAllocated
+			v |= bitBoundary
 		}
 		if i+ptrSize == size0 {
-			v &= ^uint8(bitPtrMask << 4)
+			v &^= uint8(bitPtrMask << 4)
 		}
 
 		off := (uintptr(x) + i - arena_start) / ptrSize
@@ -290,6 +285,7 @@ func gomallocgc(size uintptr, typ *_type, flags int) unsafe.Pointer {
 			xbitsb := (*uint8)(add(unsafe.Pointer(xbits), shift/8))
 			*xbitsb = bitsDead << 2
 		}
 	}
+
 marked:
 	mp.mallocing = 0
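The off/xbits/shift computation above is the heap bitmap's addressing scheme: every heap word gets gcBits (4) bits of metadata, packed into uintptr-sized words stored just below arena_start and growing downward. A hedged Go sketch of the arithmetic (a 64-bit system is assumed; bitmapPos is an illustrative helper, not a runtime API):

package main

import "fmt"

const (
	ptrSize            = 8                    // 64-bit heap words (assumed)
	gcBits             = 4                    // bitmap bits per heap word
	wordsPerBitmapWord = ptrSize * 8 / gcBits // 16 heap words per bitmap word
)

// bitmapPos mirrors the recurring pattern
//
//	off = (uintptr*)p - (uintptr*)arena_start;
//	bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
//	shift = (off % wordsPerBitmapWord) * gcBits;
//
// returning which bitmap word below arena_start holds the bits for
// heap address p, and the shift of those bits within that word.
func bitmapPos(p, arenaStart uintptr) (wordsBelow, shift uintptr) {
	off := (p - arenaStart) / ptrSize
	return off/wordsPerBitmapWord + 1, (off % wordsPerBitmapWord) * gcBits
}

func main() {
	const arenaStart uintptr = 0x10000
	for _, p := range []uintptr{arenaStart, arenaStart + 8, arenaStart + 16*8} {
		w, s := bitmapPos(p, arenaStart)
		fmt.Printf("p=%#x: %d words below arena_start, shift %d\n", p, w, s)
	}
}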
src/pkg/runtime/mgc0.c

@@ -318,7 +318,7 @@ scanblock(byte *b, uintptr n, byte *ptrmask)
 			bits = cached;
 			cached >>= gcBits;
 			ncached--;
-			if(i != 0 && (bits&bitMask) != bitMiddle)
+			if(i != 0 && (bits&bitBoundary) != 0)
 				break; // reached beginning of the next object
 			bits = (bits>>2)&BitsMask;
 			if(bits == BitsDead)
@@ -403,13 +403,13 @@ scanblock(byte *b, uintptr n, byte *ptrmask)
 		shift = (off % wordsPerBitmapWord) * gcBits;
 		xbits = *bitp;
 		bits = (xbits >> shift) & bitMask;
-		if(bits == bitMiddle) {
+		if((bits&bitBoundary) == 0) {
 			// Not a beginning of a block, check if we have block boundary in xbits.
 			while(shift > 0) {
 				obj -= PtrSize;
 				shift -= gcBits;
 				bits = (xbits >> shift) & bitMask;
-				if(bits != bitMiddle)
+				if((bits&bitBoundary) != 0)
 					goto havebits;
 			}
 			// Otherwise consult span table to find the block beginning.
@@ -426,7 +426,8 @@ scanblock(byte *b, uintptr n, byte *ptrmask)
 				p = p+idx*size;
 			}
 			if(p == obj) {
-				runtime·printf("runtime: failed to find block beginning for %p s->limit=%p\n", p, s->limit);
+				runtime·printf("runtime: failed to find block beginning for %p s=%p s->limit=%p\n",
+					p, s->start*PageSize, s->limit);
 				runtime·throw("failed to find block beginning");
 			}
 			obj = p;
@@ -436,8 +437,8 @@ scanblock(byte *b, uintptr n, byte *ptrmask)
 		havebits:
 			// Now we have bits, bitp, and shift correct for
 			// obj pointing at the base of the object.
-			// Only care about allocated and not marked.
-			if(bits != bitAllocated)
+			// Only care about not marked objects.
+			if((bits&bitMarked) != 0)
 				continue;
 			if(work.nproc == 1)
 				*bitp |= bitMarked<<shift;
@@ -445,12 +446,12 @@ scanblock(byte *b, uintptr n, byte *ptrmask)
 			for(;;) {
 				xbits = *bitp;
 				bits = (xbits>>shift) & bitMask;
-				if(bits != bitAllocated)
+				if((bits&bitMarked) != 0)
 					break;
 				if(runtime·casp((void**)bitp, (void*)xbits, (void*)(xbits|(bitMarked<<shift))))
 					break;
 			}
-			if(bits != bitAllocated)
+			if((bits&bitMarked) != 0)
 				continue;
 		}
 		if(((xbits>>(shift+2))&BitsMask) == BitsDead)
@@ -892,7 +893,7 @@ runtime·MSpan_Sweep(MSpan *s)
 	byte *p;
 	MCache *c;
 	byte *arena_start;
-	MLink head, *end;
+	MLink head, *end, *link;
 	Special *special, **specialp, *y;
 	bool res, sweepgenset;
 
@@ -922,6 +923,14 @@ runtime·MSpan_Sweep(MSpan *s)
 	c = g->m->mcache;
 	sweepgenset = false;
 
+	// Mark any free objects in this span so we don't collect them.
+	for(link = s->freelist; link != nil; link = link->next) {
+		off = (uintptr*)link - (uintptr*)arena_start;
+		bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
+		shift = (off % wordsPerBitmapWord) * gcBits;
+		*bitp |= bitMarked<<shift;
+	}
+
 	// Unlink & free special records for any objects we're about to free.
 	specialp = &s->specials;
 	special = *specialp;
@@ -932,7 +941,7 @@ runtime·MSpan_Sweep(MSpan *s)
 		bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
 		shift = (off % wordsPerBitmapWord) * gcBits;
 		bits = (*bitp>>shift) & bitMask;
-		if(bits == bitAllocated) {
+		if((bits&bitMarked) == 0) {
 			// Find the exact byte for which the special was setup
 			// (as opposed to object beginning).
 			p = (byte*)(s->start << PageShift) + special->offset;
@@ -946,10 +955,6 @@ runtime·MSpan_Sweep(MSpan *s)
 			}
 		} else {
 			// object is still live: keep special record
-			if(bits != bitMarked) {
-				runtime·printf("runtime: bad bits for special object %p: %d\n", p, (int32)bits);
-				runtime·throw("runtime: bad bits for special object");
-			}
 			specialp = &special->next;
 			special = *specialp;
 		}
@@ -966,20 +971,17 @@ runtime·MSpan_Sweep(MSpan *s)
 		xbits = *bitp;
 		bits = (xbits>>shift) & bitMask;
 
-		// Non-allocated object, ignore.
-		if(bits == bitBoundary)
-			continue;
 		// Allocated and marked object, reset bits to allocated.
-		if(bits == bitMarked) {
-			*bitp = (xbits & ~(bitMarked<<shift)) | (bitAllocated<<shift);
+		if((bits&bitMarked) != 0) {
+			*bitp &= ~(bitMarked<<shift);
 			continue;
 		}
 		// At this point we know that we are looking at garbage object
 		// that needs to be collected.
		if(runtime·debug.allocfreetrace)
 			runtime·tracefree(p, size);
-		// Reset to boundary.
-		*bitp = (xbits & ~(bitAllocated<<shift)) | (bitBoundary<<shift);
+		// Reset to allocated+noscan.
+		*bitp = (xbits & ~((bitMarked|(BitsMask<<2))<<shift)) | ((uintptr)BitsDead<<(shift+2));
 		if(cl == 0) {
 			// Free large span.
 			runtime·unmarkspan(p, s->npages<<PageShift);
@@ -1857,13 +1859,13 @@ runtime·unrollgcproginplace_m(void)
 	off = (uintptr*)v - (uintptr*)arena_start;
 	b = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
 	shift = (off % wordsPerBitmapWord) * gcBits;
-	*b |= bitAllocated<<shift;
+	*b |= bitBoundary<<shift;
 	// Mark word after last as BitsDead.
 	if(size0 < size) {
 		off = (uintptr*)((byte*)v + size0) - (uintptr*)arena_start;
 		b = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
 		shift = (off % wordsPerBitmapWord) * gcBits;
-		*b &= ~(bitPtrMask<<shift) | (BitsDead<<(shift+2));
+		*b &= ~(bitPtrMask<<shift) | ((uintptr)BitsDead<<(shift+2));
 	}
 }
@@ -1931,7 +1933,7 @@ runtime·markspan(void *v, uintptr size, uintptr n, bool leftover)
 			b0 = b;
 			x = 0;
 		}
-		x |= bitBoundary<<shift;
+		x |= (bitBoundary<<shift) | ((uintptr)BitsDead<<(shift+2));
 	}
 	*b0 = x;
 }
@@ -1958,8 +1960,7 @@ runtime·unmarkspan(void *v, uintptr n)
 	// one span, so no other goroutines are changing these
 	// bitmap words.
 	n /= wordsPerBitmapWord;
-	while(n-- > 0)
-		*b-- = 0;
+	runtime·memclr((byte*)(b - n + 1), n*PtrSize);
 }
 
 void
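After this change the sweep loop has only two cases per object: if the mark bit is set (by the scan or by the new free-list pass), clear it and keep the object; otherwise the object is garbage and its bits are reset to allocated+noscan, keeping the boundary bit and setting the type bits to BitsDead. A hedged Go sketch of that transition (constants copied from the new mgc0.h; sweepWord is illustrative):

package main

import "fmt"

const (
	bitBoundary = 1 // boundary of an object
	bitMarked   = 2 // marked object
	BitsMask    = 3 // mask for the two type bits per heap word
	BitsDead    = 0 // type value meaning "no pointers, don't scan"
)

// sweepWord applies the sweep transition to one object's bits at the
// given shift: a marked object just loses its mark bit; an unmarked
// object is garbage and is reset to boundary + BitsDead, i.e. it keeps
// looking allocated but is noscan.
func sweepWord(xbits, shift uintptr) (out uintptr, garbage bool) {
	if (xbits>>shift)&bitMarked != 0 {
		return xbits &^ (bitMarked << shift), false
	}
	out = xbits&^((bitMarked|BitsMask<<2)<<shift) | BitsDead<<(shift+2)
	return out, true
}

func main() {
	// 0x3 = boundary+marked (live); 0x5 = boundary+pointer type (unmarked).
	for _, x := range []uintptr{0x3, 0x5} {
		out, garbage := sweepWord(x, 0)
		fmt.Printf("%#x -> %#x garbage=%v\n", x, out, garbage)
	}
}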
src/pkg/runtime/mgc0.h

@@ -70,10 +70,8 @@ enum {
 // the off/16+1'th word before mheap.arena_start. (On a 32-bit system,
 // the only difference is that the divisor is 8.)
 
-#define bitMiddle	((uintptr)0)	// middle of an object
-#define bitBoundary	((uintptr)1)	// boundary on a non-allocated object
-#define bitAllocated	((uintptr)2)	// boundary on an allocated object
-#define bitMarked	((uintptr)3)	// boundary on an allocated and marked object
+#define bitBoundary	((uintptr)1)	// boundary of an object
+#define bitMarked	((uintptr)2)	// marked object
 
-#define bitMask	((uintptr)bitMiddle|bitBoundary|bitAllocated|bitMarked)
+#define bitMask	((uintptr)bitBoundary|bitMarked)
 #define bitPtrMask	((uintptr)BitsMask<<2)
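With bitMiddle and bitAllocated gone, the low two bits of each object's bitmap entry carry only boundary and mark state, and the two bits above them (bitPtrMask) carry pointer-type information. A small hedged decoder showing the resulting layout (a Go translation for illustration; decode is not a runtime function):

package main

import "fmt"

const (
	bitBoundary = 1 << 0 // boundary of an object
	bitMarked   = 1 << 1 // marked object
	BitsMask    = 3      // two type bits, stored at shift+2 (bitPtrMask)
)

// decode splits one object's bitmap bits into the three fields the new
// encoding uses.
func decode(bits uintptr) (boundary, marked bool, typeBits uintptr) {
	return bits&bitBoundary != 0, bits&bitMarked != 0, (bits >> 2) & BitsMask
}

func main() {
	// A fresh slot as markspan now writes it: bitBoundary | BitsDead<<2.
	b, m, t := decode(1)
	fmt.Println(b, m, t) // true false 0: allocated-looking, unmarked, noscan
}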