Mirror of https://github.com/golang/go.git (synced 2025-12-08 06:10:04 +00:00)
undo CL 104200047 / 318b04f28372
Breaks windows and race detector.
TBR=rsc

««« original CL description
runtime: stack allocator, separate from mallocgc

In order to move malloc to Go, we need to have a separate stack
allocator. If we run out of stack during malloc, malloc will not be
available to allocate a new stack.

Stacks are the last remaining FlagNoGC objects in the GC heap. Once
they are out, we can get rid of the distinction between the
allocated/blockboundary bits. (This will be in a separate change.)

Fixes #7468
Fixes #7424

LGTM=rsc, dvyukov
R=golang-codereviews, dvyukov, khr, dave, rsc
CC=golang-codereviews
https://golang.org/cl/104200047
»»»

TBR=rsc
CC=golang-codereviews
https://golang.org/cl/101570044
This commit is contained in: commit 3cf83c182a (parent 7c13860cd0)
10 changed files with 297 additions and 487 deletions
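The reverted CL's central idea is to group stacks into power-of-two size classes ("orders", with order = log_2(size/FixedStack)) and keep a free list per order, fronted by a per-thread cache. The following is an illustrative sketch of that layout, not part of this commit: plain C with hypothetical names (FIXED_STACK, NUM_ORDERS, pool) rather than the runtime's actual types.

/* Hypothetical sketch, not the runtime's code: stacks come in power-of-two
 * size classes ("orders"), order = log2(size/FIXED_STACK), and each order
 * has its own free list.  The real allocator additionally fronts this global
 * pool with a per-thread cache and backs it with spans from the heap. */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

enum {
	FIXED_STACK = 8192,   /* assumed smallest stack size */
	NUM_ORDERS  = 4       /* supports 8K, 16K, 32K and 64K stacks */
};

typedef struct Node { struct Node *next; } Node;

static Node *pool[NUM_ORDERS];    /* one free list per order */

/* Map a stack size to its order; size must be FIXED_STACK << k. */
static int
size_to_order(size_t size)
{
	int order = 0;
	size_t n = size;

	while(n > FIXED_STACK) {
		order++;
		n >>= 1;
	}
	assert(order < NUM_ORDERS && (size_t)(FIXED_STACK << order) == size);
	return order;
}

static void*
stack_alloc(size_t size)
{
	int order = size_to_order(size);
	Node *x = pool[order];

	if(x == NULL)
		return malloc(size);  /* pool empty: fall back to the system */
	pool[order] = x->next;
	return x;
}

static void
stack_free(void *v, size_t size)
{
	Node *x = v;
	int order = size_to_order(size);

	x->next = pool[order];
	pool[order] = x;
}

int main(void)
{
	void *s = stack_alloc(16384);

	stack_free(s, 16384);
	printf("order(32768) = %d\n", size_to_order(32768));  /* prints 2 */
	return 0;
}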
@@ -21,163 +21,76 @@ enum
StackDebug = 0,
StackFromSystem = 0, // allocate stacks from system memory instead of the heap
StackFaultOnFree = 0, // old stacks are mapped noaccess to detect use after free

StackCache = 1,
};

// Global pool of spans that have free stacks.
// Stacks are assigned an order according to size.
// order = log_2(size/FixedStack)
// There is a free list for each order.
static MSpan stackpool[NumStackOrders];
static Lock stackpoolmu;
// TODO: one lock per order?

void
runtime·stackinit(void)
typedef struct StackCacheNode StackCacheNode;
struct StackCacheNode
{
int32 i;
StackCacheNode *next;
void* batch[StackCacheBatch-1];
};

for(i = 0; i < NumStackOrders; i++)
runtime·MSpanList_Init(&stackpool[i]);
}
static StackCacheNode *stackcache;
static Lock stackcachemu;

// Allocates a stack from the free pool. Must be called with
// stackpoolmu held.
static MLink*
poolalloc(uint8 order)
{
MSpan *list;
MSpan *s;
MLink *x;
uintptr i;

list = &stackpool[order];
s = list->next;
if(s == list) {
// no free stacks. Allocate another span worth.
s = runtime·MHeap_AllocStack(&runtime·mheap, StackCacheSize >> PageShift);
if(s == nil)
runtime·throw("out of memory");
for(i = 0; i < StackCacheSize; i += FixedStack << order) {
x = (MLink*)((s->start << PageShift) + i);
x->next = s->freelist;
s->freelist = x;
}
}
x = s->freelist;
s->freelist = x->next;
s->ref--;
if(s->ref == 0) {
// all stacks in s are allocated.
runtime·MSpanList_Remove(s);
}
return x;
}

// Adds stack x to the free pool. Must be called with stackpoolmu held.
// stackcacherefill/stackcacherelease implement a global cache of stack segments.
// The cache is required to prevent unlimited growth of per-thread caches.
static void
poolfree(MLink *x, uint8 order)
stackcacherefill(void)
{
MSpan *s;
StackCacheNode *n;
int32 i, pos;

s = runtime·MHeap_Lookup(&runtime·mheap, x);
x->next = s->freelist;
s->freelist = x;
if(s->ref == 0) {
// s now has a free stack
runtime·MSpanList_Insert(&stackpool[order], s);
runtime·lock(&stackcachemu);
n = stackcache;
if(n)
stackcache = n->next;
runtime·unlock(&stackcachemu);
if(n == nil) {
n = (StackCacheNode*)runtime·SysAlloc(FixedStack*StackCacheBatch, &mstats.stacks_sys);
if(n == nil)
runtime·throw("out of memory (stackcacherefill)");
for(i = 0; i < StackCacheBatch-1; i++)
n->batch[i] = (byte*)n + (i+1)*FixedStack;
}
s->ref++;
if(s->ref == (StackCacheSize / FixedStack) >> order) {
// span is completely free - return to heap
runtime·MSpanList_Remove(s);
runtime·MHeap_FreeStack(&runtime·mheap, s);
pos = g->m->stackcachepos;
for(i = 0; i < StackCacheBatch-1; i++) {
g->m->stackcache[pos] = n->batch[i];
pos = (pos + 1) % StackCacheSize;
}
}

// stackcacherefill/stackcacherelease implement a global pool of stack segments.
// The pool is required to prevent unlimited growth of per-thread caches.
static void
stackcacherefill(MCache *c, uint8 order)
{
MLink *x, *list;
uintptr size;

if(StackDebug >= 1)
runtime·printf("stackcacherefill order=%d\n", order);

// Grab some stacks from the global cache.
// Grab half of the allowed capacity (to prevent thrashing).
list = nil;
size = 0;
runtime·lock(&stackpoolmu);
while(size < StackCacheSize/2) {
x = poolalloc(order);
x->next = list;
list = x;
size += FixedStack << order;
}
runtime·unlock(&stackpoolmu);

c->stackcache[order].list = list;
c->stackcache[order].size = size;
g->m->stackcache[pos] = n;
pos = (pos + 1) % StackCacheSize;
g->m->stackcachepos = pos;
g->m->stackcachecnt += StackCacheBatch;
}

static void
stackcacherelease(MCache *c, uint8 order)
stackcacherelease(void)
{
MLink *x, *y;
uintptr size;
StackCacheNode *n;
uint32 i, pos;

if(StackDebug >= 1)
runtime·printf("stackcacherelease order=%d\n", order);
x = c->stackcache[order].list;
size = c->stackcache[order].size;
runtime·lock(&stackpoolmu);
while(size > StackCacheSize/2) {
y = x->next;
poolfree(x, order);
x = y;
size -= FixedStack << order;
pos = (g->m->stackcachepos - g->m->stackcachecnt) % StackCacheSize;
n = (StackCacheNode*)g->m->stackcache[pos];
pos = (pos + 1) % StackCacheSize;
for(i = 0; i < StackCacheBatch-1; i++) {
n->batch[i] = g->m->stackcache[pos];
pos = (pos + 1) % StackCacheSize;
}
runtime·unlock(&stackpoolmu);
c->stackcache[order].list = x;
c->stackcache[order].size = size;
}

void
runtime·stackcache_clear(MCache *c)
{
uint8 order;
MLink *x, *y;

if(StackDebug >= 1)
runtime·printf("stackcache clear\n");
runtime·lock(&stackpoolmu);
for(order = 0; order < NumStackOrders; order++) {
x = c->stackcache[order].list;
while(x != nil) {
y = x->next;
poolfree(x, order);
x = y;
}
c->stackcache[order].list = nil;
c->stackcache[order].size = 0;
}
runtime·unlock(&stackpoolmu);
g->m->stackcachecnt -= StackCacheBatch;
runtime·lock(&stackcachemu);
n->next = stackcache;
stackcache = n;
runtime·unlock(&stackcachemu);
}

void*
runtime·stackalloc(G *gp, uint32 n)
{
uint8 order;
uint32 n2;
uint32 pos;
void *v;
bool malloced;
Stktop *top;
MLink *x;
MSpan *s;
MCache *c;

// Stackalloc must be called on scheduler stack, so that we
// never try to grow the stack during the code that stackalloc runs.
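The stackcacherefill/stackcacherelease pair above moves half of the allowed capacity between the per-thread cache and the global pool, so an alloc/free loop sitting right at the cache boundary does not take the global lock on every call. A minimal sketch of that hysteresis, not part of this commit, with hypothetical names (CACHE_CAPACITY, local_size) standing in for StackCacheSize and the MCache fields:

/* Hypothetical sketch: the local cache is refilled to half of its capacity
 * when it runs dry and drained back down to half when it overflows, so a
 * tight alloc/free loop never bounces off the global pool on every call. */
#include <stdio.h>

enum { CACHE_CAPACITY = 32768 };  /* assumed per-thread cache budget, in bytes */

static size_t local_size;         /* bytes currently cached locally */

static void refill(size_t stacksize)   /* called when the local cache is empty */
{
	while(local_size < CACHE_CAPACITY/2)
		local_size += stacksize;       /* take one stack from the global pool */
}

static void release(size_t stacksize)  /* called when the local cache is full */
{
	while(local_size > CACHE_CAPACITY/2)
		local_size -= stacksize;       /* return one stack to the global pool */
}

int main(void)
{
	refill(8192);                  /* empty -> half full: 16384 bytes cached */
	printf("after refill: %zu\n", local_size);
	local_size = CACHE_CAPACITY;   /* pretend frees have filled the cache */
	release(8192);                 /* full -> half full again */
	printf("after release: %zu\n", local_size);
	return 0;
}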
@@ -197,58 +110,41 @@ runtime·stackalloc(G *gp, uint32 n)
return v;
}

// Small stacks are allocated with a fixed-size free-list allocator.
// If we need a stack of a bigger size, we fall back on allocating
// a dedicated span.
if(StackCache && n < FixedStack << NumStackOrders) {
order = 0;
n2 = n;
while(n2 > FixedStack) {
order++;
n2 >>= 1;
// Minimum-sized stacks are allocated with a fixed-size free-list allocator,
// but if we need a stack of a bigger size, we fall back on malloc
// (assuming that inside malloc all the stack frames are small,
// so that we do not deadlock).
malloced = true;
if(n == FixedStack || g->m->mallocing) {
if(n != FixedStack) {
runtime·printf("stackalloc: in malloc, size=%d want %d\n", FixedStack, n);
runtime·throw("stackalloc");
}
c = g->m->mcache;
if(c == nil) {
// This can happen in the guts of exitsyscall or
// procresize. Just get a stack from the global pool.
runtime·lock(&stackpoolmu);
x = poolalloc(order);
runtime·unlock(&stackpoolmu);
} else {
x = c->stackcache[order].list;
if(x == nil) {
stackcacherefill(c, order);
x = c->stackcache[order].list;
}
c->stackcache[order].list = x->next;
c->stackcache[order].size -= n;
}
v = (byte*)x;
} else {
s = runtime·MHeap_AllocStack(&runtime·mheap, (n+PageSize-1) >> PageShift);
if(s == nil)
runtime·throw("out of memory");
v = (byte*)(s->start<<PageShift);
}
if(g->m->stackcachecnt == 0)
stackcacherefill();
pos = g->m->stackcachepos;
pos = (pos - 1) % StackCacheSize;
v = g->m->stackcache[pos];
g->m->stackcachepos = pos;
g->m->stackcachecnt--;
g->m->stackinuse++;
malloced = false;
} else
v = runtime·mallocgc(n, 0, FlagNoProfiling|FlagNoGC|FlagNoZero|FlagNoInvokeGC);

top = (Stktop*)((byte*)v+n-sizeof(Stktop));
runtime·memclr((byte*)top, sizeof(*top));
if(StackDebug >= 1)
runtime·printf(" allocated %p\n", v);
top->malloced = malloced;
return v;
}

void
runtime·stackfree(G *gp, void *v, Stktop *top)
{
uint8 order;
uintptr n, n2;
MSpan *s;
MLink *x;
MCache *c;
uint32 pos;
uintptr n;

n = (uintptr)(top+1) - (uintptr)v;
if(n & (n-1))
runtime·throw("stack not a power of 2");
if(StackDebug >= 1)
runtime·printf("stackfree %p %d\n", v, (int32)n);
gp->stacksize -= n;
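In the new stackalloc above, the common case is a plain pop from the per-thread cache's free list for the computed order; the global pool is consulted only when that list is empty. A hedged sketch of that fast path, not part of this commit, with hypothetical Cache/Link types in place of MCache and MLink and malloc standing in for the span-backed global pool:

/* Hypothetical sketch of the cached fast path: pop a stack from the local
 * free list and refill from the (here simulated) global pool only on a miss. */
#include <stdio.h>
#include <stdlib.h>

typedef struct Link { struct Link *next; } Link;

typedef struct Cache {
	Link   *list;   /* free stacks of one order */
	size_t  size;   /* bytes currently cached */
} Cache;

/* Stand-in for the span-backed global pool: just grab fresh memory. */
static Link*
pool_alloc(size_t stacksize)
{
	return malloc(stacksize);
}

static void
cache_refill(Cache *c, size_t stacksize, int count)
{
	int i;

	for(i = 0; i < count; i++) {
		Link *x = pool_alloc(stacksize);
		x->next = c->list;
		c->list = x;
		c->size += stacksize;
	}
}

/* Fast path: no locks, no heap; refill only when the list is empty. */
static void*
cached_stackalloc(Cache *c, size_t stacksize)
{
	Link *x;

	if(c->list == NULL)
		cache_refill(c, stacksize, 4);
	x = c->list;
	c->list = x->next;
	c->size -= stacksize;
	return x;
}

int main(void)
{
	Cache c = {0};
	void *stk = cached_stackalloc(&c, 8192);

	printf("got stack at %p, %zu bytes still cached\n", stk, c.size);
	return 0;
}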
@@ -259,34 +155,19 @@ runtime·stackfree(G *gp, void *v, Stktop *top)
runtime·SysFree(v, n, &mstats.stacks_sys);
return;
}
if(StackCache && n < FixedStack << NumStackOrders) {
order = 0;
n2 = n;
while(n2 > FixedStack) {
order++;
n2 >>= 1;
}
x = (MLink*)v;
c = g->m->mcache;
if(c == nil) {
runtime·lock(&stackpoolmu);
poolfree(x, order);
runtime·unlock(&stackpoolmu);
} else {
if(c->stackcache[order].size >= StackCacheSize)
stackcacherelease(c, order);
x->next = c->stackcache[order].list;
c->stackcache[order].list = x;
c->stackcache[order].size += n;
}
} else {
s = runtime·MHeap_Lookup(&runtime·mheap, v);
if(s->state != MSpanStack) {
runtime·printf("%p %p\n", s->start<<PageShift, v);
runtime·throw("bad span state");
}
runtime·MHeap_FreeStack(&runtime·mheap, s);
if(top->malloced) {
runtime·free(v);
return;
}
if(n != FixedStack)
runtime·throw("stackfree: bad fixed size");
if(g->m->stackcachecnt == StackCacheSize)
stackcacherelease();
pos = g->m->stackcachepos;
g->m->stackcache[pos] = v;
g->m->stackcachepos = (pos + 1) % StackCacheSize;
g->m->stackcachecnt++;
g->m->stackinuse--;
}

// Called from runtime·lessstack when returning from a function which
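stackfree above recomputes the stack size from the Stktop pointer and rejects anything that is not a power of two: n & (n-1) clears the lowest set bit, so the result is zero exactly when at most one bit is set. A small self-contained illustration, not part of this commit:

/* n & (n-1) clears the lowest set bit; a nonzero n is a power of two
 * exactly when the result is zero. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool
is_power_of_two(uintptr_t n)
{
	return n != 0 && (n & (n-1)) == 0;
}

int main(void)
{
	uintptr_t sizes[] = { 8192, 12288, 16384 };
	int i;

	for(i = 0; i < 3; i++)
		printf("%lu -> %s\n", (unsigned long)sizes[i],
		       is_power_of_two(sizes[i]) ? "ok" : "stack not a power of 2");
	return 0;
}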
@@ -718,6 +599,7 @@ copystack(G *gp, uintptr nframes, uintptr newsize)
uintptr oldsize, used;
AdjustInfo adjinfo;
Stktop *oldtop, *newtop;
bool malloced;

if(gp->syscallstack != 0)
runtime·throw("can't handle stack copy in syscall yet");
@@ -731,9 +613,10 @@ copystack(G *gp, uintptr nframes, uintptr newsize)
newstk = runtime·stackalloc(gp, newsize);
newbase = newstk + newsize;
newtop = (Stktop*)(newbase - sizeof(Stktop));
malloced = newtop->malloced;

if(StackDebug >= 1)
runtime·printf("copystack gp=%p [%p %p]/%d -> [%p %p]/%d\n", gp, oldstk, oldbase, (int32)oldsize, newstk, newbase, (int32)newsize);
runtime·printf("copystack [%p %p]/%d -> [%p %p]/%d\n", oldstk, oldbase, (int32)oldsize, newstk, newbase, (int32)newsize);
USED(oldsize);

// adjust pointers in the to-be-copied frames
@@ -748,6 +631,7 @@ copystack(G *gp, uintptr nframes, uintptr newsize)

// copy the stack (including Stktop) to the new location
runtime·memmove(newbase - used, oldbase - used, used);
newtop->malloced = malloced;

// Swap out old stack for new one
gp->stackbase = (uintptr)newtop;
@@ -908,7 +792,7 @@ runtime·newstack(void)
top = (Stktop*)(stk+framesize-sizeof(*top));

if(StackDebug >= 1) {
runtime·printf("\t-> new stack gp=%p [%p, %p]\n", gp, stk, top);
runtime·printf("\t-> new stack [%p, %p]\n", stk, top);
}

top->stackbase = gp->stackbase;
@@ -997,6 +881,7 @@ runtime·shrinkstack(G *gp)
int32 nframes;
byte *oldstk, *oldbase;
uintptr used, oldsize, newsize;
MSpan *span;

if(!runtime·copystack)
return;
@@ -1010,14 +895,53 @@ runtime·shrinkstack(G *gp)
if(used >= oldsize / 4)
return; // still using at least 1/4 of the segment.

if(gp->syscallstack != (uintptr)nil) // TODO: can we handle this case?
return;
// To shrink to less than 1/2 a page, we need to copy.
if(newsize < PageSize/2) {
if(gp->syscallstack != (uintptr)nil) // TODO: can we handle this case?
return;
#ifdef GOOS_windows
if(gp->m != nil && gp->m->libcallsp != 0)
return;
if(gp->m != nil && gp->m->libcallsp != 0)
return;
#endif
nframes = copyabletopsegment(gp);
if(nframes == -1)
nframes = copyabletopsegment(gp);
if(nframes == -1)
return;
copystack(gp, nframes, newsize);
return;
copystack(gp, nframes, newsize);
}

// To shrink a stack of one page size or more, we can shrink it
// without copying. Just deallocate the lower half.
span = runtime·MHeap_LookupMaybe(&runtime·mheap, oldstk);
if(span == nil)
return; // stack allocated outside heap. Can't shrink it. Can happen if stack is allocated while inside malloc. TODO: shrink by copying?
if(span->elemsize != oldsize)
runtime·throw("span element size doesn't match stack size");
if((uintptr)oldstk != span->start << PageShift)
runtime·throw("stack not at start of span");

if(StackDebug)
runtime·printf("shrinking stack in place %p %X->%X\n", oldstk, oldsize, newsize);

// new stack guard for smaller stack
gp->stackguard = (uintptr)oldstk + newsize + StackGuard;
gp->stackguard0 = (uintptr)oldstk + newsize + StackGuard;
if(gp->stack0 == (uintptr)oldstk)
gp->stack0 = (uintptr)oldstk + newsize;
gp->stacksize -= oldsize - newsize;

// Free bottom half of the stack.
if(runtime·debug.efence || StackFromSystem) {
if(runtime·debug.efence || StackFaultOnFree)
runtime·SysFault(oldstk, newsize);
else
runtime·SysFree(oldstk, newsize, &mstats.stacks_sys);
return;
}
// First, we trick malloc into thinking
// we allocated the stack as two separate half-size allocs. Then the
// free() call does the rest of the work for us.
runtime·MSpan_EnsureSwept(span);
runtime·MHeap_SplitSpan(&runtime·mheap, span);
runtime·free(oldstk);
}
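The restored shrink-in-place path relies on the stack growing down from the top of its allocation: when less than a quarter is in use, the bottom (low-address) half can be released and the guard recomputed at oldstk + newsize + StackGuard, as in the lines above. A sketch of just that address arithmetic, not part of this commit, with made-up numbers and a hypothetical STACK_GUARD constant:

/* Made-up numbers: shrink a 32K stack in place by freeing its low half and
 * moving the guard up, mirroring the pointer arithmetic in the restored code. */
#include <stdint.h>
#include <stdio.h>

enum { STACK_GUARD = 256 };  /* assumed guard offset above the low end of the stack */

int main(void)
{
	uintptr_t oldstk  = 0x100000;   /* old allocation: [oldstk, oldstk+oldsize) */
	uintptr_t oldsize = 32768;
	uintptr_t newsize = oldsize/2;

	/* The stack grows down from oldstk+oldsize, so the live data sits in the
	 * top half and the bottom half [oldstk, oldstk+newsize) can be freed. */
	uintptr_t newstk     = oldstk + newsize;
	uintptr_t stackguard = newstk + STACK_GUARD;   /* oldstk + newsize + StackGuard */

	printf("free [%#lx, %#lx)\n", (unsigned long)oldstk, (unsigned long)newstk);
	printf("keep [%#lx, %#lx), guard at %#lx\n",
	       (unsigned long)newstk, (unsigned long)(oldstk + oldsize),
	       (unsigned long)stackguard);
	return 0;
}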