2014-11-11 17:04:34 -05:00
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package runtime
2015-11-02 14:09:24 -05:00
import (
"runtime/internal/atomic"
2015-11-11 12:39:30 -05:00
"runtime/internal/sys"
2015-11-02 14:09:24 -05:00
"unsafe"
)
2014-11-11 17:04:34 -05:00
2015-10-16 18:45:30 -07:00
/ *
Stack layout parameters .
Included both by runtime ( compiled via 6 c ) and linkers ( compiled via gcc ) .
The per - goroutine g - > stackguard is set to point StackGuard bytes
above the bottom of the stack . Each function compares its stack
pointer against g - > stackguard to check for overflow . To cut one
instruction from the check sequence for functions with tiny frames ,
the stack is allowed to protrude StackSmall bytes below the stack
guard . Functions with large frames don ' t bother with the check and
always call morestack . The sequences are ( for amd64 , others are
similar ) :
guard = g - > stackguard
frame = function ' s stack frame size
argsize = size of function arguments ( call + return )
stack frame size <= StackSmall :
CMPQ guard , SP
JHI 3 ( PC )
MOVQ m - > morearg , $ ( argsize << 32 )
CALL morestack ( SB )
stack frame size > StackSmall but < StackBig
LEAQ ( frame - StackSmall ) ( SP ) , R0
CMPQ guard , R0
JHI 3 ( PC )
MOVQ m - > morearg , $ ( argsize << 32 )
CALL morestack ( SB )
stack frame size >= StackBig :
MOVQ m - > morearg , $ ( ( argsize << 32 ) | frame )
CALL morestack ( SB )
The bottom StackGuard - StackSmall bytes are important : there has
to be enough room to execute functions that refuse to check for
stack overflow , either because they need to be adjacent to the
actual caller ' s frame ( deferproc ) or because they handle the imminent
stack overflow ( morestack ) .
For example , deferproc might call malloc , which does one of the
above checks ( without allocating a full frame ) , which might trigger
a call to morestack . This sequence needs to fit in the bottom
section of the stack . On amd64 , morestack ' s frame is 40 bytes , and
deferproc ' s frame is 56 bytes . That fits well within the
StackGuard - StackSmall bytes at the bottom .
The linkers explore all possible call traces involving non - splitting
functions to make sure that this limit cannot be violated .
* /
const (
// StackSystem is a number of additional bytes to add
// to each stack below the usual guard area for OS-specific
// purposes like signal handling. Used on Windows, Plan 9,
// and Darwin/ARM because they do not use a separate stack.
2015-11-11 12:39:30 -05:00
_StackSystem = sys . GoosWindows * 512 * sys . PtrSize + sys . GoosPlan9 * 512 + sys . GoosDarwin * sys . GoarchArm * 1024
2015-10-16 18:45:30 -07:00
// The minimum size of stack used by Go code
_StackMin = 2048
// The minimum stack size to allocate.
// The hackery here rounds FixedStack0 up to a power of 2.
_FixedStack0 = _StackMin + _StackSystem
_FixedStack1 = _FixedStack0 - 1
_FixedStack2 = _FixedStack1 | ( _FixedStack1 >> 1 )
_FixedStack3 = _FixedStack2 | ( _FixedStack2 >> 2 )
_FixedStack4 = _FixedStack3 | ( _FixedStack3 >> 4 )
_FixedStack5 = _FixedStack4 | ( _FixedStack4 >> 8 )
_FixedStack6 = _FixedStack5 | ( _FixedStack5 >> 16 )
_FixedStack = _FixedStack6 + 1
// Functions that need frames bigger than this use an extra
// instruction to do the stack split check, to avoid overflow
// in case SP - framesize wraps below zero.
// This value can be no bigger than the size of the unmapped
// space at zero.
_StackBig = 4096
// The stack guard is a pointer this many bytes above the
// bottom of the stack.
2015-10-30 12:47:24 +13:00
_StackGuard = 720 * sys . StackGuardMultiplier + _StackSystem
2015-10-16 18:45:30 -07:00
// After a stack split check the SP is allowed to be this
2016-03-01 23:21:55 +00:00
// many bytes below the stack guard. This saves an instruction
2015-10-16 18:45:30 -07:00
// in the checking sequence for tiny frames.
_StackSmall = 128
// The maximum number of bytes that a chain of NOSPLIT
// functions can use.
_StackLimit = _StackGuard - _StackSystem - _StackSmall
)
// Goroutine preemption request.
// Stored into g->stackguard0 to cause split stack check failure.
// Must be greater than any real sp.
// 0xfffffade in hex.
const (
_StackPreempt = uintptrMask & - 1314
_StackFork = uintptrMask & - 1234
)
2014-11-11 17:04:34 -05:00
const (
2015-05-01 15:53:45 +10:00
// stackDebug == 0: no logging
2014-11-11 17:04:34 -05:00
// == 1: logging of per-stack operations
// == 2: logging of per-frame operations
// == 3: logging of per-word updates
// == 4: logging of per-word reads
stackDebug = 0
stackFromSystem = 0 // allocate stacks from system memory instead of the heap
stackFaultOnFree = 0 // old stacks are mapped noaccess to detect use after free
stackPoisonCopy = 0 // fill stack that should not be accessed with garbage, to detect bad dereferences during copy
stackCache = 1
)
const (
2015-11-11 12:39:30 -05:00
uintptrMask = 1 << ( 8 * sys . PtrSize ) - 1
2014-11-11 17:04:34 -05:00
poisonStack = uintptrMask & 0x6868686868686868
// Goroutine preemption request.
2015-01-05 16:29:21 +00:00
// Stored into g->stackguard0 to cause split stack check failure.
2014-11-11 17:04:34 -05:00
// Must be greater than any real sp.
// 0xfffffade in hex.
stackPreempt = uintptrMask & - 1314
// Thread is forking.
2015-01-05 16:29:21 +00:00
// Stored into g->stackguard0 to cause split stack check failure.
2014-11-11 17:04:34 -05:00
// Must be greater than any real sp.
stackFork = uintptrMask & - 1234
)
// Global pool of spans that have free stacks.
// Stacks are assigned an order according to size.
// order = log_2(size/FixedStack)
// There is a free list for each order.
// TODO: one lock per order?
2015-10-15 15:59:49 -07:00
var stackpool [ _NumStackOrders ] mSpanList
2014-11-11 17:04:34 -05:00
var stackpoolmu mutex
2015-12-14 14:30:25 -05:00
// Global pool of large stack spans.
var stackLarge struct {
lock mutex
free [ _MHeapMap_Bits ] mSpanList // free lists by log_2(s.npages)
}
2014-11-11 17:04:34 -05:00
2015-01-14 11:09:50 -05:00
// Cached value of haveexperiment("framepointer")
var framepointer_enabled bool
2014-11-11 17:04:34 -05:00
func stackinit ( ) {
if _StackCacheSize & _PageMask != 0 {
2014-12-27 20:58:00 -08:00
throw ( "cache size must be a multiple of page size" )
2014-11-11 17:04:34 -05:00
}
for i := range stackpool {
2015-11-11 16:13:51 -08:00
stackpool [ i ] . init ( )
2014-11-11 17:04:34 -05:00
}
2015-12-14 14:30:25 -05:00
for i := range stackLarge . free {
stackLarge . free [ i ] . init ( )
}
}
// stacklog2 returns ⌊log_2(n)⌋.
func stacklog2 ( n uintptr ) int {
log2 := 0
for n > 1 {
n >>= 1
log2 ++
}
return log2
2014-11-11 17:04:34 -05:00
}
2016-03-01 23:21:55 +00:00
// Allocates a stack from the free pool. Must be called with
2014-11-11 17:04:34 -05:00
// stackpoolmu held.
2014-11-20 12:08:13 -05:00
func stackpoolalloc ( order uint8 ) gclinkptr {
2014-11-11 17:04:34 -05:00
list := & stackpool [ order ]
2015-10-15 15:59:49 -07:00
s := list . first
if s == nil {
2016-03-01 23:21:55 +00:00
// no free stacks. Allocate another span worth.
2015-11-11 16:13:51 -08:00
s = mheap_ . allocStack ( _StackCacheSize >> _PageShift )
2014-11-11 17:04:34 -05:00
if s == nil {
2014-12-27 20:58:00 -08:00
throw ( "out of memory" )
2014-11-11 17:04:34 -05:00
}
if s . ref != 0 {
2014-12-27 20:58:00 -08:00
throw ( "bad ref" )
2014-11-11 17:04:34 -05:00
}
2014-11-20 12:08:13 -05:00
if s . freelist . ptr ( ) != nil {
2014-12-27 20:58:00 -08:00
throw ( "bad freelist" )
2014-11-11 17:04:34 -05:00
}
for i := uintptr ( 0 ) ; i < _StackCacheSize ; i += _FixedStack << order {
2014-11-20 12:08:13 -05:00
x := gclinkptr ( uintptr ( s . start ) << _PageShift + i )
x . ptr ( ) . next = s . freelist
2014-11-11 17:04:34 -05:00
s . freelist = x
}
2015-11-11 16:13:51 -08:00
list . insert ( s )
2014-11-11 17:04:34 -05:00
}
x := s . freelist
2014-11-20 12:08:13 -05:00
if x . ptr ( ) == nil {
2014-12-27 20:58:00 -08:00
throw ( "span has no free stacks" )
2014-11-11 17:04:34 -05:00
}
2014-11-20 12:08:13 -05:00
s . freelist = x . ptr ( ) . next
2014-11-11 17:04:34 -05:00
s . ref ++
2014-11-20 12:08:13 -05:00
if s . freelist . ptr ( ) == nil {
2014-11-11 17:04:34 -05:00
// all stacks in s are allocated.
2015-11-11 16:13:51 -08:00
list . remove ( s )
2014-11-11 17:04:34 -05:00
}
return x
}
2016-03-01 23:21:55 +00:00
// Adds stack x to the free pool. Must be called with stackpoolmu held.
2014-11-20 12:08:13 -05:00
func stackpoolfree ( x gclinkptr , order uint8 ) {
2015-11-11 16:13:51 -08:00
s := mheap_ . lookup ( unsafe . Pointer ( x ) )
2014-11-11 17:04:34 -05:00
if s . state != _MSpanStack {
2014-12-27 20:58:00 -08:00
throw ( "freeing stack not in a stack span" )
2014-11-11 17:04:34 -05:00
}
2014-11-20 12:08:13 -05:00
if s . freelist . ptr ( ) == nil {
2014-11-11 17:04:34 -05:00
// s will now have a free stack
2015-11-11 16:13:51 -08:00
stackpool [ order ] . insert ( s )
2014-11-11 17:04:34 -05:00
}
2014-11-20 12:08:13 -05:00
x . ptr ( ) . next = s . freelist
2014-11-11 17:04:34 -05:00
s . freelist = x
s . ref --
runtime: don't free stack spans during GC
Memory for stacks is manually managed by the runtime and, currently
(with one exception) we free stack spans immediately when the last
stack on a span is freed. However, the garbage collector assumes that
spans can never transition from non-free to free during scan or mark.
This disagreement makes it possible for the garbage collector to mark
uninitialized objects and is blocking us from re-enabling the bad
pointer test in the garbage collector (issue #9880).
For example, the following sequence will result in marking an
uninitialized object:
1. scanobject loads a pointer slot out of the object it's scanning.
This happens to be one of the special pointers from the heap into a
stack. Call the pointer p and suppose it points into X's stack.
2. X, running on another thread, grows its stack and frees its old
stack.
3. The old stack happens to be large or was the last stack in its
span, so X frees this span, setting it to state _MSpanFree.
4. The span gets reused as a heap span.
5. scanobject calls heapBitsForObject, which loads the span containing
p, which is now in state _MSpanInUse, but doesn't necessarily have
an object at p. The not-object at p gets marked, and at this point
all sorts of things can go wrong.
We already have a partial solution to this. When shrinking a stack, we
put the old stack on a queue to be freed at the end of garbage
collection. This was done to address exactly this problem, but wasn't
a complete solution.
This commit generalizes this solution to both shrinking and growing
stacks. For stacks that fit in the stack pool, we simply don't free
the span, even if its reference count reaches zero. It's fine to reuse
the span for other stacks, and this enables that. At the end of GC, we
sweep for cached stack spans with a zero reference count and free
them. For larger stacks, we simply queue the stack span to be freed at
the end of GC. Ideally, we would reuse these large stack spans the way
we can small stack spans, but that's a more invasive change that will
have to wait until after the freeze.
Fixes #11267.
Change-Id: Ib7f2c5da4845cc0268e8dc098b08465116972a71
Reviewed-on: https://go-review.googlesource.com/11502
Reviewed-by: Russ Cox <rsc@golang.org>
2015-06-22 10:24:50 -04:00
if gcphase == _GCoff && s . ref == 0 {
// Span is completely free. Return it to the heap
// immediately if we're sweeping.
//
// If GC is active, we delay the free until the end of
// GC to avoid the following type of situation:
//
// 1) GC starts, scans a SudoG but does not yet mark the SudoG.elem pointer
// 2) The stack that pointer points to is copied
// 3) The old stack is freed
// 4) The containing span is marked free
// 5) GC attempts to mark the SudoG.elem pointer. The
// marking fails because the pointer looks like a
// pointer into a free span.
//
// By not freeing, we prevent step #4 until GC is done.
2015-11-11 16:13:51 -08:00
stackpool [ order ] . remove ( s )
2014-11-20 12:08:13 -05:00
s . freelist = 0
2015-11-11 16:13:51 -08:00
mheap_ . freeStack ( s )
2014-11-11 17:04:34 -05:00
}
}
// stackcacherefill/stackcacherelease implement a global pool of stack segments.
// The pool is required to prevent unlimited growth of per-thread caches.
func stackcacherefill ( c * mcache , order uint8 ) {
if stackDebug >= 1 {
print ( "stackcacherefill order=" , order , "\n" )
}
// Grab some stacks from the global cache.
// Grab half of the allowed capacity (to prevent thrashing).
2014-11-20 12:08:13 -05:00
var list gclinkptr
2014-11-11 17:04:34 -05:00
var size uintptr
lock ( & stackpoolmu )
for size < _StackCacheSize / 2 {
x := stackpoolalloc ( order )
2014-11-20 12:08:13 -05:00
x . ptr ( ) . next = list
2014-11-11 17:04:34 -05:00
list = x
size += _FixedStack << order
}
unlock ( & stackpoolmu )
c . stackcache [ order ] . list = list
c . stackcache [ order ] . size = size
}
func stackcacherelease ( c * mcache , order uint8 ) {
if stackDebug >= 1 {
print ( "stackcacherelease order=" , order , "\n" )
}
x := c . stackcache [ order ] . list
size := c . stackcache [ order ] . size
lock ( & stackpoolmu )
for size > _StackCacheSize / 2 {
2014-11-20 12:08:13 -05:00
y := x . ptr ( ) . next
2014-11-11 17:04:34 -05:00
stackpoolfree ( x , order )
x = y
size -= _FixedStack << order
}
unlock ( & stackpoolmu )
c . stackcache [ order ] . list = x
c . stackcache [ order ] . size = size
}
func stackcache_clear ( c * mcache ) {
if stackDebug >= 1 {
print ( "stackcache clear\n" )
}
lock ( & stackpoolmu )
for order := uint8 ( 0 ) ; order < _NumStackOrders ; order ++ {
x := c . stackcache [ order ] . list
2014-11-20 12:08:13 -05:00
for x . ptr ( ) != nil {
y := x . ptr ( ) . next
2014-11-11 17:04:34 -05:00
stackpoolfree ( x , order )
x = y
}
2014-11-20 12:08:13 -05:00
c . stackcache [ order ] . list = 0
2014-11-11 17:04:34 -05:00
c . stackcache [ order ] . size = 0
}
unlock ( & stackpoolmu )
}
2015-05-20 16:16:04 -04:00
func stackalloc ( n uint32 ) ( stack , [ ] stkbar ) {
2014-11-11 17:04:34 -05:00
// Stackalloc must be called on scheduler stack, so that we
// never try to grow the stack during the code that stackalloc runs.
// Doing so would cause a deadlock (issue 1547).
thisg := getg ( )
if thisg != thisg . m . g0 {
2014-12-27 20:58:00 -08:00
throw ( "stackalloc not on scheduler stack" )
2014-11-11 17:04:34 -05:00
}
if n & ( n - 1 ) != 0 {
2014-12-27 20:58:00 -08:00
throw ( "stack size not a power of 2" )
2014-11-11 17:04:34 -05:00
}
if stackDebug >= 1 {
print ( "stackalloc " , n , "\n" )
}
2015-05-20 16:16:04 -04:00
// Compute the size of stack barrier array.
maxstkbar := gcMaxStackBarriers ( int ( n ) )
nstkbar := unsafe . Sizeof ( stkbar { } ) * uintptr ( maxstkbar )
2014-11-11 17:04:34 -05:00
if debug . efence != 0 || stackFromSystem != 0 {
v := sysAlloc ( round ( uintptr ( n ) , _PageSize ) , & memstats . stacks_sys )
if v == nil {
2014-12-27 20:58:00 -08:00
throw ( "out of memory (stackalloc)" )
2014-11-11 17:04:34 -05:00
}
2015-05-20 16:16:04 -04:00
top := uintptr ( n ) - nstkbar
stkbarSlice := slice { add ( v , top ) , 0 , maxstkbar }
return stack { uintptr ( v ) , uintptr ( v ) + top } , * ( * [ ] stkbar ) ( unsafe . Pointer ( & stkbarSlice ) )
2014-11-11 17:04:34 -05:00
}
// Small stacks are allocated with a fixed-size free-list allocator.
// If we need a stack of a bigger size, we fall back on allocating
// a dedicated span.
var v unsafe . Pointer
if stackCache != 0 && n < _FixedStack << _NumStackOrders && n < _StackCacheSize {
order := uint8 ( 0 )
n2 := n
for n2 > _FixedStack {
order ++
n2 >>= 1
}
2014-11-20 12:08:13 -05:00
var x gclinkptr
2014-11-11 17:04:34 -05:00
c := thisg . m . mcache
2015-01-30 15:30:41 -05:00
if c == nil || thisg . m . preemptoff != "" || thisg . m . helpgc != 0 {
2014-11-11 17:04:34 -05:00
// c == nil can happen in the guts of exitsyscall or
// procresize. Just get a stack from the global pool.
// Also don't touch stackcache during gc
// as it's flushed concurrently.
lock ( & stackpoolmu )
x = stackpoolalloc ( order )
unlock ( & stackpoolmu )
} else {
x = c . stackcache [ order ] . list
2014-11-20 12:08:13 -05:00
if x . ptr ( ) == nil {
2014-11-11 17:04:34 -05:00
stackcacherefill ( c , order )
x = c . stackcache [ order ] . list
}
2014-11-20 12:08:13 -05:00
c . stackcache [ order ] . list = x . ptr ( ) . next
2014-11-11 17:04:34 -05:00
c . stackcache [ order ] . size -= uintptr ( n )
}
2015-10-15 14:33:50 -07:00
v = unsafe . Pointer ( x )
2014-11-11 17:04:34 -05:00
} else {
2015-12-14 14:30:25 -05:00
var s * mspan
npage := uintptr ( n ) >> _PageShift
log2npage := stacklog2 ( npage )
// Try to get a stack from the large stack cache.
lock ( & stackLarge . lock )
if ! stackLarge . free [ log2npage ] . isEmpty ( ) {
s = stackLarge . free [ log2npage ] . first
stackLarge . free [ log2npage ] . remove ( s )
}
unlock ( & stackLarge . lock )
2014-11-11 17:04:34 -05:00
if s == nil {
2015-12-14 14:30:25 -05:00
// Allocate a new stack from the heap.
s = mheap_ . allocStack ( npage )
if s == nil {
throw ( "out of memory" )
}
2014-11-11 17:04:34 -05:00
}
2015-10-15 14:33:50 -07:00
v = unsafe . Pointer ( s . start << _PageShift )
2014-11-11 17:04:34 -05:00
}
if raceenabled {
racemalloc ( v , uintptr ( n ) )
}
2015-10-21 11:04:42 -07:00
if msanenabled {
msanmalloc ( v , uintptr ( n ) )
}
2014-11-11 17:04:34 -05:00
if stackDebug >= 1 {
print ( " allocated " , v , "\n" )
}
2015-05-20 16:16:04 -04:00
top := uintptr ( n ) - nstkbar
stkbarSlice := slice { add ( v , top ) , 0 , maxstkbar }
return stack { uintptr ( v ) , uintptr ( v ) + top } , * ( * [ ] stkbar ) ( unsafe . Pointer ( & stkbarSlice ) )
2014-11-11 17:04:34 -05:00
}
2015-05-20 15:29:53 -04:00
func stackfree ( stk stack , n uintptr ) {
2014-11-11 17:04:34 -05:00
gp := getg ( )
2015-10-15 14:33:50 -07:00
v := unsafe . Pointer ( stk . lo )
2014-11-11 17:04:34 -05:00
if n & ( n - 1 ) != 0 {
2014-12-27 20:58:00 -08:00
throw ( "stack not a power of 2" )
2014-11-11 17:04:34 -05:00
}
2015-05-20 15:29:53 -04:00
if stk . lo + n < stk . hi {
throw ( "bad stack size" )
}
2014-11-11 17:04:34 -05:00
if stackDebug >= 1 {
println ( "stackfree" , v , n )
memclr ( v , n ) // for testing, clobber stack data
}
if debug . efence != 0 || stackFromSystem != 0 {
if debug . efence != 0 || stackFaultOnFree != 0 {
sysFault ( v , n )
} else {
sysFree ( v , n , & memstats . stacks_sys )
}
return
}
2015-10-21 11:04:42 -07:00
if msanenabled {
msanfree ( v , n )
}
2014-11-11 17:04:34 -05:00
if stackCache != 0 && n < _FixedStack << _NumStackOrders && n < _StackCacheSize {
order := uint8 ( 0 )
n2 := n
for n2 > _FixedStack {
order ++
n2 >>= 1
}
2014-11-20 12:08:13 -05:00
x := gclinkptr ( v )
2014-11-11 17:04:34 -05:00
c := gp . m . mcache
2015-01-30 15:30:41 -05:00
if c == nil || gp . m . preemptoff != "" || gp . m . helpgc != 0 {
2014-11-11 17:04:34 -05:00
lock ( & stackpoolmu )
stackpoolfree ( x , order )
unlock ( & stackpoolmu )
} else {
if c . stackcache [ order ] . size >= _StackCacheSize {
stackcacherelease ( c , order )
}
2014-11-20 12:08:13 -05:00
x . ptr ( ) . next = c . stackcache [ order ] . list
2014-11-11 17:04:34 -05:00
c . stackcache [ order ] . list = x
c . stackcache [ order ] . size += n
}
} else {
2015-11-11 16:13:51 -08:00
s := mheap_ . lookup ( v )
2014-11-11 17:04:34 -05:00
if s . state != _MSpanStack {
println ( hex ( s . start << _PageShift ) , v )
2014-12-27 20:58:00 -08:00
throw ( "bad span state" )
2014-11-11 17:04:34 -05:00
}
runtime: don't free stack spans during GC
Memory for stacks is manually managed by the runtime and, currently
(with one exception) we free stack spans immediately when the last
stack on a span is freed. However, the garbage collector assumes that
spans can never transition from non-free to free during scan or mark.
This disagreement makes it possible for the garbage collector to mark
uninitialized objects and is blocking us from re-enabling the bad
pointer test in the garbage collector (issue #9880).
For example, the following sequence will result in marking an
uninitialized object:
1. scanobject loads a pointer slot out of the object it's scanning.
This happens to be one of the special pointers from the heap into a
stack. Call the pointer p and suppose it points into X's stack.
2. X, running on another thread, grows its stack and frees its old
stack.
3. The old stack happens to be large or was the last stack in its
span, so X frees this span, setting it to state _MSpanFree.
4. The span gets reused as a heap span.
5. scanobject calls heapBitsForObject, which loads the span containing
p, which is now in state _MSpanInUse, but doesn't necessarily have
an object at p. The not-object at p gets marked, and at this point
all sorts of things can go wrong.
We already have a partial solution to this. When shrinking a stack, we
put the old stack on a queue to be freed at the end of garbage
collection. This was done to address exactly this problem, but wasn't
a complete solution.
This commit generalizes this solution to both shrinking and growing
stacks. For stacks that fit in the stack pool, we simply don't free
the span, even if its reference count reaches zero. It's fine to reuse
the span for other stacks, and this enables that. At the end of GC, we
sweep for cached stack spans with a zero reference count and free
them. For larger stacks, we simply queue the stack span to be freed at
the end of GC. Ideally, we would reuse these large stack spans the way
we can small stack spans, but that's a more invasive change that will
have to wait until after the freeze.
Fixes #11267.
Change-Id: Ib7f2c5da4845cc0268e8dc098b08465116972a71
Reviewed-on: https://go-review.googlesource.com/11502
Reviewed-by: Russ Cox <rsc@golang.org>
2015-06-22 10:24:50 -04:00
if gcphase == _GCoff {
// Free the stack immediately if we're
// sweeping.
2015-11-11 16:13:51 -08:00
mheap_ . freeStack ( s )
runtime: don't free stack spans during GC
Memory for stacks is manually managed by the runtime and, currently
(with one exception) we free stack spans immediately when the last
stack on a span is freed. However, the garbage collector assumes that
spans can never transition from non-free to free during scan or mark.
This disagreement makes it possible for the garbage collector to mark
uninitialized objects and is blocking us from re-enabling the bad
pointer test in the garbage collector (issue #9880).
For example, the following sequence will result in marking an
uninitialized object:
1. scanobject loads a pointer slot out of the object it's scanning.
This happens to be one of the special pointers from the heap into a
stack. Call the pointer p and suppose it points into X's stack.
2. X, running on another thread, grows its stack and frees its old
stack.
3. The old stack happens to be large or was the last stack in its
span, so X frees this span, setting it to state _MSpanFree.
4. The span gets reused as a heap span.
5. scanobject calls heapBitsForObject, which loads the span containing
p, which is now in state _MSpanInUse, but doesn't necessarily have
an object at p. The not-object at p gets marked, and at this point
all sorts of things can go wrong.
We already have a partial solution to this. When shrinking a stack, we
put the old stack on a queue to be freed at the end of garbage
collection. This was done to address exactly this problem, but wasn't
a complete solution.
This commit generalizes this solution to both shrinking and growing
stacks. For stacks that fit in the stack pool, we simply don't free
the span, even if its reference count reaches zero. It's fine to reuse
the span for other stacks, and this enables that. At the end of GC, we
sweep for cached stack spans with a zero reference count and free
them. For larger stacks, we simply queue the stack span to be freed at
the end of GC. Ideally, we would reuse these large stack spans the way
we can small stack spans, but that's a more invasive change that will
have to wait until after the freeze.
Fixes #11267.
Change-Id: Ib7f2c5da4845cc0268e8dc098b08465116972a71
Reviewed-on: https://go-review.googlesource.com/11502
Reviewed-by: Russ Cox <rsc@golang.org>
2015-06-22 10:24:50 -04:00
} else {
2015-12-14 14:30:25 -05:00
// If the GC is running, we can't return a
// stack span to the heap because it could be
// reused as a heap span, and this state
// change would race with GC. Add it to the
// large stack cache instead.
log2npage := stacklog2 ( s . npages )
lock ( & stackLarge . lock )
stackLarge . free [ log2npage ] . insert ( s )
unlock ( & stackLarge . lock )
runtime: don't free stack spans during GC
Memory for stacks is manually managed by the runtime and, currently
(with one exception) we free stack spans immediately when the last
stack on a span is freed. However, the garbage collector assumes that
spans can never transition from non-free to free during scan or mark.
This disagreement makes it possible for the garbage collector to mark
uninitialized objects and is blocking us from re-enabling the bad
pointer test in the garbage collector (issue #9880).
For example, the following sequence will result in marking an
uninitialized object:
1. scanobject loads a pointer slot out of the object it's scanning.
This happens to be one of the special pointers from the heap into a
stack. Call the pointer p and suppose it points into X's stack.
2. X, running on another thread, grows its stack and frees its old
stack.
3. The old stack happens to be large or was the last stack in its
span, so X frees this span, setting it to state _MSpanFree.
4. The span gets reused as a heap span.
5. scanobject calls heapBitsForObject, which loads the span containing
p, which is now in state _MSpanInUse, but doesn't necessarily have
an object at p. The not-object at p gets marked, and at this point
all sorts of things can go wrong.
We already have a partial solution to this. When shrinking a stack, we
put the old stack on a queue to be freed at the end of garbage
collection. This was done to address exactly this problem, but wasn't
a complete solution.
This commit generalizes this solution to both shrinking and growing
stacks. For stacks that fit in the stack pool, we simply don't free
the span, even if its reference count reaches zero. It's fine to reuse
the span for other stacks, and this enables that. At the end of GC, we
sweep for cached stack spans with a zero reference count and free
them. For larger stacks, we simply queue the stack span to be freed at
the end of GC. Ideally, we would reuse these large stack spans the way
we can small stack spans, but that's a more invasive change that will
have to wait until after the freeze.
Fixes #11267.
Change-Id: Ib7f2c5da4845cc0268e8dc098b08465116972a71
Reviewed-on: https://go-review.googlesource.com/11502
Reviewed-by: Russ Cox <rsc@golang.org>
2015-06-22 10:24:50 -04:00
}
2014-11-11 17:04:34 -05:00
}
}
var maxstacksize uintptr = 1 << 20 // enough until runtime.main sets it for real
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss
The bitmaps were 2 bits per pointer because we needed to distinguish
scalar, pointer, multiword, and we used the leftover value to distinguish
uninitialized from scalar, even though the garbage collector (GC) didn't care.
Now that there are no multiword structures from the GC's point of view,
cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not.
The GC assumes the same layout for stack frames and for the maps
describing the global data and bss sections, so change them all in one CL.
The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since
the 2-bit representation lives (at least for now) in some of the reflect data.
Because these stack frame bitmaps are stored directly in the rodata in
the binary, this CL reduces the size of the 6g binary by about 1.1%.
Performance change is basically a wash, but using less memory,
and smaller binaries, and enables other bitmap reductions.
name old mean new mean delta
BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005)
BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001)
BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141)
BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001)
BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095)
BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008)
BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014)
BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364)
BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010)
BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368)
BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484)
BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543)
BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000)
BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023)
BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126)
BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975)
BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153)
BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597)
BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804)
BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881)
BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561)
BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000)
BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000)
BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000)
BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000)
BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019)
BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000)
BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025)
BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000)
BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000)
BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305)
BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000)
BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465)
BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075)
BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337)
BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291)
BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507)
BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313)
BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312)
BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000)
BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007)
BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670)
BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828)
BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392)
BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813)
BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000)
BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000)
BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000)
BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985)
BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320)
BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799)
BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667)
BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001)
BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000)
BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011)
BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185)
BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001)
BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000)
BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979)
BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777)
BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771)
BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000)
BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081)
BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027)
BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022)
BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064)
BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001)
BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007)
BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143)
BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278)
BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252)
BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003)
BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581)
BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002)
BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879)
BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257)
BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678)
BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000)
BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000)
BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000)
BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000)
BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000)
BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000)
BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767)
BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347)
BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793)
Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9
Reviewed-on: https://go-review.googlesource.com/9406
Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 22:45:57 -04:00
var ptrnames = [ ] string {
0 : "scalar" ,
1 : "ptr" ,
2014-11-11 17:04:34 -05:00
}
// Stack frame layout
//
// (x86)
// +------------------+
// | args from caller |
// +------------------+ <- frame->argp
// | return address |
2015-01-14 11:09:50 -05:00
// +------------------+
// | caller's BP (*) | (*) if framepointer_enabled && varp < sp
2014-11-11 17:04:34 -05:00
// +------------------+ <- frame->varp
// | locals |
// +------------------+
// | args to callee |
// +------------------+ <- frame->sp
//
// (arm)
// +------------------+
// | args from caller |
// +------------------+ <- frame->argp
// | caller's retaddr |
// +------------------+ <- frame->varp
// | locals |
// +------------------+
// | args to callee |
// +------------------+
// | return address |
// +------------------+ <- frame->sp
type adjustinfo struct {
old stack
delta uintptr // ptr distance from old to new stack (newbase - oldbase)
runtime: add pcvalue cache to improve stack scan speed
The cost of scanning large stacks is currently dominated by the time
spent looking up and decoding the pcvalue table. However, large stacks
are usually large not because they contain calls to many different
functions, but because they contain many calls to the same, small set
of recursive functions. Hence, walking large stacks tends to make the
same pcvalue queries many times.
Based on this observation, this commit adds a small, very simple, and
fast cache in front of pcvalue lookup. We thread this cache down from
operations that make many pcvalue calls, such as gentraceback, stack
scanning, and stack adjusting.
This simple cache works well because it has minimal overhead when it's
not effective. I also tried a hashed direct-map cache, CLOCK-based
replacement, round-robin replacement, and round-robin with lookups
disabled until there had been at least 16 probes, but none of these
approaches had obvious wins over the random replacement policy in this
commit.
This nearly doubles the overall performance of the deep stack test
program from issue #10898:
name old time/op new time/op delta
Issue10898 16.5s ±12% 9.2s ±12% -44.37% (p=0.008 n=5+5)
It's a very slight win on the garbage benchmark:
name old time/op new time/op delta
XBenchGarbage-12 4.92ms ± 1% 4.89ms ± 1% -0.75% (p=0.000 n=18+19)
It's a wash (but doesn't harm performance) on the go1 benchmarks,
which don't have particularly deep stacks:
name old time/op new time/op delta
BinaryTree17-12 3.11s ± 2% 3.20s ± 3% +2.83% (p=0.000 n=17+20)
Fannkuch11-12 2.51s ± 1% 2.51s ± 1% -0.22% (p=0.034 n=19+18)
FmtFprintfEmpty-12 50.8ns ± 3% 50.6ns ± 2% ~ (p=0.793 n=20+20)
FmtFprintfString-12 174ns ± 0% 174ns ± 1% +0.17% (p=0.048 n=15+20)
FmtFprintfInt-12 177ns ± 0% 165ns ± 1% -6.99% (p=0.000 n=17+19)
FmtFprintfIntInt-12 283ns ± 1% 284ns ± 0% +0.22% (p=0.000 n=18+15)
FmtFprintfPrefixedInt-12 243ns ± 1% 244ns ± 1% +0.40% (p=0.000 n=20+19)
FmtFprintfFloat-12 318ns ± 0% 319ns ± 0% +0.27% (p=0.001 n=19+20)
FmtManyArgs-12 1.12µs ± 0% 1.14µs ± 0% +1.74% (p=0.000 n=19+20)
GobDecode-12 8.69ms ± 0% 8.73ms ± 1% +0.46% (p=0.000 n=18+18)
GobEncode-12 6.64ms ± 1% 6.61ms ± 1% -0.46% (p=0.000 n=20+20)
Gzip-12 323ms ± 2% 319ms ± 1% -1.11% (p=0.000 n=20+20)
Gunzip-12 42.8ms ± 0% 42.9ms ± 0% ~ (p=0.158 n=18+20)
HTTPClientServer-12 63.3µs ± 1% 63.1µs ± 1% -0.35% (p=0.011 n=20+20)
JSONEncode-12 16.9ms ± 1% 17.3ms ± 1% +2.84% (p=0.000 n=19+20)
JSONDecode-12 59.7ms ± 0% 58.5ms ± 0% -2.05% (p=0.000 n=19+17)
Mandelbrot200-12 3.92ms ± 0% 3.91ms ± 0% -0.16% (p=0.003 n=19+19)
GoParse-12 3.79ms ± 2% 3.75ms ± 2% -0.91% (p=0.005 n=20+20)
RegexpMatchEasy0_32-12 102ns ± 1% 101ns ± 1% -0.80% (p=0.001 n=14+20)
RegexpMatchEasy0_1K-12 337ns ± 1% 346ns ± 1% +2.90% (p=0.000 n=20+19)
RegexpMatchEasy1_32-12 84.4ns ± 2% 84.3ns ± 2% ~ (p=0.743 n=20+20)
RegexpMatchEasy1_1K-12 502ns ± 1% 505ns ± 0% +0.64% (p=0.000 n=20+20)
RegexpMatchMedium_32-12 133ns ± 1% 132ns ± 1% -0.85% (p=0.000 n=20+19)
RegexpMatchMedium_1K-12 40.1µs ± 1% 39.8µs ± 1% -0.77% (p=0.000 n=18+18)
RegexpMatchHard_32-12 2.08µs ± 1% 2.07µs ± 1% -0.55% (p=0.001 n=18+19)
RegexpMatchHard_1K-12 62.4µs ± 1% 62.0µs ± 1% -0.74% (p=0.000 n=19+19)
Revcomp-12 545ms ± 2% 545ms ± 3% ~ (p=0.771 n=19+20)
Template-12 73.7ms ± 1% 72.0ms ± 0% -2.33% (p=0.000 n=20+18)
TimeParse-12 358ns ± 1% 351ns ± 1% -2.07% (p=0.000 n=20+20)
TimeFormat-12 369ns ± 1% 356ns ± 0% -3.53% (p=0.000 n=20+18)
[Geo mean] 63.5µs 63.2µs -0.41%
name old speed new speed delta
GobDecode-12 88.3MB/s ± 0% 87.9MB/s ± 0% -0.43% (p=0.000 n=18+17)
GobEncode-12 116MB/s ± 1% 116MB/s ± 1% +0.47% (p=0.000 n=20+20)
Gzip-12 60.2MB/s ± 2% 60.8MB/s ± 1% +1.13% (p=0.000 n=20+20)
Gunzip-12 453MB/s ± 0% 453MB/s ± 0% ~ (p=0.160 n=18+20)
JSONEncode-12 115MB/s ± 1% 112MB/s ± 1% -2.76% (p=0.000 n=19+20)
JSONDecode-12 32.5MB/s ± 0% 33.2MB/s ± 0% +2.09% (p=0.000 n=19+17)
GoParse-12 15.3MB/s ± 2% 15.4MB/s ± 2% +0.92% (p=0.004 n=20+20)
RegexpMatchEasy0_32-12 311MB/s ± 1% 314MB/s ± 1% +0.78% (p=0.000 n=15+19)
RegexpMatchEasy0_1K-12 3.04GB/s ± 1% 2.95GB/s ± 1% -2.90% (p=0.000 n=19+19)
RegexpMatchEasy1_32-12 379MB/s ± 2% 380MB/s ± 2% ~ (p=0.779 n=20+20)
RegexpMatchEasy1_1K-12 2.04GB/s ± 1% 2.02GB/s ± 0% -0.62% (p=0.000 n=20+20)
RegexpMatchMedium_32-12 7.46MB/s ± 1% 7.53MB/s ± 1% +0.86% (p=0.000 n=20+19)
RegexpMatchMedium_1K-12 25.5MB/s ± 1% 25.7MB/s ± 1% +0.78% (p=0.000 n=18+18)
RegexpMatchHard_32-12 15.4MB/s ± 1% 15.5MB/s ± 1% +0.62% (p=0.000 n=19+19)
RegexpMatchHard_1K-12 16.4MB/s ± 1% 16.5MB/s ± 1% +0.82% (p=0.000 n=20+19)
Revcomp-12 466MB/s ± 2% 466MB/s ± 3% ~ (p=0.765 n=19+20)
Template-12 26.3MB/s ± 1% 27.0MB/s ± 0% +2.38% (p=0.000 n=20+18)
[Geo mean] 97.8MB/s 98.0MB/s +0.23%
Change-Id: I281044ae0b24990ba46487cacbc1069493274bc4
Reviewed-on: https://go-review.googlesource.com/13614
Reviewed-by: Keith Randall <khr@golang.org>
2015-08-12 23:43:43 -04:00
cache pcvalueCache
2014-11-11 17:04:34 -05:00
}
// Adjustpointer checks whether *vpp is in the old stack described by adjinfo.
// If so, it rewrites *vpp to point into the new stack.
func adjustpointer ( adjinfo * adjustinfo , vpp unsafe . Pointer ) {
2015-11-23 11:34:16 -05:00
pp := ( * uintptr ) ( vpp )
2014-11-11 17:04:34 -05:00
p := * pp
if stackDebug >= 4 {
2015-11-23 11:34:16 -05:00
print ( " " , pp , ":" , hex ( p ) , "\n" )
2014-11-11 17:04:34 -05:00
}
2015-11-23 11:34:16 -05:00
if adjinfo . old . lo <= p && p < adjinfo . old . hi {
* pp = p + adjinfo . delta
2014-11-11 17:04:34 -05:00
if stackDebug >= 3 {
2015-11-23 11:34:16 -05:00
print ( " adjust ptr " , pp , ":" , hex ( p ) , " -> " , hex ( * pp ) , "\n" )
2014-11-11 17:04:34 -05:00
}
}
}
2015-05-04 10:19:24 -04:00
// Information from the compiler about the layout of stack frames.
type bitvector struct {
n int32 // # of bits
bytedata * uint8
}
2014-11-11 17:04:34 -05:00
type gobitvector struct {
n uintptr
bytedata [ ] uint8
}
func gobv ( bv bitvector ) gobitvector {
return gobitvector {
uintptr ( bv . n ) ,
( * [ 1 << 30 ] byte ) ( unsafe . Pointer ( bv . bytedata ) ) [ : ( bv . n + 7 ) / 8 ] ,
}
}
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss
The bitmaps were 2 bits per pointer because we needed to distinguish
scalar, pointer, multiword, and we used the leftover value to distinguish
uninitialized from scalar, even though the garbage collector (GC) didn't care.
Now that there are no multiword structures from the GC's point of view,
cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not.
The GC assumes the same layout for stack frames and for the maps
describing the global data and bss sections, so change them all in one CL.
The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since
the 2-bit representation lives (at least for now) in some of the reflect data.
Because these stack frame bitmaps are stored directly in the rodata in
the binary, this CL reduces the size of the 6g binary by about 1.1%.
Performance change is basically a wash, but using less memory,
and smaller binaries, and enables other bitmap reductions.
name old mean new mean delta
BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005)
BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001)
BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141)
BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001)
BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095)
BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008)
BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014)
BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364)
BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010)
BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368)
BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484)
BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543)
BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000)
BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023)
BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126)
BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975)
BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153)
BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597)
BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804)
BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881)
BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561)
BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000)
BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000)
BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000)
BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000)
BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019)
BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000)
BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025)
BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000)
BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000)
BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305)
BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000)
BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465)
BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075)
BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337)
BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291)
BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507)
BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313)
BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312)
BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000)
BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007)
BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670)
BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828)
BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392)
BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813)
BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000)
BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000)
BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000)
BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985)
BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320)
BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799)
BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667)
BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001)
BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000)
BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011)
BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185)
BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001)
BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000)
BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979)
BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777)
BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771)
BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000)
BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081)
BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027)
BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022)
BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064)
BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001)
BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007)
BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143)
BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278)
BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252)
BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003)
BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581)
BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002)
BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879)
BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257)
BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678)
BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000)
BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000)
BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000)
BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000)
BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000)
BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000)
BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767)
BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347)
BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793)
Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9
Reviewed-on: https://go-review.googlesource.com/9406
Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 22:45:57 -04:00
func ptrbit ( bv * gobitvector , i uintptr ) uint8 {
return ( bv . bytedata [ i / 8 ] >> ( i % 8 ) ) & 1
2014-11-11 17:04:34 -05:00
}
// bv describes the memory starting at address scanp.
// Adjust any pointers contained therein.
func adjustpointers ( scanp unsafe . Pointer , cbv * bitvector , adjinfo * adjustinfo , f * _func ) {
bv := gobv ( * cbv )
minp := adjinfo . old . lo
maxp := adjinfo . old . hi
delta := adjinfo . delta
2016-02-29 15:01:00 -08:00
num := bv . n
2014-11-11 17:04:34 -05:00
for i := uintptr ( 0 ) ; i < num ; i ++ {
if stackDebug >= 4 {
2015-11-11 12:39:30 -05:00
print ( " " , add ( scanp , i * sys . PtrSize ) , ":" , ptrnames [ ptrbit ( & bv , i ) ] , ":" , hex ( * ( * uintptr ) ( add ( scanp , i * sys . PtrSize ) ) ) , " # " , i , " " , bv . bytedata [ i / 8 ] , "\n" )
2014-11-11 17:04:34 -05:00
}
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss
The bitmaps were 2 bits per pointer because we needed to distinguish
scalar, pointer, multiword, and we used the leftover value to distinguish
uninitialized from scalar, even though the garbage collector (GC) didn't care.
Now that there are no multiword structures from the GC's point of view,
cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not.
The GC assumes the same layout for stack frames and for the maps
describing the global data and bss sections, so change them all in one CL.
The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since
the 2-bit representation lives (at least for now) in some of the reflect data.
Because these stack frame bitmaps are stored directly in the rodata in
the binary, this CL reduces the size of the 6g binary by about 1.1%.
Performance change is basically a wash, but using less memory,
and smaller binaries, and enables other bitmap reductions.
name old mean new mean delta
BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005)
BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001)
BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141)
BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001)
BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095)
BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008)
BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014)
BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364)
BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010)
BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368)
BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484)
BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543)
BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000)
BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023)
BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126)
BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975)
BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153)
BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597)
BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804)
BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881)
BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561)
BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000)
BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000)
BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000)
BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000)
BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019)
BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000)
BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025)
BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000)
BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000)
BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305)
BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000)
BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465)
BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075)
BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337)
BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291)
BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507)
BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313)
BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312)
BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000)
BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007)
BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670)
BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828)
BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392)
BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813)
BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000)
BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000)
BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000)
BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985)
BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320)
BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799)
BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667)
BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001)
BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000)
BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011)
BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185)
BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001)
BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000)
BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979)
BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777)
BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771)
BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000)
BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081)
BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027)
BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022)
BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064)
BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001)
BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007)
BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143)
BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278)
BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252)
BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003)
BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581)
BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002)
BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879)
BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257)
BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678)
BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000)
BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000)
BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000)
BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000)
BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000)
BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000)
BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767)
BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347)
BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793)
Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9
Reviewed-on: https://go-review.googlesource.com/9406
Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 22:45:57 -04:00
if ptrbit ( & bv , i ) == 1 {
2015-11-11 12:39:30 -05:00
pp := ( * uintptr ) ( add ( scanp , i * sys . PtrSize ) )
runtime: eliminate write barrier from adjustpointers
Currently adjustpointers invokes a write barrier for every stack slot
it updates. This is safe---the write barrier always does nothing
because the new value is never a heap pointer---but it's unnecessary
overhead in performance and complexity.
Fix this by rewriting adjustpointers to work with *uintptrs instead of
*unsafe.Pointers. As an added bonus, this makes the code cleaner.
name old mean new mean delta
BinaryTree17 3.35s × (0.98,1.01) 3.33s × (0.99,1.02) ~ (p=0.095 n=20+19)
Fannkuch11 2.49s × (1.00,1.01) 2.52s × (0.99,1.01) +1.23% (p=0.000 n=19+20)
FmtFprintfEmpty 52.2ns × (0.99,1.02) 52.2ns × (0.99,1.02) ~ (p=0.766 n=19+19)
FmtFprintfString 181ns × (0.99,1.02) 179ns × (0.99,1.01) -1.06% (p=0.000 n=20+19)
FmtFprintfInt 177ns × (0.99,1.01) 173ns × (0.99,1.02) -2.26% (p=0.000 n=17+20)
FmtFprintfIntInt 300ns × (0.99,1.01) 302ns × (0.99,1.01) +0.76% (p=0.000 n=19+20)
FmtFprintfPrefixedInt 253ns × (0.99,1.02) 256ns × (0.99,1.01) +0.96% (p=0.000 n=20+19)
FmtFprintfFloat 334ns × (0.99,1.02) 334ns × (1.00,1.01) ~ (p=0.243 n=20+19)
FmtManyArgs 1.16µs × (0.99,1.01) 1.17µs × (0.99,1.02) +0.88% (p=0.000 n=20+20)
GobDecode 9.16ms × (0.99,1.02) 9.18ms × (1.00,1.00) +0.21% (p=0.048 n=20+17)
GobEncode 7.03ms × (0.99,1.01) 7.05ms × (0.99,1.01) ~ (p=0.091 n=19+19)
Gzip 374ms × (0.99,1.01) 372ms × (0.99,1.02) -0.50% (p=0.008 n=18+20)
Gunzip 92.9ms × (0.99,1.01) 92.5ms × (1.00,1.01) -0.47% (p=0.002 n=19+19)
HTTPClientServer 53.1µs × (0.98,1.01) 52.5µs × (0.99,1.01) -0.98% (p=0.000 n=20+19)
JSONEncode 17.4ms × (0.99,1.02) 17.5ms × (0.99,1.01) ~ (p=0.061 n=19+20)
JSONDecode 66.0ms × (0.99,1.02) 64.7ms × (0.99,1.01) -1.87% (p=0.000 n=20+20)
Mandelbrot200 3.94ms × (1.00,1.01) 3.95ms × (1.00,1.01) ~ (p=0.799 n=18+19)
GoParse 3.89ms × (0.99,1.02) 3.86ms × (0.99,1.01) -0.70% (p=0.016 n=20+19)
RegexpMatchEasy0_32 102ns × (0.99,1.02) 102ns × (1.00,1.01) ~ (p=0.557 n=20+18)
RegexpMatchEasy0_1K 353ns × (0.99,1.02) 341ns × (0.99,1.01) -3.38% (p=0.000 n=20+20)
RegexpMatchEasy1_32 85.0ns × (0.99,1.02) 85.0ns × (0.99,1.01) ~ (p=0.851 n=19+20)
RegexpMatchEasy1_1K 521ns × (0.99,1.02) 506ns × (1.00,1.01) -2.85% (p=0.000 n=20+18)
RegexpMatchMedium_32 142ns × (0.99,1.02) 141ns × (1.00,1.01) -1.17% (p=0.000 n=20+19)
RegexpMatchMedium_1K 42.8µs × (0.99,1.01) 42.3µs × (0.99,1.01) -1.07% (p=0.000 n=20+19)
RegexpMatchHard_32 2.17µs × (0.99,1.01) 2.16µs × (1.00,1.01) -0.51% (p=0.042 n=20+18)
RegexpMatchHard_1K 65.6µs × (0.99,1.01) 64.8µs × (1.00,1.00) -1.21% (p=0.000 n=20+17)
Revcomp 581ms × (0.99,1.04) 536ms × (1.00,1.01) -7.71% (p=0.000 n=20+18)
Template 77.2ms × (0.99,1.01) 76.8ms × (0.99,1.01) ~ (p=0.426 n=20+18)
TimeParse 369ns × (0.99,1.02) 371ns × (1.00,1.01) ~ (p=0.117 n=20+19)
TimeFormat 371ns × (0.99,1.02) 391ns × (0.99,1.01) +5.33% (p=0.000 n=20+19)
Change-Id: I5b952ba577ac4365c8c87db837c5804a1e30b7be
Reviewed-on: https://go-review.googlesource.com/10293
Reviewed-by: Russ Cox <rsc@golang.org>
2015-05-20 11:57:02 -04:00
p := * pp
if f != nil && 0 < p && p < _PageSize && debug . invalidptr != 0 || p == poisonStack {
2014-11-11 17:04:34 -05:00
// Looks like a junk value in a pointer slot.
// Live analysis wrong?
getg ( ) . m . traceback = 2
runtime: eliminate write barrier from adjustpointers
Currently adjustpointers invokes a write barrier for every stack slot
it updates. This is safe---the write barrier always does nothing
because the new value is never a heap pointer---but it's unnecessary
overhead in performance and complexity.
Fix this by rewriting adjustpointers to work with *uintptrs instead of
*unsafe.Pointers. As an added bonus, this makes the code cleaner.
name old mean new mean delta
BinaryTree17 3.35s × (0.98,1.01) 3.33s × (0.99,1.02) ~ (p=0.095 n=20+19)
Fannkuch11 2.49s × (1.00,1.01) 2.52s × (0.99,1.01) +1.23% (p=0.000 n=19+20)
FmtFprintfEmpty 52.2ns × (0.99,1.02) 52.2ns × (0.99,1.02) ~ (p=0.766 n=19+19)
FmtFprintfString 181ns × (0.99,1.02) 179ns × (0.99,1.01) -1.06% (p=0.000 n=20+19)
FmtFprintfInt 177ns × (0.99,1.01) 173ns × (0.99,1.02) -2.26% (p=0.000 n=17+20)
FmtFprintfIntInt 300ns × (0.99,1.01) 302ns × (0.99,1.01) +0.76% (p=0.000 n=19+20)
FmtFprintfPrefixedInt 253ns × (0.99,1.02) 256ns × (0.99,1.01) +0.96% (p=0.000 n=20+19)
FmtFprintfFloat 334ns × (0.99,1.02) 334ns × (1.00,1.01) ~ (p=0.243 n=20+19)
FmtManyArgs 1.16µs × (0.99,1.01) 1.17µs × (0.99,1.02) +0.88% (p=0.000 n=20+20)
GobDecode 9.16ms × (0.99,1.02) 9.18ms × (1.00,1.00) +0.21% (p=0.048 n=20+17)
GobEncode 7.03ms × (0.99,1.01) 7.05ms × (0.99,1.01) ~ (p=0.091 n=19+19)
Gzip 374ms × (0.99,1.01) 372ms × (0.99,1.02) -0.50% (p=0.008 n=18+20)
Gunzip 92.9ms × (0.99,1.01) 92.5ms × (1.00,1.01) -0.47% (p=0.002 n=19+19)
HTTPClientServer 53.1µs × (0.98,1.01) 52.5µs × (0.99,1.01) -0.98% (p=0.000 n=20+19)
JSONEncode 17.4ms × (0.99,1.02) 17.5ms × (0.99,1.01) ~ (p=0.061 n=19+20)
JSONDecode 66.0ms × (0.99,1.02) 64.7ms × (0.99,1.01) -1.87% (p=0.000 n=20+20)
Mandelbrot200 3.94ms × (1.00,1.01) 3.95ms × (1.00,1.01) ~ (p=0.799 n=18+19)
GoParse 3.89ms × (0.99,1.02) 3.86ms × (0.99,1.01) -0.70% (p=0.016 n=20+19)
RegexpMatchEasy0_32 102ns × (0.99,1.02) 102ns × (1.00,1.01) ~ (p=0.557 n=20+18)
RegexpMatchEasy0_1K 353ns × (0.99,1.02) 341ns × (0.99,1.01) -3.38% (p=0.000 n=20+20)
RegexpMatchEasy1_32 85.0ns × (0.99,1.02) 85.0ns × (0.99,1.01) ~ (p=0.851 n=19+20)
RegexpMatchEasy1_1K 521ns × (0.99,1.02) 506ns × (1.00,1.01) -2.85% (p=0.000 n=20+18)
RegexpMatchMedium_32 142ns × (0.99,1.02) 141ns × (1.00,1.01) -1.17% (p=0.000 n=20+19)
RegexpMatchMedium_1K 42.8µs × (0.99,1.01) 42.3µs × (0.99,1.01) -1.07% (p=0.000 n=20+19)
RegexpMatchHard_32 2.17µs × (0.99,1.01) 2.16µs × (1.00,1.01) -0.51% (p=0.042 n=20+18)
RegexpMatchHard_1K 65.6µs × (0.99,1.01) 64.8µs × (1.00,1.00) -1.21% (p=0.000 n=20+17)
Revcomp 581ms × (0.99,1.04) 536ms × (1.00,1.01) -7.71% (p=0.000 n=20+18)
Template 77.2ms × (0.99,1.01) 76.8ms × (0.99,1.01) ~ (p=0.426 n=20+18)
TimeParse 369ns × (0.99,1.02) 371ns × (1.00,1.01) ~ (p=0.117 n=20+19)
TimeFormat 371ns × (0.99,1.02) 391ns × (0.99,1.01) +5.33% (p=0.000 n=20+19)
Change-Id: I5b952ba577ac4365c8c87db837c5804a1e30b7be
Reviewed-on: https://go-review.googlesource.com/10293
Reviewed-by: Russ Cox <rsc@golang.org>
2015-05-20 11:57:02 -04:00
print ( "runtime: bad pointer in frame " , funcname ( f ) , " at " , pp , ": " , hex ( p ) , "\n" )
2014-12-27 20:58:00 -08:00
throw ( "invalid stack pointer" )
2014-11-11 17:04:34 -05:00
}
runtime: eliminate write barrier from adjustpointers
Currently adjustpointers invokes a write barrier for every stack slot
it updates. This is safe---the write barrier always does nothing
because the new value is never a heap pointer---but it's unnecessary
overhead in performance and complexity.
Fix this by rewriting adjustpointers to work with *uintptrs instead of
*unsafe.Pointers. As an added bonus, this makes the code cleaner.
name old mean new mean delta
BinaryTree17 3.35s × (0.98,1.01) 3.33s × (0.99,1.02) ~ (p=0.095 n=20+19)
Fannkuch11 2.49s × (1.00,1.01) 2.52s × (0.99,1.01) +1.23% (p=0.000 n=19+20)
FmtFprintfEmpty 52.2ns × (0.99,1.02) 52.2ns × (0.99,1.02) ~ (p=0.766 n=19+19)
FmtFprintfString 181ns × (0.99,1.02) 179ns × (0.99,1.01) -1.06% (p=0.000 n=20+19)
FmtFprintfInt 177ns × (0.99,1.01) 173ns × (0.99,1.02) -2.26% (p=0.000 n=17+20)
FmtFprintfIntInt 300ns × (0.99,1.01) 302ns × (0.99,1.01) +0.76% (p=0.000 n=19+20)
FmtFprintfPrefixedInt 253ns × (0.99,1.02) 256ns × (0.99,1.01) +0.96% (p=0.000 n=20+19)
FmtFprintfFloat 334ns × (0.99,1.02) 334ns × (1.00,1.01) ~ (p=0.243 n=20+19)
FmtManyArgs 1.16µs × (0.99,1.01) 1.17µs × (0.99,1.02) +0.88% (p=0.000 n=20+20)
GobDecode 9.16ms × (0.99,1.02) 9.18ms × (1.00,1.00) +0.21% (p=0.048 n=20+17)
GobEncode 7.03ms × (0.99,1.01) 7.05ms × (0.99,1.01) ~ (p=0.091 n=19+19)
Gzip 374ms × (0.99,1.01) 372ms × (0.99,1.02) -0.50% (p=0.008 n=18+20)
Gunzip 92.9ms × (0.99,1.01) 92.5ms × (1.00,1.01) -0.47% (p=0.002 n=19+19)
HTTPClientServer 53.1µs × (0.98,1.01) 52.5µs × (0.99,1.01) -0.98% (p=0.000 n=20+19)
JSONEncode 17.4ms × (0.99,1.02) 17.5ms × (0.99,1.01) ~ (p=0.061 n=19+20)
JSONDecode 66.0ms × (0.99,1.02) 64.7ms × (0.99,1.01) -1.87% (p=0.000 n=20+20)
Mandelbrot200 3.94ms × (1.00,1.01) 3.95ms × (1.00,1.01) ~ (p=0.799 n=18+19)
GoParse 3.89ms × (0.99,1.02) 3.86ms × (0.99,1.01) -0.70% (p=0.016 n=20+19)
RegexpMatchEasy0_32 102ns × (0.99,1.02) 102ns × (1.00,1.01) ~ (p=0.557 n=20+18)
RegexpMatchEasy0_1K 353ns × (0.99,1.02) 341ns × (0.99,1.01) -3.38% (p=0.000 n=20+20)
RegexpMatchEasy1_32 85.0ns × (0.99,1.02) 85.0ns × (0.99,1.01) ~ (p=0.851 n=19+20)
RegexpMatchEasy1_1K 521ns × (0.99,1.02) 506ns × (1.00,1.01) -2.85% (p=0.000 n=20+18)
RegexpMatchMedium_32 142ns × (0.99,1.02) 141ns × (1.00,1.01) -1.17% (p=0.000 n=20+19)
RegexpMatchMedium_1K 42.8µs × (0.99,1.01) 42.3µs × (0.99,1.01) -1.07% (p=0.000 n=20+19)
RegexpMatchHard_32 2.17µs × (0.99,1.01) 2.16µs × (1.00,1.01) -0.51% (p=0.042 n=20+18)
RegexpMatchHard_1K 65.6µs × (0.99,1.01) 64.8µs × (1.00,1.00) -1.21% (p=0.000 n=20+17)
Revcomp 581ms × (0.99,1.04) 536ms × (1.00,1.01) -7.71% (p=0.000 n=20+18)
Template 77.2ms × (0.99,1.01) 76.8ms × (0.99,1.01) ~ (p=0.426 n=20+18)
TimeParse 369ns × (0.99,1.02) 371ns × (1.00,1.01) ~ (p=0.117 n=20+19)
TimeFormat 371ns × (0.99,1.02) 391ns × (0.99,1.01) +5.33% (p=0.000 n=20+19)
Change-Id: I5b952ba577ac4365c8c87db837c5804a1e30b7be
Reviewed-on: https://go-review.googlesource.com/10293
Reviewed-by: Russ Cox <rsc@golang.org>
2015-05-20 11:57:02 -04:00
if minp <= p && p < maxp {
2014-11-11 17:04:34 -05:00
if stackDebug >= 3 {
2014-12-28 23:16:32 -08:00
print ( "adjust ptr " , p , " " , funcname ( f ) , "\n" )
2014-11-11 17:04:34 -05:00
}
runtime: eliminate write barrier from adjustpointers
Currently adjustpointers invokes a write barrier for every stack slot
it updates. This is safe---the write barrier always does nothing
because the new value is never a heap pointer---but it's unnecessary
overhead in performance and complexity.
Fix this by rewriting adjustpointers to work with *uintptrs instead of
*unsafe.Pointers. As an added bonus, this makes the code cleaner.
name old mean new mean delta
BinaryTree17 3.35s × (0.98,1.01) 3.33s × (0.99,1.02) ~ (p=0.095 n=20+19)
Fannkuch11 2.49s × (1.00,1.01) 2.52s × (0.99,1.01) +1.23% (p=0.000 n=19+20)
FmtFprintfEmpty 52.2ns × (0.99,1.02) 52.2ns × (0.99,1.02) ~ (p=0.766 n=19+19)
FmtFprintfString 181ns × (0.99,1.02) 179ns × (0.99,1.01) -1.06% (p=0.000 n=20+19)
FmtFprintfInt 177ns × (0.99,1.01) 173ns × (0.99,1.02) -2.26% (p=0.000 n=17+20)
FmtFprintfIntInt 300ns × (0.99,1.01) 302ns × (0.99,1.01) +0.76% (p=0.000 n=19+20)
FmtFprintfPrefixedInt 253ns × (0.99,1.02) 256ns × (0.99,1.01) +0.96% (p=0.000 n=20+19)
FmtFprintfFloat 334ns × (0.99,1.02) 334ns × (1.00,1.01) ~ (p=0.243 n=20+19)
FmtManyArgs 1.16µs × (0.99,1.01) 1.17µs × (0.99,1.02) +0.88% (p=0.000 n=20+20)
GobDecode 9.16ms × (0.99,1.02) 9.18ms × (1.00,1.00) +0.21% (p=0.048 n=20+17)
GobEncode 7.03ms × (0.99,1.01) 7.05ms × (0.99,1.01) ~ (p=0.091 n=19+19)
Gzip 374ms × (0.99,1.01) 372ms × (0.99,1.02) -0.50% (p=0.008 n=18+20)
Gunzip 92.9ms × (0.99,1.01) 92.5ms × (1.00,1.01) -0.47% (p=0.002 n=19+19)
HTTPClientServer 53.1µs × (0.98,1.01) 52.5µs × (0.99,1.01) -0.98% (p=0.000 n=20+19)
JSONEncode 17.4ms × (0.99,1.02) 17.5ms × (0.99,1.01) ~ (p=0.061 n=19+20)
JSONDecode 66.0ms × (0.99,1.02) 64.7ms × (0.99,1.01) -1.87% (p=0.000 n=20+20)
Mandelbrot200 3.94ms × (1.00,1.01) 3.95ms × (1.00,1.01) ~ (p=0.799 n=18+19)
GoParse 3.89ms × (0.99,1.02) 3.86ms × (0.99,1.01) -0.70% (p=0.016 n=20+19)
RegexpMatchEasy0_32 102ns × (0.99,1.02) 102ns × (1.00,1.01) ~ (p=0.557 n=20+18)
RegexpMatchEasy0_1K 353ns × (0.99,1.02) 341ns × (0.99,1.01) -3.38% (p=0.000 n=20+20)
RegexpMatchEasy1_32 85.0ns × (0.99,1.02) 85.0ns × (0.99,1.01) ~ (p=0.851 n=19+20)
RegexpMatchEasy1_1K 521ns × (0.99,1.02) 506ns × (1.00,1.01) -2.85% (p=0.000 n=20+18)
RegexpMatchMedium_32 142ns × (0.99,1.02) 141ns × (1.00,1.01) -1.17% (p=0.000 n=20+19)
RegexpMatchMedium_1K 42.8µs × (0.99,1.01) 42.3µs × (0.99,1.01) -1.07% (p=0.000 n=20+19)
RegexpMatchHard_32 2.17µs × (0.99,1.01) 2.16µs × (1.00,1.01) -0.51% (p=0.042 n=20+18)
RegexpMatchHard_1K 65.6µs × (0.99,1.01) 64.8µs × (1.00,1.00) -1.21% (p=0.000 n=20+17)
Revcomp 581ms × (0.99,1.04) 536ms × (1.00,1.01) -7.71% (p=0.000 n=20+18)
Template 77.2ms × (0.99,1.01) 76.8ms × (0.99,1.01) ~ (p=0.426 n=20+18)
TimeParse 369ns × (0.99,1.02) 371ns × (1.00,1.01) ~ (p=0.117 n=20+19)
TimeFormat 371ns × (0.99,1.02) 391ns × (0.99,1.01) +5.33% (p=0.000 n=20+19)
Change-Id: I5b952ba577ac4365c8c87db837c5804a1e30b7be
Reviewed-on: https://go-review.googlesource.com/10293
Reviewed-by: Russ Cox <rsc@golang.org>
2015-05-20 11:57:02 -04:00
* pp = p + delta
2014-11-11 17:04:34 -05:00
}
}
}
}
// Note: the argument/return area is adjusted by the callee.
func adjustframe ( frame * stkframe , arg unsafe . Pointer ) bool {
adjinfo := ( * adjustinfo ) ( arg )
targetpc := frame . continpc
if targetpc == 0 {
// Frame is dead.
return true
}
f := frame . fn
if stackDebug >= 2 {
print ( " adjusting " , funcname ( f ) , " frame=[" , hex ( frame . sp ) , "," , hex ( frame . fp ) , "] pc=" , hex ( frame . pc ) , " continpc=" , hex ( frame . continpc ) , "\n" )
}
[dev.cc] runtime: delete scalararg, ptrarg; rename onM to systemstack
Scalararg and ptrarg are not "signal safe".
Go code filling them out can be interrupted by a signal,
and then the signal handler runs, and if it also ends up
in Go code that uses scalararg or ptrarg, now the old
values have been smashed.
For the pieces of code that do need to run in a signal handler,
we introduced onM_signalok, which is really just onM
except that the _signalok is meant to convey that the caller
asserts that scalarg and ptrarg will be restored to their old
values after the call (instead of the usual behavior, zeroing them).
Scalararg and ptrarg are also untyped and therefore error-prone.
Go code can always pass a closure instead of using scalararg
and ptrarg; they were only really necessary for C code.
And there's no more C code.
For all these reasons, delete scalararg and ptrarg, converting
the few remaining references to use closures.
Once those are gone, there is no need for a distinction between
onM and onM_signalok, so replace both with a single function
equivalent to the current onM_signalok (that is, it can be called
on any of the curg, g0, and gsignal stacks).
The name onM and the phrase 'm stack' are misnomers,
because on most system an M has two system stacks:
the main thread stack and the signal handling stack.
Correct the misnomer by naming the replacement function systemstack.
Fix a few references to "M stack" in code.
The main motivation for this change is to eliminate scalararg/ptrarg.
Rick and I have already seen them cause problems because
the calling sequence m.ptrarg[0] = p is a heap pointer assignment,
so it gets a write barrier. The write barrier also uses onM, so it has
all the same problems as if it were being invoked by a signal handler.
We worked around this by saving and restoring the old values
and by calling onM_signalok, but there's no point in keeping this nice
home for bugs around any longer.
This CL also changes funcline to return the file name as a result
instead of filling in a passed-in *string. (The *string signature is
left over from when the code was written in and called from C.)
That's arguably an unrelated change, except that once I had done
the ptrarg/scalararg/onM cleanup I started getting false positives
about the *string argument escaping (not allowed in package runtime).
The compiler is wrong, but the easiest fix is to write the code like
Go code instead of like C code. I am a bit worried that the compiler
is wrong because of some use of uninitialized memory in the escape
analysis. If that's the reason, it will go away when we convert the
compiler to Go. (And if not, we'll debug it the next time.)
LGTM=khr
R=r, khr
CC=austin, golang-codereviews, iant, rlh
https://golang.org/cl/174950043
2014-11-12 14:54:31 -05:00
if f . entry == systemstack_switchPC {
// A special routine at the bottom of stack of a goroutine that does an systemstack call.
2014-11-11 17:04:34 -05:00
// We will allow it to be copied even though we don't
// have full GC info for it (because it is written in asm).
return true
}
if targetpc != f . entry {
targetpc --
}
runtime: add pcvalue cache to improve stack scan speed
The cost of scanning large stacks is currently dominated by the time
spent looking up and decoding the pcvalue table. However, large stacks
are usually large not because they contain calls to many different
functions, but because they contain many calls to the same, small set
of recursive functions. Hence, walking large stacks tends to make the
same pcvalue queries many times.
Based on this observation, this commit adds a small, very simple, and
fast cache in front of pcvalue lookup. We thread this cache down from
operations that make many pcvalue calls, such as gentraceback, stack
scanning, and stack adjusting.
This simple cache works well because it has minimal overhead when it's
not effective. I also tried a hashed direct-map cache, CLOCK-based
replacement, round-robin replacement, and round-robin with lookups
disabled until there had been at least 16 probes, but none of these
approaches had obvious wins over the random replacement policy in this
commit.
This nearly doubles the overall performance of the deep stack test
program from issue #10898:
name old time/op new time/op delta
Issue10898 16.5s ±12% 9.2s ±12% -44.37% (p=0.008 n=5+5)
It's a very slight win on the garbage benchmark:
name old time/op new time/op delta
XBenchGarbage-12 4.92ms ± 1% 4.89ms ± 1% -0.75% (p=0.000 n=18+19)
It's a wash (but doesn't harm performance) on the go1 benchmarks,
which don't have particularly deep stacks:
name old time/op new time/op delta
BinaryTree17-12 3.11s ± 2% 3.20s ± 3% +2.83% (p=0.000 n=17+20)
Fannkuch11-12 2.51s ± 1% 2.51s ± 1% -0.22% (p=0.034 n=19+18)
FmtFprintfEmpty-12 50.8ns ± 3% 50.6ns ± 2% ~ (p=0.793 n=20+20)
FmtFprintfString-12 174ns ± 0% 174ns ± 1% +0.17% (p=0.048 n=15+20)
FmtFprintfInt-12 177ns ± 0% 165ns ± 1% -6.99% (p=0.000 n=17+19)
FmtFprintfIntInt-12 283ns ± 1% 284ns ± 0% +0.22% (p=0.000 n=18+15)
FmtFprintfPrefixedInt-12 243ns ± 1% 244ns ± 1% +0.40% (p=0.000 n=20+19)
FmtFprintfFloat-12 318ns ± 0% 319ns ± 0% +0.27% (p=0.001 n=19+20)
FmtManyArgs-12 1.12µs ± 0% 1.14µs ± 0% +1.74% (p=0.000 n=19+20)
GobDecode-12 8.69ms ± 0% 8.73ms ± 1% +0.46% (p=0.000 n=18+18)
GobEncode-12 6.64ms ± 1% 6.61ms ± 1% -0.46% (p=0.000 n=20+20)
Gzip-12 323ms ± 2% 319ms ± 1% -1.11% (p=0.000 n=20+20)
Gunzip-12 42.8ms ± 0% 42.9ms ± 0% ~ (p=0.158 n=18+20)
HTTPClientServer-12 63.3µs ± 1% 63.1µs ± 1% -0.35% (p=0.011 n=20+20)
JSONEncode-12 16.9ms ± 1% 17.3ms ± 1% +2.84% (p=0.000 n=19+20)
JSONDecode-12 59.7ms ± 0% 58.5ms ± 0% -2.05% (p=0.000 n=19+17)
Mandelbrot200-12 3.92ms ± 0% 3.91ms ± 0% -0.16% (p=0.003 n=19+19)
GoParse-12 3.79ms ± 2% 3.75ms ± 2% -0.91% (p=0.005 n=20+20)
RegexpMatchEasy0_32-12 102ns ± 1% 101ns ± 1% -0.80% (p=0.001 n=14+20)
RegexpMatchEasy0_1K-12 337ns ± 1% 346ns ± 1% +2.90% (p=0.000 n=20+19)
RegexpMatchEasy1_32-12 84.4ns ± 2% 84.3ns ± 2% ~ (p=0.743 n=20+20)
RegexpMatchEasy1_1K-12 502ns ± 1% 505ns ± 0% +0.64% (p=0.000 n=20+20)
RegexpMatchMedium_32-12 133ns ± 1% 132ns ± 1% -0.85% (p=0.000 n=20+19)
RegexpMatchMedium_1K-12 40.1µs ± 1% 39.8µs ± 1% -0.77% (p=0.000 n=18+18)
RegexpMatchHard_32-12 2.08µs ± 1% 2.07µs ± 1% -0.55% (p=0.001 n=18+19)
RegexpMatchHard_1K-12 62.4µs ± 1% 62.0µs ± 1% -0.74% (p=0.000 n=19+19)
Revcomp-12 545ms ± 2% 545ms ± 3% ~ (p=0.771 n=19+20)
Template-12 73.7ms ± 1% 72.0ms ± 0% -2.33% (p=0.000 n=20+18)
TimeParse-12 358ns ± 1% 351ns ± 1% -2.07% (p=0.000 n=20+20)
TimeFormat-12 369ns ± 1% 356ns ± 0% -3.53% (p=0.000 n=20+18)
[Geo mean] 63.5µs 63.2µs -0.41%
name old speed new speed delta
GobDecode-12 88.3MB/s ± 0% 87.9MB/s ± 0% -0.43% (p=0.000 n=18+17)
GobEncode-12 116MB/s ± 1% 116MB/s ± 1% +0.47% (p=0.000 n=20+20)
Gzip-12 60.2MB/s ± 2% 60.8MB/s ± 1% +1.13% (p=0.000 n=20+20)
Gunzip-12 453MB/s ± 0% 453MB/s ± 0% ~ (p=0.160 n=18+20)
JSONEncode-12 115MB/s ± 1% 112MB/s ± 1% -2.76% (p=0.000 n=19+20)
JSONDecode-12 32.5MB/s ± 0% 33.2MB/s ± 0% +2.09% (p=0.000 n=19+17)
GoParse-12 15.3MB/s ± 2% 15.4MB/s ± 2% +0.92% (p=0.004 n=20+20)
RegexpMatchEasy0_32-12 311MB/s ± 1% 314MB/s ± 1% +0.78% (p=0.000 n=15+19)
RegexpMatchEasy0_1K-12 3.04GB/s ± 1% 2.95GB/s ± 1% -2.90% (p=0.000 n=19+19)
RegexpMatchEasy1_32-12 379MB/s ± 2% 380MB/s ± 2% ~ (p=0.779 n=20+20)
RegexpMatchEasy1_1K-12 2.04GB/s ± 1% 2.02GB/s ± 0% -0.62% (p=0.000 n=20+20)
RegexpMatchMedium_32-12 7.46MB/s ± 1% 7.53MB/s ± 1% +0.86% (p=0.000 n=20+19)
RegexpMatchMedium_1K-12 25.5MB/s ± 1% 25.7MB/s ± 1% +0.78% (p=0.000 n=18+18)
RegexpMatchHard_32-12 15.4MB/s ± 1% 15.5MB/s ± 1% +0.62% (p=0.000 n=19+19)
RegexpMatchHard_1K-12 16.4MB/s ± 1% 16.5MB/s ± 1% +0.82% (p=0.000 n=20+19)
Revcomp-12 466MB/s ± 2% 466MB/s ± 3% ~ (p=0.765 n=19+20)
Template-12 26.3MB/s ± 1% 27.0MB/s ± 0% +2.38% (p=0.000 n=20+18)
[Geo mean] 97.8MB/s 98.0MB/s +0.23%
Change-Id: I281044ae0b24990ba46487cacbc1069493274bc4
Reviewed-on: https://go-review.googlesource.com/13614
Reviewed-by: Keith Randall <khr@golang.org>
2015-08-12 23:43:43 -04:00
pcdata := pcdatavalue ( f , _PCDATA_StackMapIndex , targetpc , & adjinfo . cache )
2014-11-11 17:04:34 -05:00
if pcdata == - 1 {
pcdata = 0 // in prologue
}
// Adjust local variables if stack frame has been allocated.
size := frame . varp - frame . sp
var minsize uintptr
2015-11-11 12:39:30 -05:00
switch sys . TheChar {
2015-03-08 14:20:20 +01:00
case '7' :
2015-11-11 12:39:30 -05:00
minsize = sys . SpAlign
2015-03-08 14:20:20 +01:00
default :
2015-11-11 12:39:30 -05:00
minsize = sys . MinFrameSize
2014-11-11 17:04:34 -05:00
}
if size > minsize {
var bv bitvector
stackmap := ( * stackmap ) ( funcdata ( f , _FUNCDATA_LocalsPointerMaps ) )
if stackmap == nil || stackmap . n <= 0 {
print ( "runtime: frame " , funcname ( f ) , " untyped locals " , hex ( frame . varp - size ) , "+" , hex ( size ) , "\n" )
2014-12-27 20:58:00 -08:00
throw ( "missing stackmap" )
2014-11-11 17:04:34 -05:00
}
// Locals bitmap information, scan just the pointers in locals.
if pcdata < 0 || pcdata >= stackmap . n {
// don't know where we are
print ( "runtime: pcdata is " , pcdata , " and " , stackmap . n , " locals stack map entries for " , funcname ( f ) , " (targetpc=" , targetpc , ")\n" )
2014-12-27 20:58:00 -08:00
throw ( "bad symbol table" )
2014-11-11 17:04:34 -05:00
}
bv = stackmapdata ( stackmap , pcdata )
2015-11-11 12:39:30 -05:00
size = uintptr ( bv . n ) * sys . PtrSize
2014-11-11 17:04:34 -05:00
if stackDebug >= 3 {
2015-11-11 12:39:30 -05:00
print ( " locals " , pcdata , "/" , stackmap . n , " " , size / sys . PtrSize , " words " , bv . bytedata , "\n" )
2014-11-11 17:04:34 -05:00
}
adjustpointers ( unsafe . Pointer ( frame . varp - size ) , & bv , adjinfo , f )
}
2015-01-14 11:09:50 -05:00
// Adjust saved base pointer if there is one.
2015-11-11 12:39:30 -05:00
if sys . TheChar == '6' && frame . argp - frame . varp == 2 * sys . RegSize {
2015-01-14 11:09:50 -05:00
if ! framepointer_enabled {
2015-02-03 08:35:38 -05:00
print ( "runtime: found space for saved base pointer, but no framepointer experiment\n" )
2015-02-03 09:09:56 -05:00
print ( "argp=" , hex ( frame . argp ) , " varp=" , hex ( frame . varp ) , "\n" )
2015-01-14 11:09:50 -05:00
throw ( "bad frame layout" )
}
if stackDebug >= 3 {
print ( " saved bp\n" )
}
adjustpointer ( adjinfo , unsafe . Pointer ( frame . varp ) )
}
2014-11-11 17:04:34 -05:00
// Adjust arguments.
if frame . arglen > 0 {
var bv bitvector
if frame . argmap != nil {
bv = * frame . argmap
} else {
stackmap := ( * stackmap ) ( funcdata ( f , _FUNCDATA_ArgsPointerMaps ) )
if stackmap == nil || stackmap . n <= 0 {
2016-02-29 15:01:00 -08:00
print ( "runtime: frame " , funcname ( f ) , " untyped args " , frame . argp , "+" , frame . arglen , "\n" )
2014-12-27 20:58:00 -08:00
throw ( "missing stackmap" )
2014-11-11 17:04:34 -05:00
}
if pcdata < 0 || pcdata >= stackmap . n {
// don't know where we are
print ( "runtime: pcdata is " , pcdata , " and " , stackmap . n , " args stack map entries for " , funcname ( f ) , " (targetpc=" , targetpc , ")\n" )
2014-12-27 20:58:00 -08:00
throw ( "bad symbol table" )
2014-11-11 17:04:34 -05:00
}
bv = stackmapdata ( stackmap , pcdata )
}
if stackDebug >= 3 {
print ( " args\n" )
}
adjustpointers ( unsafe . Pointer ( frame . argp ) , & bv , adjinfo , nil )
}
return true
}
func adjustctxt ( gp * g , adjinfo * adjustinfo ) {
2015-10-15 14:33:50 -07:00
adjustpointer ( adjinfo , unsafe . Pointer ( & gp . sched . ctxt ) )
2014-11-11 17:04:34 -05:00
}
func adjustdefers ( gp * g , adjinfo * adjustinfo ) {
// Adjust defer argument blocks the same way we adjust active stack frames.
tracebackdefers ( gp , adjustframe , noescape ( unsafe . Pointer ( adjinfo ) ) )
// Adjust pointers in the Defer structs.
// Defer structs themselves are never on the stack.
for d := gp . _defer ; d != nil ; d = d . link {
2015-10-15 14:33:50 -07:00
adjustpointer ( adjinfo , unsafe . Pointer ( & d . fn ) )
adjustpointer ( adjinfo , unsafe . Pointer ( & d . sp ) )
adjustpointer ( adjinfo , unsafe . Pointer ( & d . _panic ) )
2014-11-11 17:04:34 -05:00
}
}
func adjustpanics ( gp * g , adjinfo * adjustinfo ) {
// Panics are on stack and already adjusted.
// Update pointer to head of list in G.
2015-10-15 14:33:50 -07:00
adjustpointer ( adjinfo , unsafe . Pointer ( & gp . _panic ) )
2014-11-11 17:04:34 -05:00
}
func adjustsudogs ( gp * g , adjinfo * adjustinfo ) {
// the data elements pointed to by a SudoG structure
// might be in the stack.
for s := gp . waiting ; s != nil ; s = s . waitlink {
2015-10-15 14:33:50 -07:00
adjustpointer ( adjinfo , unsafe . Pointer ( & s . elem ) )
adjustpointer ( adjinfo , unsafe . Pointer ( & s . selectdone ) )
2014-11-11 17:04:34 -05:00
}
}
runtime: implement GC stack barriers
This commit implements stack barriers to minimize the amount of
stack re-scanning that must be done during mark termination.
Currently the GC scans stacks of active goroutines twice during every
GC cycle: once at the beginning during root discovery and once at the
end during mark termination. The second scan happens while the world
is stopped and guarantees that we've seen all of the roots (since
there are no write barriers on writes to local stack
variables). However, this means pause time is proportional to stack
size. In particularly recursive programs, this can drive pause time up
past our 10ms goal (e.g., it takes about 150ms to scan a 50MB heap).
Re-scanning the entire stack is rarely necessary, especially for large
stacks, because usually most of the frames on the stack were not
active between the first and second scans and hence any changes to
these frames (via non-escaping pointers passed down the stack) were
tracked by write barriers.
To efficiently track how far a stack has been unwound since the first
scan (and, hence, how much needs to be re-scanned), this commit
introduces stack barriers. During the first scan, at exponentially
spaced points in each stack, the scan overwrites return PCs with the
PC of the stack barrier function. When "returned" to, the stack
barrier function records how far the stack has unwound and jumps to
the original return PC for that point in the stack. Then the second
scan only needs to proceed as far as the lowest barrier that hasn't
been hit.
For deeply recursive programs, this substantially reduces mark
termination time (and hence pause time). For the goscheme example
linked in issue #10898, prior to this change, mark termination times
were typically between 100 and 500ms; with this change, mark
termination times are typically between 10 and 20ms. As a result of
the reduced stack scanning work, this reduces overall execution time
of the goscheme example by 20%.
Fixes #10898.
The effect of this on programs that are not deeply recursive is
minimal:
name old time/op new time/op delta
BinaryTree17 3.16s ± 2% 3.26s ± 1% +3.31% (p=0.000 n=19+19)
Fannkuch11 2.42s ± 1% 2.48s ± 1% +2.24% (p=0.000 n=17+19)
FmtFprintfEmpty 50.0ns ± 3% 49.8ns ± 1% ~ (p=0.534 n=20+19)
FmtFprintfString 173ns ± 0% 175ns ± 0% +1.49% (p=0.000 n=16+19)
FmtFprintfInt 170ns ± 1% 175ns ± 1% +2.97% (p=0.000 n=20+19)
FmtFprintfIntInt 288ns ± 0% 295ns ± 0% +2.73% (p=0.000 n=16+19)
FmtFprintfPrefixedInt 242ns ± 1% 252ns ± 1% +4.13% (p=0.000 n=18+18)
FmtFprintfFloat 324ns ± 0% 323ns ± 0% -0.36% (p=0.000 n=20+19)
FmtManyArgs 1.14µs ± 0% 1.12µs ± 1% -1.01% (p=0.000 n=18+19)
GobDecode 8.88ms ± 1% 8.87ms ± 0% ~ (p=0.480 n=19+18)
GobEncode 6.80ms ± 1% 6.85ms ± 0% +0.82% (p=0.000 n=20+18)
Gzip 363ms ± 1% 363ms ± 1% ~ (p=0.077 n=18+20)
Gunzip 90.6ms ± 0% 90.0ms ± 1% -0.71% (p=0.000 n=17+18)
HTTPClientServer 51.5µs ± 1% 50.8µs ± 1% -1.32% (p=0.000 n=18+18)
JSONEncode 17.0ms ± 0% 17.1ms ± 0% +0.40% (p=0.000 n=18+17)
JSONDecode 61.8ms ± 0% 63.8ms ± 1% +3.11% (p=0.000 n=18+17)
Mandelbrot200 3.84ms ± 0% 3.84ms ± 1% ~ (p=0.583 n=19+19)
GoParse 3.71ms ± 1% 3.72ms ± 1% ~ (p=0.159 n=18+19)
RegexpMatchEasy0_32 100ns ± 0% 100ns ± 1% -0.19% (p=0.033 n=17+19)
RegexpMatchEasy0_1K 342ns ± 1% 331ns ± 0% -3.41% (p=0.000 n=19+19)
RegexpMatchEasy1_32 82.5ns ± 0% 81.7ns ± 0% -0.98% (p=0.000 n=18+18)
RegexpMatchEasy1_1K 505ns ± 0% 494ns ± 1% -2.16% (p=0.000 n=18+18)
RegexpMatchMedium_32 137ns ± 1% 137ns ± 1% -0.24% (p=0.048 n=20+18)
RegexpMatchMedium_1K 41.6µs ± 0% 41.3µs ± 1% -0.57% (p=0.004 n=18+20)
RegexpMatchHard_32 2.11µs ± 0% 2.11µs ± 1% +0.20% (p=0.037 n=17+19)
RegexpMatchHard_1K 63.9µs ± 2% 63.3µs ± 0% -0.99% (p=0.000 n=20+17)
Revcomp 560ms ± 1% 522ms ± 0% -6.87% (p=0.000 n=18+16)
Template 75.0ms ± 0% 75.1ms ± 1% +0.18% (p=0.013 n=18+19)
TimeParse 358ns ± 1% 364ns ± 0% +1.74% (p=0.000 n=20+15)
TimeFormat 360ns ± 0% 372ns ± 0% +3.55% (p=0.000 n=20+18)
Change-Id: If8a9bfae6c128d15a4f405e02bcfa50129df82a2
Reviewed-on: https://go-review.googlesource.com/10314
Reviewed-by: Russ Cox <rsc@golang.org>
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-05-20 16:30:49 -04:00
func adjuststkbar ( gp * g , adjinfo * adjustinfo ) {
for i := int ( gp . stkbarPos ) ; i < len ( gp . stkbar ) ; i ++ {
2015-10-15 14:33:50 -07:00
adjustpointer ( adjinfo , unsafe . Pointer ( & gp . stkbar [ i ] . savedLRPtr ) )
runtime: implement GC stack barriers
This commit implements stack barriers to minimize the amount of
stack re-scanning that must be done during mark termination.
Currently the GC scans stacks of active goroutines twice during every
GC cycle: once at the beginning during root discovery and once at the
end during mark termination. The second scan happens while the world
is stopped and guarantees that we've seen all of the roots (since
there are no write barriers on writes to local stack
variables). However, this means pause time is proportional to stack
size. In particularly recursive programs, this can drive pause time up
past our 10ms goal (e.g., it takes about 150ms to scan a 50MB heap).
Re-scanning the entire stack is rarely necessary, especially for large
stacks, because usually most of the frames on the stack were not
active between the first and second scans and hence any changes to
these frames (via non-escaping pointers passed down the stack) were
tracked by write barriers.
To efficiently track how far a stack has been unwound since the first
scan (and, hence, how much needs to be re-scanned), this commit
introduces stack barriers. During the first scan, at exponentially
spaced points in each stack, the scan overwrites return PCs with the
PC of the stack barrier function. When "returned" to, the stack
barrier function records how far the stack has unwound and jumps to
the original return PC for that point in the stack. Then the second
scan only needs to proceed as far as the lowest barrier that hasn't
been hit.
For deeply recursive programs, this substantially reduces mark
termination time (and hence pause time). For the goscheme example
linked in issue #10898, prior to this change, mark termination times
were typically between 100 and 500ms; with this change, mark
termination times are typically between 10 and 20ms. As a result of
the reduced stack scanning work, this reduces overall execution time
of the goscheme example by 20%.
Fixes #10898.
The effect of this on programs that are not deeply recursive is
minimal:
name old time/op new time/op delta
BinaryTree17 3.16s ± 2% 3.26s ± 1% +3.31% (p=0.000 n=19+19)
Fannkuch11 2.42s ± 1% 2.48s ± 1% +2.24% (p=0.000 n=17+19)
FmtFprintfEmpty 50.0ns ± 3% 49.8ns ± 1% ~ (p=0.534 n=20+19)
FmtFprintfString 173ns ± 0% 175ns ± 0% +1.49% (p=0.000 n=16+19)
FmtFprintfInt 170ns ± 1% 175ns ± 1% +2.97% (p=0.000 n=20+19)
FmtFprintfIntInt 288ns ± 0% 295ns ± 0% +2.73% (p=0.000 n=16+19)
FmtFprintfPrefixedInt 242ns ± 1% 252ns ± 1% +4.13% (p=0.000 n=18+18)
FmtFprintfFloat 324ns ± 0% 323ns ± 0% -0.36% (p=0.000 n=20+19)
FmtManyArgs 1.14µs ± 0% 1.12µs ± 1% -1.01% (p=0.000 n=18+19)
GobDecode 8.88ms ± 1% 8.87ms ± 0% ~ (p=0.480 n=19+18)
GobEncode 6.80ms ± 1% 6.85ms ± 0% +0.82% (p=0.000 n=20+18)
Gzip 363ms ± 1% 363ms ± 1% ~ (p=0.077 n=18+20)
Gunzip 90.6ms ± 0% 90.0ms ± 1% -0.71% (p=0.000 n=17+18)
HTTPClientServer 51.5µs ± 1% 50.8µs ± 1% -1.32% (p=0.000 n=18+18)
JSONEncode 17.0ms ± 0% 17.1ms ± 0% +0.40% (p=0.000 n=18+17)
JSONDecode 61.8ms ± 0% 63.8ms ± 1% +3.11% (p=0.000 n=18+17)
Mandelbrot200 3.84ms ± 0% 3.84ms ± 1% ~ (p=0.583 n=19+19)
GoParse 3.71ms ± 1% 3.72ms ± 1% ~ (p=0.159 n=18+19)
RegexpMatchEasy0_32 100ns ± 0% 100ns ± 1% -0.19% (p=0.033 n=17+19)
RegexpMatchEasy0_1K 342ns ± 1% 331ns ± 0% -3.41% (p=0.000 n=19+19)
RegexpMatchEasy1_32 82.5ns ± 0% 81.7ns ± 0% -0.98% (p=0.000 n=18+18)
RegexpMatchEasy1_1K 505ns ± 0% 494ns ± 1% -2.16% (p=0.000 n=18+18)
RegexpMatchMedium_32 137ns ± 1% 137ns ± 1% -0.24% (p=0.048 n=20+18)
RegexpMatchMedium_1K 41.6µs ± 0% 41.3µs ± 1% -0.57% (p=0.004 n=18+20)
RegexpMatchHard_32 2.11µs ± 0% 2.11µs ± 1% +0.20% (p=0.037 n=17+19)
RegexpMatchHard_1K 63.9µs ± 2% 63.3µs ± 0% -0.99% (p=0.000 n=20+17)
Revcomp 560ms ± 1% 522ms ± 0% -6.87% (p=0.000 n=18+16)
Template 75.0ms ± 0% 75.1ms ± 1% +0.18% (p=0.013 n=18+19)
TimeParse 358ns ± 1% 364ns ± 0% +1.74% (p=0.000 n=20+15)
TimeFormat 360ns ± 0% 372ns ± 0% +3.55% (p=0.000 n=20+18)
Change-Id: If8a9bfae6c128d15a4f405e02bcfa50129df82a2
Reviewed-on: https://go-review.googlesource.com/10314
Reviewed-by: Russ Cox <rsc@golang.org>
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-05-20 16:30:49 -04:00
}
}
2014-11-11 17:04:34 -05:00
func fillstack ( stk stack , b byte ) {
for p := stk . lo ; p < stk . hi ; p ++ {
* ( * byte ) ( unsafe . Pointer ( p ) ) = b
}
}
// Copies gp's stack to a new stack of a different size.
2014-11-15 08:00:38 -05:00
// Caller must have changed gp status to Gcopystack.
2014-11-11 17:04:34 -05:00
func copystack ( gp * g , newsize uintptr ) {
if gp . syscallsp != 0 {
2014-12-27 20:58:00 -08:00
throw ( "stack growth not allowed in system call" )
2014-11-11 17:04:34 -05:00
}
old := gp . stack
if old . lo == 0 {
2014-12-27 20:58:00 -08:00
throw ( "nil stackbase" )
2014-11-11 17:04:34 -05:00
}
used := old . hi - gp . sched . sp
// allocate new stack
2015-05-20 16:16:04 -04:00
new , newstkbar := stackalloc ( uint32 ( newsize ) )
2014-11-11 17:04:34 -05:00
if stackPoisonCopy != 0 {
fillstack ( new , 0xfd )
}
if stackDebug >= 1 {
runtime: account for stack guard when shrinking the stack
Currently, when shrinkstack computes whether the halved stack
allocation will have enough room for the stack, it accounts for the
stack space that's actively in use but fails to leave extra room for
the stack guard space. As a result, *if* the minimum stack size is
small enough or the guard large enough, it may shrink the stack and
leave less than enough room to run nosplit functions. If the next
function called after the stack shrink is a nosplit function, it may
overflow the stack without noticing and overwrite non-stack memory.
We don't think this is happening under normal conditions right now.
The minimum stack allocation is 2K and the guard is 640 bytes. The
"worst case" stack shrink is from 4K (4048 bytes after stack barrier
array reservation) to 2K (2016 bytes after stack barrier array
reservation), which means the largest "used" size that will qualify
for shrinking is 4048/4 - 8 = 1004 bytes. After copying, that leaves
2016 - 1004 = 1012 bytes of available stack, which is significantly
more than the guard space.
If we were to reduce the minimum stack size to 1K or raise the guard
space above 1012 bytes, the logic in shrinkstack would no longer leave
enough space.
It's also possible to trigger this problem by setting
firstStackBarrierOffset to 0, which puts stack barriers in a debug
mode that steals away *half* of the stack for the stack barrier array
reservation. Then, the largest "used" size that qualifies for
shrinking is (4096/2)/4 - 8 = 504 bytes. After copying, that leaves
(2096/2) - 504 = 8 bytes of available stack; much less than the
required guard space. This causes failures like those in issue #11027
because func gc() shrinks its own stack and then immediately calls
casgstatus (a nosplit function), which overflows the stack and
overwrites a free list pointer in the neighboring span. However, since
this seems to require the special debug mode, we don't think it's
responsible for issue #11027.
To forestall all of these subtle issues, this commit modifies
shrinkstack to correctly account for the guard space when considering
whether to halve the stack allocation.
Change-Id: I7312584addc63b5bfe55cc384a1012f6181f1b9d
Reviewed-on: https://go-review.googlesource.com/10714
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Russ Cox <rsc@golang.org>
2015-06-04 17:28:02 -04:00
print ( "copystack gp=" , gp , " [" , hex ( old . lo ) , " " , hex ( old . hi - used ) , " " , hex ( old . hi ) , "]/" , gp . stackAlloc , " -> [" , hex ( new . lo ) , " " , hex ( new . hi - used ) , " " , hex ( new . hi ) , "]/" , newsize , "\n" )
2014-11-11 17:04:34 -05:00
}
2016-02-16 12:23:33 -05:00
// Compute adjustment.
2014-11-11 17:04:34 -05:00
var adjinfo adjustinfo
adjinfo . old = old
adjinfo . delta = new . hi - old . hi
2016-02-16 12:23:33 -05:00
// copy the stack to the new location
memmove ( unsafe . Pointer ( new . hi - used ) , unsafe . Pointer ( old . hi - used ) , used )
// Disallow sigprof scans of this stack and block if there's
// one in progress.
gcLockStackBarriers ( gp )
// Adjust structures that have pointers into stacks. We have
// to do most of these before we traceback the new stack
// because gentraceback uses them.
2014-11-11 17:04:34 -05:00
adjustctxt ( gp , & adjinfo )
adjustdefers ( gp , & adjinfo )
adjustpanics ( gp , & adjinfo )
adjustsudogs ( gp , & adjinfo )
runtime: implement GC stack barriers
This commit implements stack barriers to minimize the amount of
stack re-scanning that must be done during mark termination.
Currently the GC scans stacks of active goroutines twice during every
GC cycle: once at the beginning during root discovery and once at the
end during mark termination. The second scan happens while the world
is stopped and guarantees that we've seen all of the roots (since
there are no write barriers on writes to local stack
variables). However, this means pause time is proportional to stack
size. In particularly recursive programs, this can drive pause time up
past our 10ms goal (e.g., it takes about 150ms to scan a 50MB heap).
Re-scanning the entire stack is rarely necessary, especially for large
stacks, because usually most of the frames on the stack were not
active between the first and second scans and hence any changes to
these frames (via non-escaping pointers passed down the stack) were
tracked by write barriers.
To efficiently track how far a stack has been unwound since the first
scan (and, hence, how much needs to be re-scanned), this commit
introduces stack barriers. During the first scan, at exponentially
spaced points in each stack, the scan overwrites return PCs with the
PC of the stack barrier function. When "returned" to, the stack
barrier function records how far the stack has unwound and jumps to
the original return PC for that point in the stack. Then the second
scan only needs to proceed as far as the lowest barrier that hasn't
been hit.
For deeply recursive programs, this substantially reduces mark
termination time (and hence pause time). For the goscheme example
linked in issue #10898, prior to this change, mark termination times
were typically between 100 and 500ms; with this change, mark
termination times are typically between 10 and 20ms. As a result of
the reduced stack scanning work, this reduces overall execution time
of the goscheme example by 20%.
Fixes #10898.
The effect of this on programs that are not deeply recursive is
minimal:
name old time/op new time/op delta
BinaryTree17 3.16s ± 2% 3.26s ± 1% +3.31% (p=0.000 n=19+19)
Fannkuch11 2.42s ± 1% 2.48s ± 1% +2.24% (p=0.000 n=17+19)
FmtFprintfEmpty 50.0ns ± 3% 49.8ns ± 1% ~ (p=0.534 n=20+19)
FmtFprintfString 173ns ± 0% 175ns ± 0% +1.49% (p=0.000 n=16+19)
FmtFprintfInt 170ns ± 1% 175ns ± 1% +2.97% (p=0.000 n=20+19)
FmtFprintfIntInt 288ns ± 0% 295ns ± 0% +2.73% (p=0.000 n=16+19)
FmtFprintfPrefixedInt 242ns ± 1% 252ns ± 1% +4.13% (p=0.000 n=18+18)
FmtFprintfFloat 324ns ± 0% 323ns ± 0% -0.36% (p=0.000 n=20+19)
FmtManyArgs 1.14µs ± 0% 1.12µs ± 1% -1.01% (p=0.000 n=18+19)
GobDecode 8.88ms ± 1% 8.87ms ± 0% ~ (p=0.480 n=19+18)
GobEncode 6.80ms ± 1% 6.85ms ± 0% +0.82% (p=0.000 n=20+18)
Gzip 363ms ± 1% 363ms ± 1% ~ (p=0.077 n=18+20)
Gunzip 90.6ms ± 0% 90.0ms ± 1% -0.71% (p=0.000 n=17+18)
HTTPClientServer 51.5µs ± 1% 50.8µs ± 1% -1.32% (p=0.000 n=18+18)
JSONEncode 17.0ms ± 0% 17.1ms ± 0% +0.40% (p=0.000 n=18+17)
JSONDecode 61.8ms ± 0% 63.8ms ± 1% +3.11% (p=0.000 n=18+17)
Mandelbrot200 3.84ms ± 0% 3.84ms ± 1% ~ (p=0.583 n=19+19)
GoParse 3.71ms ± 1% 3.72ms ± 1% ~ (p=0.159 n=18+19)
RegexpMatchEasy0_32 100ns ± 0% 100ns ± 1% -0.19% (p=0.033 n=17+19)
RegexpMatchEasy0_1K 342ns ± 1% 331ns ± 0% -3.41% (p=0.000 n=19+19)
RegexpMatchEasy1_32 82.5ns ± 0% 81.7ns ± 0% -0.98% (p=0.000 n=18+18)
RegexpMatchEasy1_1K 505ns ± 0% 494ns ± 1% -2.16% (p=0.000 n=18+18)
RegexpMatchMedium_32 137ns ± 1% 137ns ± 1% -0.24% (p=0.048 n=20+18)
RegexpMatchMedium_1K 41.6µs ± 0% 41.3µs ± 1% -0.57% (p=0.004 n=18+20)
RegexpMatchHard_32 2.11µs ± 0% 2.11µs ± 1% +0.20% (p=0.037 n=17+19)
RegexpMatchHard_1K 63.9µs ± 2% 63.3µs ± 0% -0.99% (p=0.000 n=20+17)
Revcomp 560ms ± 1% 522ms ± 0% -6.87% (p=0.000 n=18+16)
Template 75.0ms ± 0% 75.1ms ± 1% +0.18% (p=0.013 n=18+19)
TimeParse 358ns ± 1% 364ns ± 0% +1.74% (p=0.000 n=20+15)
TimeFormat 360ns ± 0% 372ns ± 0% +3.55% (p=0.000 n=20+18)
Change-Id: If8a9bfae6c128d15a4f405e02bcfa50129df82a2
Reviewed-on: https://go-review.googlesource.com/10314
Reviewed-by: Russ Cox <rsc@golang.org>
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-05-20 16:30:49 -04:00
adjuststkbar ( gp , & adjinfo )
2014-11-11 17:04:34 -05:00
2015-05-20 16:16:04 -04:00
// copy old stack barriers to new stack barrier array
newstkbar = newstkbar [ : len ( gp . stkbar ) ]
copy ( newstkbar , gp . stkbar )
2014-11-11 17:04:34 -05:00
// Swap out old stack for new one
gp . stack = new
2015-01-05 16:29:21 +00:00
gp . stackguard0 = new . lo + _StackGuard // NOTE: might clobber a preempt request
2014-11-11 17:04:34 -05:00
gp . sched . sp = new . hi - used
2015-05-20 15:29:53 -04:00
oldsize := gp . stackAlloc
gp . stackAlloc = newsize
2015-05-20 16:16:04 -04:00
gp . stkbar = newstkbar
2015-08-26 11:39:10 -04:00
gp . stktopsp += adjinfo . delta
2014-11-11 17:04:34 -05:00
2016-02-16 12:23:33 -05:00
// Adjust pointers in the new stack.
gentraceback ( ^ uintptr ( 0 ) , ^ uintptr ( 0 ) , 0 , gp , 0 , nil , 0x7fffffff , adjustframe , noescape ( unsafe . Pointer ( & adjinfo ) ) , 0 )
2015-11-23 15:03:38 -05:00
gcUnlockStackBarriers ( gp )
2014-11-11 17:04:34 -05:00
// free old stack
if stackPoisonCopy != 0 {
fillstack ( old , 0xfc )
}
runtime: don't free stack spans during GC
Memory for stacks is manually managed by the runtime and, currently
(with one exception) we free stack spans immediately when the last
stack on a span is freed. However, the garbage collector assumes that
spans can never transition from non-free to free during scan or mark.
This disagreement makes it possible for the garbage collector to mark
uninitialized objects and is blocking us from re-enabling the bad
pointer test in the garbage collector (issue #9880).
For example, the following sequence will result in marking an
uninitialized object:
1. scanobject loads a pointer slot out of the object it's scanning.
This happens to be one of the special pointers from the heap into a
stack. Call the pointer p and suppose it points into X's stack.
2. X, running on another thread, grows its stack and frees its old
stack.
3. The old stack happens to be large or was the last stack in its
span, so X frees this span, setting it to state _MSpanFree.
4. The span gets reused as a heap span.
5. scanobject calls heapBitsForObject, which loads the span containing
p, which is now in state _MSpanInUse, but doesn't necessarily have
an object at p. The not-object at p gets marked, and at this point
all sorts of things can go wrong.
We already have a partial solution to this. When shrinking a stack, we
put the old stack on a queue to be freed at the end of garbage
collection. This was done to address exactly this problem, but wasn't
a complete solution.
This commit generalizes this solution to both shrinking and growing
stacks. For stacks that fit in the stack pool, we simply don't free
the span, even if its reference count reaches zero. It's fine to reuse
the span for other stacks, and this enables that. At the end of GC, we
sweep for cached stack spans with a zero reference count and free
them. For larger stacks, we simply queue the stack span to be freed at
the end of GC. Ideally, we would reuse these large stack spans the way
we can small stack spans, but that's a more invasive change that will
have to wait until after the freeze.
Fixes #11267.
Change-Id: Ib7f2c5da4845cc0268e8dc098b08465116972a71
Reviewed-on: https://go-review.googlesource.com/11502
Reviewed-by: Russ Cox <rsc@golang.org>
2015-06-22 10:24:50 -04:00
stackfree ( old , oldsize )
2014-11-11 17:04:34 -05:00
}
// round x up to a power of 2.
func round2 ( x int32 ) int32 {
s := uint ( 0 )
for 1 << s < x {
s ++
}
return 1 << s
}
// Called from runtime·morestack when more stack is needed.
// Allocate larger stack and relocate to new stack.
// Stack growth is multiplicative, for constant amortized cost.
//
// g->atomicstatus will be Grunning or Gscanrunning upon entry.
// If the GC is trying to stop this g then it will set preemptscan to true.
func newstack ( ) {
thisg := getg ( )
// TODO: double check all gp. shouldn't be getg().
2014-12-22 10:53:51 -05:00
if thisg . m . morebuf . g . ptr ( ) . stackguard0 == stackFork {
2014-12-27 20:58:00 -08:00
throw ( "stack growth after fork" )
2014-11-11 17:04:34 -05:00
}
2014-12-22 10:53:51 -05:00
if thisg . m . morebuf . g . ptr ( ) != thisg . m . curg {
2015-08-24 21:24:23 -04:00
print ( "runtime: newstack called from g=" , hex ( thisg . m . morebuf . g ) , "\n" + "\tm=" , thisg . m , " m->curg=" , thisg . m . curg , " m->g0=" , thisg . m . g0 , " m->gsignal=" , thisg . m . gsignal , "\n" )
2014-11-11 17:04:34 -05:00
morebuf := thisg . m . morebuf
2014-12-22 10:53:51 -05:00
traceback ( morebuf . pc , morebuf . sp , morebuf . lr , morebuf . g . ptr ( ) )
2014-12-27 20:58:00 -08:00
throw ( "runtime: wrong goroutine in newstack" )
2014-11-11 17:04:34 -05:00
}
if thisg . m . curg . throwsplit {
gp := thisg . m . curg
// Update syscallsp, syscallpc in case traceback uses them.
morebuf := thisg . m . morebuf
gp . syscallsp = morebuf . sp
gp . syscallpc = morebuf . pc
print ( "runtime: newstack sp=" , hex ( gp . sched . sp ) , " stack=[" , hex ( gp . stack . lo ) , ", " , hex ( gp . stack . hi ) , "]\n" ,
"\tmorebuf={pc:" , hex ( morebuf . pc ) , " sp:" , hex ( morebuf . sp ) , " lr:" , hex ( morebuf . lr ) , "}\n" ,
"\tsched={pc:" , hex ( gp . sched . pc ) , " sp:" , hex ( gp . sched . sp ) , " lr:" , hex ( gp . sched . lr ) , " ctxt:" , gp . sched . ctxt , "}\n" )
2014-12-22 10:53:51 -05:00
traceback ( morebuf . pc , morebuf . sp , morebuf . lr , gp )
2014-12-27 20:58:00 -08:00
throw ( "runtime: stack split at bad time" )
2014-11-11 17:04:34 -05:00
}
gp := thisg . m . curg
morebuf := thisg . m . morebuf
thisg . m . morebuf . pc = 0
thisg . m . morebuf . lr = 0
thisg . m . morebuf . sp = 0
2014-12-22 10:53:51 -05:00
thisg . m . morebuf . g = 0
2015-01-13 15:55:16 -05:00
rewindmorestack ( & gp . sched )
2015-01-14 16:36:41 -05:00
// NOTE: stackguard0 may change underfoot, if another thread
// is about to try to preempt gp. Read it just once and use that same
// value now and below.
2015-11-02 14:09:24 -05:00
preempt := atomic . Loaduintptr ( & gp . stackguard0 ) == stackPreempt
2015-01-14 16:36:41 -05:00
2015-01-13 15:55:16 -05:00
// Be conservative about where we preempt.
// We are interested in preempting user Go code, not runtime code.
2015-01-30 15:30:41 -05:00
// If we're holding locks, mallocing, or preemption is disabled, don't
// preempt.
2015-01-13 15:55:16 -05:00
// This check is very early in newstack so that even the status change
// from Grunning to Gwaiting and back doesn't happen in this case.
// That status change by itself can be viewed as a small preemption,
// because the GC might change Gwaiting to Gscanwaiting, and then
// this goroutine has to wait for the GC to finish before continuing.
// If the GC is in some way dependent on this goroutine (for example,
// it needs a lock held by the goroutine), that small preemption turns
// into a real deadlock.
2015-01-14 16:36:41 -05:00
if preempt {
2015-04-17 00:21:30 -04:00
if thisg . m . locks != 0 || thisg . m . mallocing != 0 || thisg . m . preemptoff != "" || thisg . m . p . ptr ( ) . status != _Prunning {
2015-01-13 15:55:16 -05:00
// Let the goroutine keep running for now.
// gp->preempt is set, so it will be preempted next time.
gp . stackguard0 = gp . stack . lo + _StackGuard
gogo ( & gp . sched ) // never return
}
}
2014-11-11 17:04:34 -05:00
if gp . stack . lo == 0 {
2014-12-27 20:58:00 -08:00
throw ( "missing stack in newstack" )
2014-11-11 17:04:34 -05:00
}
sp := gp . sched . sp
2015-11-11 12:39:30 -05:00
if sys . TheChar == '6' || sys . TheChar == '8' {
2014-11-11 17:04:34 -05:00
// The call to morestack cost a word.
2015-11-11 12:39:30 -05:00
sp -= sys . PtrSize
2014-11-11 17:04:34 -05:00
}
if stackDebug >= 1 || sp < gp . stack . lo {
print ( "runtime: newstack sp=" , hex ( sp ) , " stack=[" , hex ( gp . stack . lo ) , ", " , hex ( gp . stack . hi ) , "]\n" ,
"\tmorebuf={pc:" , hex ( morebuf . pc ) , " sp:" , hex ( morebuf . sp ) , " lr:" , hex ( morebuf . lr ) , "}\n" ,
"\tsched={pc:" , hex ( gp . sched . pc ) , " sp:" , hex ( gp . sched . sp ) , " lr:" , hex ( gp . sched . lr ) , " ctxt:" , gp . sched . ctxt , "}\n" )
}
if sp < gp . stack . lo {
print ( "runtime: gp=" , gp , ", gp->status=" , hex ( readgstatus ( gp ) ) , "\n " )
print ( "runtime: split stack overflow: " , hex ( sp ) , " < " , hex ( gp . stack . lo ) , "\n" )
2014-12-27 20:58:00 -08:00
throw ( "runtime: split stack overflow" )
2014-11-11 17:04:34 -05:00
}
2014-11-15 08:00:38 -05:00
if gp . sched . ctxt != nil {
// morestack wrote sched.ctxt on its way in here,
// without a write barrier. Run the write barrier now.
// It is not possible to be preempted between then
// and now, so it's okay.
writebarrierptr_nostore ( ( * uintptr ) ( unsafe . Pointer ( & gp . sched . ctxt ) ) , uintptr ( gp . sched . ctxt ) )
}
2015-01-14 16:36:41 -05:00
if preempt {
2014-11-11 17:04:34 -05:00
if gp == thisg . m . g0 {
2014-12-27 20:58:00 -08:00
throw ( "runtime: preempt g0" )
2014-11-11 17:04:34 -05:00
}
2015-04-17 00:21:30 -04:00
if thisg . m . p == 0 && thisg . m . locks == 0 {
2014-12-27 20:58:00 -08:00
throw ( "runtime: g is running but p is not" )
2014-11-11 17:04:34 -05:00
}
2016-02-25 15:37:40 -05:00
// Synchronize with scang.
casgstatus ( gp , _Grunning , _Gwaiting )
2014-11-11 17:04:34 -05:00
if gp . preemptscan {
2014-11-21 16:46:27 -05:00
for ! castogscanstatus ( gp , _Gwaiting , _Gscanwaiting ) {
2015-05-28 12:37:12 -04:00
// Likely to be racing with the GC as
// it sees a _Gwaiting and does the
// stack scan. If so, gcworkdone will
// be set and gcphasework will simply
// return.
2014-11-21 16:46:27 -05:00
}
2015-06-16 19:20:18 -04:00
if ! gp . gcscandone {
scanstack ( gp )
gp . gcscandone = true
}
gp . preemptscan = false
gp . preempt = false
2014-11-21 16:46:27 -05:00
casfrom_Gscanstatus ( gp , _Gscanwaiting , _Gwaiting )
2014-11-11 17:04:34 -05:00
casgstatus ( gp , _Gwaiting , _Grunning )
2015-01-05 16:29:21 +00:00
gp . stackguard0 = gp . stack . lo + _StackGuard
2015-06-16 19:20:18 -04:00
gogo ( & gp . sched ) // never return
2014-11-11 17:04:34 -05:00
}
// Act like goroutine called runtime.Gosched.
casgstatus ( gp , _Gwaiting , _Grunning )
2014-12-12 18:41:57 +01:00
gopreempt_m ( gp ) // never return
2014-11-11 17:04:34 -05:00
}
// Allocate a bigger segment and move the stack.
2015-05-20 15:29:53 -04:00
oldsize := int ( gp . stackAlloc )
2014-11-11 17:04:34 -05:00
newsize := oldsize * 2
if uintptr ( newsize ) > maxstacksize {
print ( "runtime: goroutine stack exceeds " , maxstacksize , "-byte limit\n" )
2014-12-27 20:58:00 -08:00
throw ( "stack overflow" )
2014-11-11 17:04:34 -05:00
}
2016-02-25 15:37:40 -05:00
// The goroutine must be executing in order to call newstack,
// so it must be Grunning (or Gscanrunning).
casgstatus ( gp , _Grunning , _Gcopystack )
2014-11-15 08:00:38 -05:00
// The concurrent GC will not scan the stack while we are doing the copy since
// the gp is in a Gcopystack status.
2014-11-11 17:04:34 -05:00
copystack ( gp , uintptr ( newsize ) )
if stackDebug >= 1 {
print ( "stack grow done\n" )
}
2014-11-15 08:00:38 -05:00
casgstatus ( gp , _Gcopystack , _Grunning )
2014-11-11 17:04:34 -05:00
gogo ( & gp . sched )
}
//go:nosplit
func nilfunc ( ) {
* ( * uint8 ) ( nil ) = 0
}
// adjust Gobuf as if it executed a call to fn
// and then did an immediate gosave.
func gostartcallfn ( gobuf * gobuf , fv * funcval ) {
var fn unsafe . Pointer
if fv != nil {
2015-10-15 14:33:50 -07:00
fn = unsafe . Pointer ( fv . fn )
2014-11-11 17:04:34 -05:00
} else {
fn = unsafe . Pointer ( funcPC ( nilfunc ) )
}
2015-10-15 14:33:50 -07:00
gostartcall ( gobuf , fn , unsafe . Pointer ( fv ) )
2014-11-11 17:04:34 -05:00
}
// Maybe shrink the stack being used by gp.
// Called at garbage collection time.
func shrinkstack ( gp * g ) {
if readgstatus ( gp ) == _Gdead {
if gp . stack . lo != 0 {
// Free whole stack - it will get reallocated
// if G is used again.
2015-05-20 15:29:53 -04:00
stackfree ( gp . stack , gp . stackAlloc )
2014-11-11 17:04:34 -05:00
gp . stack . lo = 0
gp . stack . hi = 0
2015-05-20 16:16:04 -04:00
gp . stkbar = nil
gp . stkbarPos = 0
2014-11-11 17:04:34 -05:00
}
return
}
if gp . stack . lo == 0 {
2014-12-27 20:58:00 -08:00
throw ( "missing stack in shrinkstack" )
2014-11-11 17:04:34 -05:00
}
2015-06-05 11:51:49 -04:00
if debug . gcshrinkstackoff > 0 {
return
}
2015-05-20 15:29:53 -04:00
oldsize := gp . stackAlloc
2014-11-11 17:04:34 -05:00
newsize := oldsize / 2
runtime: account for stack guard when shrinking the stack
Currently, when shrinkstack computes whether the halved stack
allocation will have enough room for the stack, it accounts for the
stack space that's actively in use but fails to leave extra room for
the stack guard space. As a result, *if* the minimum stack size is
small enough or the guard large enough, it may shrink the stack and
leave less than enough room to run nosplit functions. If the next
function called after the stack shrink is a nosplit function, it may
overflow the stack without noticing and overwrite non-stack memory.
We don't think this is happening under normal conditions right now.
The minimum stack allocation is 2K and the guard is 640 bytes. The
"worst case" stack shrink is from 4K (4048 bytes after stack barrier
array reservation) to 2K (2016 bytes after stack barrier array
reservation), which means the largest "used" size that will qualify
for shrinking is 4048/4 - 8 = 1004 bytes. After copying, that leaves
2016 - 1004 = 1012 bytes of available stack, which is significantly
more than the guard space.
If we were to reduce the minimum stack size to 1K or raise the guard
space above 1012 bytes, the logic in shrinkstack would no longer leave
enough space.
It's also possible to trigger this problem by setting
firstStackBarrierOffset to 0, which puts stack barriers in a debug
mode that steals away *half* of the stack for the stack barrier array
reservation. Then, the largest "used" size that qualifies for
shrinking is (4096/2)/4 - 8 = 504 bytes. After copying, that leaves
(2096/2) - 504 = 8 bytes of available stack; much less than the
required guard space. This causes failures like those in issue #11027
because func gc() shrinks its own stack and then immediately calls
casgstatus (a nosplit function), which overflows the stack and
overwrites a free list pointer in the neighboring span. However, since
this seems to require the special debug mode, we don't think it's
responsible for issue #11027.
To forestall all of these subtle issues, this commit modifies
shrinkstack to correctly account for the guard space when considering
whether to halve the stack allocation.
Change-Id: I7312584addc63b5bfe55cc384a1012f6181f1b9d
Reviewed-on: https://go-review.googlesource.com/10714
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Russ Cox <rsc@golang.org>
2015-06-04 17:28:02 -04:00
// Don't shrink the allocation below the minimum-sized stack
// allocation.
2014-11-11 17:04:34 -05:00
if newsize < _FixedStack {
runtime: account for stack guard when shrinking the stack
Currently, when shrinkstack computes whether the halved stack
allocation will have enough room for the stack, it accounts for the
stack space that's actively in use but fails to leave extra room for
the stack guard space. As a result, *if* the minimum stack size is
small enough or the guard large enough, it may shrink the stack and
leave less than enough room to run nosplit functions. If the next
function called after the stack shrink is a nosplit function, it may
overflow the stack without noticing and overwrite non-stack memory.
We don't think this is happening under normal conditions right now.
The minimum stack allocation is 2K and the guard is 640 bytes. The
"worst case" stack shrink is from 4K (4048 bytes after stack barrier
array reservation) to 2K (2016 bytes after stack barrier array
reservation), which means the largest "used" size that will qualify
for shrinking is 4048/4 - 8 = 1004 bytes. After copying, that leaves
2016 - 1004 = 1012 bytes of available stack, which is significantly
more than the guard space.
If we were to reduce the minimum stack size to 1K or raise the guard
space above 1012 bytes, the logic in shrinkstack would no longer leave
enough space.
It's also possible to trigger this problem by setting
firstStackBarrierOffset to 0, which puts stack barriers in a debug
mode that steals away *half* of the stack for the stack barrier array
reservation. Then, the largest "used" size that qualifies for
shrinking is (4096/2)/4 - 8 = 504 bytes. After copying, that leaves
(2096/2) - 504 = 8 bytes of available stack; much less than the
required guard space. This causes failures like those in issue #11027
because func gc() shrinks its own stack and then immediately calls
casgstatus (a nosplit function), which overflows the stack and
overwrites a free list pointer in the neighboring span. However, since
this seems to require the special debug mode, we don't think it's
responsible for issue #11027.
To forestall all of these subtle issues, this commit modifies
shrinkstack to correctly account for the guard space when considering
whether to halve the stack allocation.
Change-Id: I7312584addc63b5bfe55cc384a1012f6181f1b9d
Reviewed-on: https://go-review.googlesource.com/10714
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Russ Cox <rsc@golang.org>
2015-06-04 17:28:02 -04:00
return
2014-11-11 17:04:34 -05:00
}
runtime: account for stack guard when shrinking the stack
Currently, when shrinkstack computes whether the halved stack
allocation will have enough room for the stack, it accounts for the
stack space that's actively in use but fails to leave extra room for
the stack guard space. As a result, *if* the minimum stack size is
small enough or the guard large enough, it may shrink the stack and
leave less than enough room to run nosplit functions. If the next
function called after the stack shrink is a nosplit function, it may
overflow the stack without noticing and overwrite non-stack memory.
We don't think this is happening under normal conditions right now.
The minimum stack allocation is 2K and the guard is 640 bytes. The
"worst case" stack shrink is from 4K (4048 bytes after stack barrier
array reservation) to 2K (2016 bytes after stack barrier array
reservation), which means the largest "used" size that will qualify
for shrinking is 4048/4 - 8 = 1004 bytes. After copying, that leaves
2016 - 1004 = 1012 bytes of available stack, which is significantly
more than the guard space.
If we were to reduce the minimum stack size to 1K or raise the guard
space above 1012 bytes, the logic in shrinkstack would no longer leave
enough space.
It's also possible to trigger this problem by setting
firstStackBarrierOffset to 0, which puts stack barriers in a debug
mode that steals away *half* of the stack for the stack barrier array
reservation. Then, the largest "used" size that qualifies for
shrinking is (4096/2)/4 - 8 = 504 bytes. After copying, that leaves
(2096/2) - 504 = 8 bytes of available stack; much less than the
required guard space. This causes failures like those in issue #11027
because func gc() shrinks its own stack and then immediately calls
casgstatus (a nosplit function), which overflows the stack and
overwrites a free list pointer in the neighboring span. However, since
this seems to require the special debug mode, we don't think it's
responsible for issue #11027.
To forestall all of these subtle issues, this commit modifies
shrinkstack to correctly account for the guard space when considering
whether to halve the stack allocation.
Change-Id: I7312584addc63b5bfe55cc384a1012f6181f1b9d
Reviewed-on: https://go-review.googlesource.com/10714
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Russ Cox <rsc@golang.org>
2015-06-04 17:28:02 -04:00
// Compute how much of the stack is currently in use and only
// shrink the stack if gp is using less than a quarter of its
// current stack. The currently used stack includes everything
// down to the SP plus the stack guard space that ensures
// there's room for nosplit functions.
avail := gp . stack . hi - gp . stack . lo
if used := gp . stack . hi - gp . sched . sp + _StackLimit ; used >= avail / 4 {
return
2014-11-11 17:04:34 -05:00
}
// We can't copy the stack if we're in a syscall.
// The syscall might have pointers into the stack.
if gp . syscallsp != 0 {
return
}
2015-11-11 12:39:30 -05:00
if sys . GoosWindows != 0 && gp . m != nil && gp . m . libcallsp != 0 {
2014-11-11 17:04:34 -05:00
return
}
if stackDebug > 0 {
print ( "shrinking stack " , oldsize , "->" , newsize , "\n" )
}
2014-11-15 08:00:38 -05:00
2014-12-05 11:40:41 -05:00
oldstatus := casgcopystack ( gp )
2014-11-11 17:04:34 -05:00
copystack ( gp , newsize )
2014-11-15 08:00:38 -05:00
casgstatus ( gp , _Gcopystack , oldstatus )
2014-11-11 17:04:34 -05:00
}
runtime: don't free stack spans during GC
Memory for stacks is manually managed by the runtime and, currently
(with one exception) we free stack spans immediately when the last
stack on a span is freed. However, the garbage collector assumes that
spans can never transition from non-free to free during scan or mark.
This disagreement makes it possible for the garbage collector to mark
uninitialized objects and is blocking us from re-enabling the bad
pointer test in the garbage collector (issue #9880).
For example, the following sequence will result in marking an
uninitialized object:
1. scanobject loads a pointer slot out of the object it's scanning.
This happens to be one of the special pointers from the heap into a
stack. Call the pointer p and suppose it points into X's stack.
2. X, running on another thread, grows its stack and frees its old
stack.
3. The old stack happens to be large or was the last stack in its
span, so X frees this span, setting it to state _MSpanFree.
4. The span gets reused as a heap span.
5. scanobject calls heapBitsForObject, which loads the span containing
p, which is now in state _MSpanInUse, but doesn't necessarily have
an object at p. The not-object at p gets marked, and at this point
all sorts of things can go wrong.
We already have a partial solution to this. When shrinking a stack, we
put the old stack on a queue to be freed at the end of garbage
collection. This was done to address exactly this problem, but wasn't
a complete solution.
This commit generalizes this solution to both shrinking and growing
stacks. For stacks that fit in the stack pool, we simply don't free
the span, even if its reference count reaches zero. It's fine to reuse
the span for other stacks, and this enables that. At the end of GC, we
sweep for cached stack spans with a zero reference count and free
them. For larger stacks, we simply queue the stack span to be freed at
the end of GC. Ideally, we would reuse these large stack spans the way
we can small stack spans, but that's a more invasive change that will
have to wait until after the freeze.
Fixes #11267.
Change-Id: Ib7f2c5da4845cc0268e8dc098b08465116972a71
Reviewed-on: https://go-review.googlesource.com/11502
Reviewed-by: Russ Cox <rsc@golang.org>
2015-06-22 10:24:50 -04:00
// freeStackSpans frees unused stack spans at the end of GC.
func freeStackSpans ( ) {
2014-11-11 17:04:34 -05:00
lock ( & stackpoolmu )
runtime: don't free stack spans during GC
Memory for stacks is manually managed by the runtime and, currently
(with one exception) we free stack spans immediately when the last
stack on a span is freed. However, the garbage collector assumes that
spans can never transition from non-free to free during scan or mark.
This disagreement makes it possible for the garbage collector to mark
uninitialized objects and is blocking us from re-enabling the bad
pointer test in the garbage collector (issue #9880).
For example, the following sequence will result in marking an
uninitialized object:
1. scanobject loads a pointer slot out of the object it's scanning.
This happens to be one of the special pointers from the heap into a
stack. Call the pointer p and suppose it points into X's stack.
2. X, running on another thread, grows its stack and frees its old
stack.
3. The old stack happens to be large or was the last stack in its
span, so X frees this span, setting it to state _MSpanFree.
4. The span gets reused as a heap span.
5. scanobject calls heapBitsForObject, which loads the span containing
p, which is now in state _MSpanInUse, but doesn't necessarily have
an object at p. The not-object at p gets marked, and at this point
all sorts of things can go wrong.
We already have a partial solution to this. When shrinking a stack, we
put the old stack on a queue to be freed at the end of garbage
collection. This was done to address exactly this problem, but wasn't
a complete solution.
This commit generalizes this solution to both shrinking and growing
stacks. For stacks that fit in the stack pool, we simply don't free
the span, even if its reference count reaches zero. It's fine to reuse
the span for other stacks, and this enables that. At the end of GC, we
sweep for cached stack spans with a zero reference count and free
them. For larger stacks, we simply queue the stack span to be freed at
the end of GC. Ideally, we would reuse these large stack spans the way
we can small stack spans, but that's a more invasive change that will
have to wait until after the freeze.
Fixes #11267.
Change-Id: Ib7f2c5da4845cc0268e8dc098b08465116972a71
Reviewed-on: https://go-review.googlesource.com/11502
Reviewed-by: Russ Cox <rsc@golang.org>
2015-06-22 10:24:50 -04:00
// Scan stack pools for empty stack spans.
for order := range stackpool {
list := & stackpool [ order ]
2015-10-15 15:59:49 -07:00
for s := list . first ; s != nil ; {
runtime: don't free stack spans during GC
Memory for stacks is manually managed by the runtime and, currently
(with one exception) we free stack spans immediately when the last
stack on a span is freed. However, the garbage collector assumes that
spans can never transition from non-free to free during scan or mark.
This disagreement makes it possible for the garbage collector to mark
uninitialized objects and is blocking us from re-enabling the bad
pointer test in the garbage collector (issue #9880).
For example, the following sequence will result in marking an
uninitialized object:
1. scanobject loads a pointer slot out of the object it's scanning.
This happens to be one of the special pointers from the heap into a
stack. Call the pointer p and suppose it points into X's stack.
2. X, running on another thread, grows its stack and frees its old
stack.
3. The old stack happens to be large or was the last stack in its
span, so X frees this span, setting it to state _MSpanFree.
4. The span gets reused as a heap span.
5. scanobject calls heapBitsForObject, which loads the span containing
p, which is now in state _MSpanInUse, but doesn't necessarily have
an object at p. The not-object at p gets marked, and at this point
all sorts of things can go wrong.
We already have a partial solution to this. When shrinking a stack, we
put the old stack on a queue to be freed at the end of garbage
collection. This was done to address exactly this problem, but wasn't
a complete solution.
This commit generalizes this solution to both shrinking and growing
stacks. For stacks that fit in the stack pool, we simply don't free
the span, even if its reference count reaches zero. It's fine to reuse
the span for other stacks, and this enables that. At the end of GC, we
sweep for cached stack spans with a zero reference count and free
them. For larger stacks, we simply queue the stack span to be freed at
the end of GC. Ideally, we would reuse these large stack spans the way
we can small stack spans, but that's a more invasive change that will
have to wait until after the freeze.
Fixes #11267.
Change-Id: Ib7f2c5da4845cc0268e8dc098b08465116972a71
Reviewed-on: https://go-review.googlesource.com/11502
Reviewed-by: Russ Cox <rsc@golang.org>
2015-06-22 10:24:50 -04:00
next := s . next
if s . ref == 0 {
2015-11-11 16:13:51 -08:00
list . remove ( s )
runtime: don't free stack spans during GC
Memory for stacks is manually managed by the runtime and, currently
(with one exception) we free stack spans immediately when the last
stack on a span is freed. However, the garbage collector assumes that
spans can never transition from non-free to free during scan or mark.
This disagreement makes it possible for the garbage collector to mark
uninitialized objects and is blocking us from re-enabling the bad
pointer test in the garbage collector (issue #9880).
For example, the following sequence will result in marking an
uninitialized object:
1. scanobject loads a pointer slot out of the object it's scanning.
This happens to be one of the special pointers from the heap into a
stack. Call the pointer p and suppose it points into X's stack.
2. X, running on another thread, grows its stack and frees its old
stack.
3. The old stack happens to be large or was the last stack in its
span, so X frees this span, setting it to state _MSpanFree.
4. The span gets reused as a heap span.
5. scanobject calls heapBitsForObject, which loads the span containing
p, which is now in state _MSpanInUse, but doesn't necessarily have
an object at p. The not-object at p gets marked, and at this point
all sorts of things can go wrong.
We already have a partial solution to this. When shrinking a stack, we
put the old stack on a queue to be freed at the end of garbage
collection. This was done to address exactly this problem, but wasn't
a complete solution.
This commit generalizes this solution to both shrinking and growing
stacks. For stacks that fit in the stack pool, we simply don't free
the span, even if its reference count reaches zero. It's fine to reuse
the span for other stacks, and this enables that. At the end of GC, we
sweep for cached stack spans with a zero reference count and free
them. For larger stacks, we simply queue the stack span to be freed at
the end of GC. Ideally, we would reuse these large stack spans the way
we can small stack spans, but that's a more invasive change that will
have to wait until after the freeze.
Fixes #11267.
Change-Id: Ib7f2c5da4845cc0268e8dc098b08465116972a71
Reviewed-on: https://go-review.googlesource.com/11502
Reviewed-by: Russ Cox <rsc@golang.org>
2015-06-22 10:24:50 -04:00
s . freelist = 0
2015-11-11 16:13:51 -08:00
mheap_ . freeStack ( s )
runtime: don't free stack spans during GC
Memory for stacks is manually managed by the runtime and, currently
(with one exception) we free stack spans immediately when the last
stack on a span is freed. However, the garbage collector assumes that
spans can never transition from non-free to free during scan or mark.
This disagreement makes it possible for the garbage collector to mark
uninitialized objects and is blocking us from re-enabling the bad
pointer test in the garbage collector (issue #9880).
For example, the following sequence will result in marking an
uninitialized object:
1. scanobject loads a pointer slot out of the object it's scanning.
This happens to be one of the special pointers from the heap into a
stack. Call the pointer p and suppose it points into X's stack.
2. X, running on another thread, grows its stack and frees its old
stack.
3. The old stack happens to be large or was the last stack in its
span, so X frees this span, setting it to state _MSpanFree.
4. The span gets reused as a heap span.
5. scanobject calls heapBitsForObject, which loads the span containing
p, which is now in state _MSpanInUse, but doesn't necessarily have
an object at p. The not-object at p gets marked, and at this point
all sorts of things can go wrong.
We already have a partial solution to this. When shrinking a stack, we
put the old stack on a queue to be freed at the end of garbage
collection. This was done to address exactly this problem, but wasn't
a complete solution.
This commit generalizes this solution to both shrinking and growing
stacks. For stacks that fit in the stack pool, we simply don't free
the span, even if its reference count reaches zero. It's fine to reuse
the span for other stacks, and this enables that. At the end of GC, we
sweep for cached stack spans with a zero reference count and free
them. For larger stacks, we simply queue the stack span to be freed at
the end of GC. Ideally, we would reuse these large stack spans the way
we can small stack spans, but that's a more invasive change that will
have to wait until after the freeze.
Fixes #11267.
Change-Id: Ib7f2c5da4845cc0268e8dc098b08465116972a71
Reviewed-on: https://go-review.googlesource.com/11502
Reviewed-by: Russ Cox <rsc@golang.org>
2015-06-22 10:24:50 -04:00
}
s = next
}
2014-11-11 17:04:34 -05:00
}
runtime: don't free stack spans during GC
Memory for stacks is manually managed by the runtime and, currently
(with one exception) we free stack spans immediately when the last
stack on a span is freed. However, the garbage collector assumes that
spans can never transition from non-free to free during scan or mark.
This disagreement makes it possible for the garbage collector to mark
uninitialized objects and is blocking us from re-enabling the bad
pointer test in the garbage collector (issue #9880).
For example, the following sequence will result in marking an
uninitialized object:
1. scanobject loads a pointer slot out of the object it's scanning.
This happens to be one of the special pointers from the heap into a
stack. Call the pointer p and suppose it points into X's stack.
2. X, running on another thread, grows its stack and frees its old
stack.
3. The old stack happens to be large or was the last stack in its
span, so X frees this span, setting it to state _MSpanFree.
4. The span gets reused as a heap span.
5. scanobject calls heapBitsForObject, which loads the span containing
p, which is now in state _MSpanInUse, but doesn't necessarily have
an object at p. The not-object at p gets marked, and at this point
all sorts of things can go wrong.
We already have a partial solution to this. When shrinking a stack, we
put the old stack on a queue to be freed at the end of garbage
collection. This was done to address exactly this problem, but wasn't
a complete solution.
This commit generalizes this solution to both shrinking and growing
stacks. For stacks that fit in the stack pool, we simply don't free
the span, even if its reference count reaches zero. It's fine to reuse
the span for other stacks, and this enables that. At the end of GC, we
sweep for cached stack spans with a zero reference count and free
them. For larger stacks, we simply queue the stack span to be freed at
the end of GC. Ideally, we would reuse these large stack spans the way
we can small stack spans, but that's a more invasive change that will
have to wait until after the freeze.
Fixes #11267.
Change-Id: Ib7f2c5da4845cc0268e8dc098b08465116972a71
Reviewed-on: https://go-review.googlesource.com/11502
Reviewed-by: Russ Cox <rsc@golang.org>
2015-06-22 10:24:50 -04:00
unlock ( & stackpoolmu )
2015-12-14 14:30:25 -05:00
// Free large stack spans.
lock ( & stackLarge . lock )
for i := range stackLarge . free {
for s := stackLarge . free [ i ] . first ; s != nil ; {
next := s . next
stackLarge . free [ i ] . remove ( s )
mheap_ . freeStack ( s )
s = next
}
}
unlock ( & stackLarge . lock )
2014-11-11 17:04:34 -05:00
}
2015-01-05 16:29:21 +00:00
//go:nosplit
func morestackc ( ) {
systemstack ( func ( ) {
throw ( "attempt to execute C code on Go stack" )
} )
}