2014-11-11 17:04:34 -05:00
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package runtime
2015-11-02 14:09:24 -05:00
import (
"runtime/internal/atomic"
2015-11-11 12:39:30 -05:00
"runtime/internal/sys"
2015-11-02 14:09:24 -05:00
"unsafe"
)
2014-11-11 17:04:34 -05:00
2015-10-16 18:45:30 -07:00
/*
Stack layout parameters.
Included both by runtime (compiled via 6c) and linkers (compiled via gcc).

The per-goroutine g->stackguard is set to point StackGuard bytes
above the bottom of the stack. Each function compares its stack
pointer against g->stackguard to check for overflow. To cut one
instruction from the check sequence for functions with tiny frames,
the stack is allowed to protrude StackSmall bytes below the stack
guard. Functions with large frames don't bother with the check and
always call morestack. The sequences are (for amd64, others are
similar):

	guard = g->stackguard
	frame = function's stack frame size
	argsize = size of function arguments (call + return)

	stack frame size <= StackSmall:
		CMPQ guard, SP
		JHI 3(PC)
		MOVQ m->morearg, $(argsize << 32)
		CALL morestack(SB)

	stack frame size > StackSmall but < StackBig
		LEAQ (frame-StackSmall)(SP), R0
		CMPQ guard, R0
		JHI 3(PC)
		MOVQ m->morearg, $(argsize << 32)
		CALL morestack(SB)

	stack frame size >= StackBig:
		MOVQ m->morearg, $((argsize << 32) | frame)
		CALL morestack(SB)

The bottom StackGuard - StackSmall bytes are important: there has
to be enough room to execute functions that refuse to check for
stack overflow, either because they need to be adjacent to the
actual caller's frame (deferproc) or because they handle the imminent
stack overflow (morestack).

For example, deferproc might call malloc, which does one of the
above checks (without allocating a full frame), which might trigger
a call to morestack. This sequence needs to fit in the bottom
section of the stack. On amd64, morestack's frame is 40 bytes, and
deferproc's frame is 56 bytes. That fits well within the
StackGuard - StackSmall bytes at the bottom.
The linkers explore all possible call traces involving non-splitting
functions to make sure that this limit cannot be violated.
*/
const (
// StackSystem is a number of additional bytes to add
// to each stack below the usual guard area for OS-specific
// purposes like signal handling. Used on Windows, Plan 9,
2018-06-07 12:19:42 +02:00
// and iOS because they do not use a separate stack.
_StackSystem = sys . GoosWindows * 512 * sys . PtrSize + sys . GoosPlan9 * 512 + sys . GoosDarwin * sys . GoarchArm * 1024 + sys . GoosDarwin * sys . GoarchArm64 * 1024
2015-10-16 18:45:30 -07:00
// The minimum size of stack used by Go code
_StackMin = 2048
// The minimum stack size to allocate.
// The hackery here rounds FixedStack0 up to a power of 2.
_FixedStack0 = _StackMin + _StackSystem
_FixedStack1 = _FixedStack0 - 1
_FixedStack2 = _FixedStack1 | ( _FixedStack1 >> 1 )
_FixedStack3 = _FixedStack2 | ( _FixedStack2 >> 2 )
_FixedStack4 = _FixedStack3 | ( _FixedStack3 >> 4 )
_FixedStack5 = _FixedStack4 | ( _FixedStack4 >> 8 )
_FixedStack6 = _FixedStack5 | ( _FixedStack5 >> 16 )
_FixedStack = _FixedStack6 + 1
// Functions that need frames bigger than this use an extra
// instruction to do the stack split check, to avoid overflow
// in case SP - framesize wraps below zero.
// This value can be no bigger than the size of the unmapped
// space at zero.
_StackBig = 4096
// The stack guard is a pointer this many bytes above the
// bottom of the stack.
2016-08-15 13:51:00 -07:00
_StackGuard = 880 * sys . StackGuardMultiplier + _StackSystem
2015-10-16 18:45:30 -07:00
// After a stack split check the SP is allowed to be this
2016-03-01 23:21:55 +00:00
// many bytes below the stack guard. This saves an instruction
2015-10-16 18:45:30 -07:00
// in the checking sequence for tiny frames.
_StackSmall = 128
// The maximum number of bytes that a chain of NOSPLIT
// functions can use.
_StackLimit = _StackGuard - _StackSystem - _StackSmall
)
2014-11-11 17:04:34 -05:00
const (
	// stackDebug == 0: no logging
	//            == 1: logging of per-stack operations
	//            == 2: logging of per-frame operations
	//            == 3: logging of per-word updates
	//            == 4: logging of per-word reads
	stackDebug       = 0
	stackFromSystem  = 0 // allocate stacks from system memory instead of the heap
	stackFaultOnFree = 0 // old stacks are mapped noaccess to detect use after free
	stackPoisonCopy  = 0 // fill stack that should not be accessed with garbage, to detect bad dereferences during copy
	stackNoCache     = 0 // disable per-P small stack caches

	// check the BP links during traceback.
	debugCheckBP = false
)
const (
2015-11-11 12:39:30 -05:00
uintptrMask = 1 << ( 8 * sys . PtrSize ) - 1
2014-11-11 17:04:34 -05:00
// Goroutine preemption request.
2015-01-05 16:29:21 +00:00
// Stored into g->stackguard0 to cause split stack check failure.
2014-11-11 17:04:34 -05:00
// Must be greater than any real sp.
// 0xfffffade in hex.
stackPreempt = uintptrMask & - 1314
// Thread is forking.
2015-01-05 16:29:21 +00:00
// Stored into g->stackguard0 to cause split stack check failure.
2014-11-11 17:04:34 -05:00
// Must be greater than any real sp.
stackFork = uintptrMask & - 1234
)
// Global pool of spans that have free stacks.
// Stacks are assigned an order according to size.
// order = log_2(size/FixedStack)
// There is a free list for each order.
// TODO: one lock per order?
2015-10-15 15:59:49 -07:00
var stackpool [ _NumStackOrders ] mSpanList
2014-11-11 17:04:34 -05:00
var stackpoolmu mutex
2015-12-14 14:30:25 -05:00
// Global pool of large stack spans.
var stackLarge struct {
lock mutex
2018-02-20 11:59:02 -05:00
free [ heapAddrBits - pageShift ] mSpanList // free lists by log_2(s.npages)
2015-12-14 14:30:25 -05:00
}
2014-11-11 17:04:34 -05:00
func stackinit ( ) {
if _StackCacheSize & _PageMask != 0 {
2014-12-27 20:58:00 -08:00
throw ( "cache size must be a multiple of page size" )
2014-11-11 17:04:34 -05:00
}
for i := range stackpool {
2015-11-11 16:13:51 -08:00
stackpool [ i ] . init ( )
2014-11-11 17:04:34 -05:00
}
2015-12-14 14:30:25 -05:00
for i := range stackLarge . free {
stackLarge . free [ i ] . init ( )
}
}
// stacklog2 returns ⌊log_2(n)⌋.
// For n == 0 (where the logarithm is undefined) it returns 0.
func stacklog2(n uintptr) int {
	log2 := 0
	// Each halving of n that still leaves a value above 1
	// corresponds to one more power of two.
	for n > 1 {
		n >>= 1
		log2++
	}
	return log2
}
2016-03-01 23:21:55 +00:00
// Allocates a stack from the free pool. Must be called with
2014-11-11 17:04:34 -05:00
// stackpoolmu held.
2014-11-20 12:08:13 -05:00
func stackpoolalloc ( order uint8 ) gclinkptr {
2014-11-11 17:04:34 -05:00
list := & stackpool [ order ]
2015-10-15 15:59:49 -07:00
s := list . first
if s == nil {
2016-03-01 23:21:55 +00:00
// no free stacks. Allocate another span worth.
2017-03-16 14:46:53 -04:00
s = mheap_ . allocManual ( _StackCacheSize >> _PageShift , & memstats . stacks_inuse )
2014-11-11 17:04:34 -05:00
if s == nil {
2014-12-27 20:58:00 -08:00
throw ( "out of memory" )
2014-11-11 17:04:34 -05:00
}
2016-02-16 17:16:43 -05:00
if s . allocCount != 0 {
throw ( "bad allocCount" )
2014-11-11 17:04:34 -05:00
}
2017-03-16 15:02:02 -04:00
if s . manualFreeList . ptr ( ) != nil {
throw ( "bad manualFreeList" )
2014-11-11 17:04:34 -05:00
}
2018-06-29 14:56:48 -04:00
osStackAlloc ( s )
2017-03-16 14:55:10 -04:00
s . elemsize = _FixedStack << order
for i := uintptr ( 0 ) ; i < _StackCacheSize ; i += s . elemsize {
2016-04-28 10:59:00 -04:00
x := gclinkptr ( s . base ( ) + i )
2017-03-16 15:02:02 -04:00
x . ptr ( ) . next = s . manualFreeList
s . manualFreeList = x
2014-11-11 17:04:34 -05:00
}
2015-11-11 16:13:51 -08:00
list . insert ( s )
2014-11-11 17:04:34 -05:00
}
2017-03-16 15:02:02 -04:00
x := s . manualFreeList
2014-11-20 12:08:13 -05:00
if x . ptr ( ) == nil {
2014-12-27 20:58:00 -08:00
throw ( "span has no free stacks" )
2014-11-11 17:04:34 -05:00
}
2017-03-16 15:02:02 -04:00
s . manualFreeList = x . ptr ( ) . next
2016-02-16 17:16:43 -05:00
s . allocCount ++
2017-03-16 15:02:02 -04:00
if s . manualFreeList . ptr ( ) == nil {
2014-11-11 17:04:34 -05:00
// all stacks in s are allocated.
2015-11-11 16:13:51 -08:00
list . remove ( s )
2014-11-11 17:04:34 -05:00
}
return x
}
2016-03-01 23:21:55 +00:00
// Adds stack x to the free pool. Must be called with stackpoolmu held.
2014-11-20 12:08:13 -05:00
func stackpoolfree ( x gclinkptr , order uint8 ) {
2017-12-04 10:58:15 -05:00
s := spanOfUnchecked ( uintptr ( x ) )
2018-09-26 16:39:02 -04:00
if s . state != mSpanManual {
2014-12-27 20:58:00 -08:00
throw ( "freeing stack not in a stack span" )
2014-11-11 17:04:34 -05:00
}
2017-03-16 15:02:02 -04:00
if s . manualFreeList . ptr ( ) == nil {
2014-11-11 17:04:34 -05:00
// s will now have a free stack
2015-11-11 16:13:51 -08:00
stackpool [ order ] . insert ( s )
2014-11-11 17:04:34 -05:00
}
2017-03-16 15:02:02 -04:00
x . ptr ( ) . next = s . manualFreeList
s . manualFreeList = x
2016-02-16 17:16:43 -05:00
s . allocCount --
if gcphase == _GCoff && s . allocCount == 0 {
runtime: don't free stack spans during GC
Memory for stacks is manually managed by the runtime and, currently
(with one exception) we free stack spans immediately when the last
stack on a span is freed. However, the garbage collector assumes that
spans can never transition from non-free to free during scan or mark.
This disagreement makes it possible for the garbage collector to mark
uninitialized objects and is blocking us from re-enabling the bad
pointer test in the garbage collector (issue #9880).
For example, the following sequence will result in marking an
uninitialized object:
1. scanobject loads a pointer slot out of the object it's scanning.
This happens to be one of the special pointers from the heap into a
stack. Call the pointer p and suppose it points into X's stack.
2. X, running on another thread, grows its stack and frees its old
stack.
3. The old stack happens to be large or was the last stack in its
span, so X frees this span, setting it to state _MSpanFree.
4. The span gets reused as a heap span.
5. scanobject calls heapBitsForObject, which loads the span containing
p, which is now in state _MSpanInUse, but doesn't necessarily have
an object at p. The not-object at p gets marked, and at this point
all sorts of things can go wrong.
We already have a partial solution to this. When shrinking a stack, we
put the old stack on a queue to be freed at the end of garbage
collection. This was done to address exactly this problem, but wasn't
a complete solution.
This commit generalizes this solution to both shrinking and growing
stacks. For stacks that fit in the stack pool, we simply don't free
the span, even if its reference count reaches zero. It's fine to reuse
the span for other stacks, and this enables that. At the end of GC, we
sweep for cached stack spans with a zero reference count and free
them. For larger stacks, we simply queue the stack span to be freed at
the end of GC. Ideally, we would reuse these large stack spans the way
we can small stack spans, but that's a more invasive change that will
have to wait until after the freeze.
Fixes #11267.
Change-Id: Ib7f2c5da4845cc0268e8dc098b08465116972a71
Reviewed-on: https://go-review.googlesource.com/11502
Reviewed-by: Russ Cox <rsc@golang.org>
2015-06-22 10:24:50 -04:00
// Span is completely free. Return it to the heap
// immediately if we're sweeping.
//
// If GC is active, we delay the free until the end of
// GC to avoid the following type of situation:
//
// 1) GC starts, scans a SudoG but does not yet mark the SudoG.elem pointer
// 2) The stack that pointer points to is copied
// 3) The old stack is freed
// 4) The containing span is marked free
// 5) GC attempts to mark the SudoG.elem pointer. The
// marking fails because the pointer looks like a
// pointer into a free span.
//
// By not freeing, we prevent step #4 until GC is done.
2015-11-11 16:13:51 -08:00
stackpool [ order ] . remove ( s )
2017-03-16 15:02:02 -04:00
s . manualFreeList = 0
2018-06-29 14:56:48 -04:00
osStackFree ( s )
2017-03-16 14:46:53 -04:00
mheap_ . freeManual ( s , & memstats . stacks_inuse )
2014-11-11 17:04:34 -05:00
}
}
// stackcacherefill/stackcacherelease implement a global pool of stack segments.
// The pool is required to prevent unlimited growth of per-thread caches.
2016-05-27 12:21:14 -04:00
//
//go:systemstack
2014-11-11 17:04:34 -05:00
func stackcacherefill ( c * mcache , order uint8 ) {
if stackDebug >= 1 {
print ( "stackcacherefill order=" , order , "\n" )
}
// Grab some stacks from the global cache.
// Grab half of the allowed capacity (to prevent thrashing).
2014-11-20 12:08:13 -05:00
var list gclinkptr
2014-11-11 17:04:34 -05:00
var size uintptr
lock ( & stackpoolmu )
for size < _StackCacheSize / 2 {
x := stackpoolalloc ( order )
2014-11-20 12:08:13 -05:00
x . ptr ( ) . next = list
2014-11-11 17:04:34 -05:00
list = x
size += _FixedStack << order
}
unlock ( & stackpoolmu )
c . stackcache [ order ] . list = list
c . stackcache [ order ] . size = size
}
2016-05-27 12:21:14 -04:00
//go:systemstack
2014-11-11 17:04:34 -05:00
func stackcacherelease ( c * mcache , order uint8 ) {
if stackDebug >= 1 {
print ( "stackcacherelease order=" , order , "\n" )
}
x := c . stackcache [ order ] . list
size := c . stackcache [ order ] . size
lock ( & stackpoolmu )
for size > _StackCacheSize / 2 {
2014-11-20 12:08:13 -05:00
y := x . ptr ( ) . next
2014-11-11 17:04:34 -05:00
stackpoolfree ( x , order )
x = y
size -= _FixedStack << order
}
unlock ( & stackpoolmu )
c . stackcache [ order ] . list = x
c . stackcache [ order ] . size = size
}
2016-05-27 12:21:14 -04:00
//go:systemstack
2014-11-11 17:04:34 -05:00
func stackcache_clear ( c * mcache ) {
if stackDebug >= 1 {
print ( "stackcache clear\n" )
}
lock ( & stackpoolmu )
for order := uint8 ( 0 ) ; order < _NumStackOrders ; order ++ {
x := c . stackcache [ order ] . list
2014-11-20 12:08:13 -05:00
for x . ptr ( ) != nil {
y := x . ptr ( ) . next
2014-11-11 17:04:34 -05:00
stackpoolfree ( x , order )
x = y
}
2014-11-20 12:08:13 -05:00
c . stackcache [ order ] . list = 0
2014-11-11 17:04:34 -05:00
c . stackcache [ order ] . size = 0
}
unlock ( & stackpoolmu )
}
2016-05-27 12:21:14 -04:00
// stackalloc allocates an n byte stack.
//
// stackalloc must run on the system stack because it uses per-P
// resources and must not split the stack.
//
//go:systemstack
2017-02-09 14:03:49 -05:00
func stackalloc ( n uint32 ) stack {
2014-11-11 17:04:34 -05:00
// Stackalloc must be called on scheduler stack, so that we
// never try to grow the stack during the code that stackalloc runs.
// Doing so would cause a deadlock (issue 1547).
thisg := getg ( )
if thisg != thisg . m . g0 {
2014-12-27 20:58:00 -08:00
throw ( "stackalloc not on scheduler stack" )
2014-11-11 17:04:34 -05:00
}
if n & ( n - 1 ) != 0 {
2014-12-27 20:58:00 -08:00
throw ( "stack size not a power of 2" )
2014-11-11 17:04:34 -05:00
}
if stackDebug >= 1 {
print ( "stackalloc " , n , "\n" )
}
if debug . efence != 0 || stackFromSystem != 0 {
2017-05-18 13:59:00 -04:00
n = uint32 ( round ( uintptr ( n ) , physPageSize ) )
v := sysAlloc ( uintptr ( n ) , & memstats . stacks_sys )
2014-11-11 17:04:34 -05:00
if v == nil {
2014-12-27 20:58:00 -08:00
throw ( "out of memory (stackalloc)" )
2014-11-11 17:04:34 -05:00
}
2017-02-09 14:03:49 -05:00
return stack { uintptr ( v ) , uintptr ( v ) + uintptr ( n ) }
2014-11-11 17:04:34 -05:00
}
// Small stacks are allocated with a fixed-size free-list allocator.
// If we need a stack of a bigger size, we fall back on allocating
// a dedicated span.
var v unsafe . Pointer
2017-05-18 14:35:53 -04:00
if n < _FixedStack << _NumStackOrders && n < _StackCacheSize {
2014-11-11 17:04:34 -05:00
order := uint8 ( 0 )
n2 := n
for n2 > _FixedStack {
order ++
n2 >>= 1
}
2014-11-20 12:08:13 -05:00
var x gclinkptr
2014-11-11 17:04:34 -05:00
c := thisg . m . mcache
2018-08-26 21:33:26 -04:00
if stackNoCache != 0 || c == nil || thisg . m . preemptoff != "" {
2014-11-11 17:04:34 -05:00
// c == nil can happen in the guts of exitsyscall or
// procresize. Just get a stack from the global pool.
// Also don't touch stackcache during gc
// as it's flushed concurrently.
lock ( & stackpoolmu )
x = stackpoolalloc ( order )
unlock ( & stackpoolmu )
} else {
x = c . stackcache [ order ] . list
2014-11-20 12:08:13 -05:00
if x . ptr ( ) == nil {
2014-11-11 17:04:34 -05:00
stackcacherefill ( c , order )
x = c . stackcache [ order ] . list
}
2014-11-20 12:08:13 -05:00
c . stackcache [ order ] . list = x . ptr ( ) . next
2014-11-11 17:04:34 -05:00
c . stackcache [ order ] . size -= uintptr ( n )
}
2015-10-15 14:33:50 -07:00
v = unsafe . Pointer ( x )
2014-11-11 17:04:34 -05:00
} else {
2015-12-14 14:30:25 -05:00
var s * mspan
npage := uintptr ( n ) >> _PageShift
log2npage := stacklog2 ( npage )
// Try to get a stack from the large stack cache.
lock ( & stackLarge . lock )
if ! stackLarge . free [ log2npage ] . isEmpty ( ) {
s = stackLarge . free [ log2npage ] . first
stackLarge . free [ log2npage ] . remove ( s )
}
unlock ( & stackLarge . lock )
2014-11-11 17:04:34 -05:00
if s == nil {
2015-12-14 14:30:25 -05:00
// Allocate a new stack from the heap.
2017-03-16 14:46:53 -04:00
s = mheap_ . allocManual ( npage , & memstats . stacks_inuse )
2015-12-14 14:30:25 -05:00
if s == nil {
throw ( "out of memory" )
}
2018-06-29 14:56:48 -04:00
osStackAlloc ( s )
2017-03-16 14:55:10 -04:00
s . elemsize = uintptr ( n )
2014-11-11 17:04:34 -05:00
}
2016-04-28 10:59:00 -04:00
v = unsafe . Pointer ( s . base ( ) )
2014-11-11 17:04:34 -05:00
}
if raceenabled {
racemalloc ( v , uintptr ( n ) )
}
2015-10-21 11:04:42 -07:00
if msanenabled {
msanmalloc ( v , uintptr ( n ) )
}
2014-11-11 17:04:34 -05:00
if stackDebug >= 1 {
print ( " allocated " , v , "\n" )
}
2017-02-09 14:03:49 -05:00
return stack { uintptr ( v ) , uintptr ( v ) + uintptr ( n ) }
2014-11-11 17:04:34 -05:00
}
2016-05-27 12:21:14 -04:00
// stackfree frees an n byte stack allocation at stk.
//
// stackfree must run on the system stack because it uses per-P
// resources and must not split the stack.
//
//go:systemstack
2017-02-09 14:11:13 -05:00
func stackfree ( stk stack ) {
2014-11-11 17:04:34 -05:00
gp := getg ( )
2015-10-15 14:33:50 -07:00
v := unsafe . Pointer ( stk . lo )
2017-02-09 14:11:13 -05:00
n := stk . hi - stk . lo
2014-11-11 17:04:34 -05:00
if n & ( n - 1 ) != 0 {
2014-12-27 20:58:00 -08:00
throw ( "stack not a power of 2" )
2014-11-11 17:04:34 -05:00
}
2015-05-20 15:29:53 -04:00
if stk . lo + n < stk . hi {
throw ( "bad stack size" )
}
2014-11-11 17:04:34 -05:00
if stackDebug >= 1 {
println ( "stackfree" , v , n )
2016-10-17 18:41:56 -04:00
memclrNoHeapPointers ( v , n ) // for testing, clobber stack data
2014-11-11 17:04:34 -05:00
}
if debug . efence != 0 || stackFromSystem != 0 {
if debug . efence != 0 || stackFaultOnFree != 0 {
sysFault ( v , n )
} else {
sysFree ( v , n , & memstats . stacks_sys )
}
return
}
2015-10-21 11:04:42 -07:00
if msanenabled {
msanfree ( v , n )
}
2017-05-18 14:35:53 -04:00
if n < _FixedStack << _NumStackOrders && n < _StackCacheSize {
2014-11-11 17:04:34 -05:00
order := uint8 ( 0 )
n2 := n
for n2 > _FixedStack {
order ++
n2 >>= 1
}
2014-11-20 12:08:13 -05:00
x := gclinkptr ( v )
2014-11-11 17:04:34 -05:00
c := gp . m . mcache
2018-08-26 21:33:26 -04:00
if stackNoCache != 0 || c == nil || gp . m . preemptoff != "" {
2014-11-11 17:04:34 -05:00
lock ( & stackpoolmu )
stackpoolfree ( x , order )
unlock ( & stackpoolmu )
} else {
if c . stackcache [ order ] . size >= _StackCacheSize {
stackcacherelease ( c , order )
}
2014-11-20 12:08:13 -05:00
x . ptr ( ) . next = c . stackcache [ order ] . list
2014-11-11 17:04:34 -05:00
c . stackcache [ order ] . list = x
c . stackcache [ order ] . size += n
}
} else {
2017-12-04 10:58:15 -05:00
s := spanOfUnchecked ( uintptr ( v ) )
2018-09-26 16:39:02 -04:00
if s . state != mSpanManual {
2016-04-28 10:59:00 -04:00
println ( hex ( s . base ( ) ) , v )
2014-12-27 20:58:00 -08:00
throw ( "bad span state" )
2014-11-11 17:04:34 -05:00
}
runtime: don't free stack spans during GC
Memory for stacks is manually managed by the runtime and, currently
(with one exception) we free stack spans immediately when the last
stack on a span is freed. However, the garbage collector assumes that
spans can never transition from non-free to free during scan or mark.
This disagreement makes it possible for the garbage collector to mark
uninitialized objects and is blocking us from re-enabling the bad
pointer test in the garbage collector (issue #9880).
For example, the following sequence will result in marking an
uninitialized object:
1. scanobject loads a pointer slot out of the object it's scanning.
This happens to be one of the special pointers from the heap into a
stack. Call the pointer p and suppose it points into X's stack.
2. X, running on another thread, grows its stack and frees its old
stack.
3. The old stack happens to be large or was the last stack in its
span, so X frees this span, setting it to state _MSpanFree.
4. The span gets reused as a heap span.
5. scanobject calls heapBitsForObject, which loads the span containing
p, which is now in state _MSpanInUse, but doesn't necessarily have
an object at p. The not-object at p gets marked, and at this point
all sorts of things can go wrong.
We already have a partial solution to this. When shrinking a stack, we
put the old stack on a queue to be freed at the end of garbage
collection. This was done to address exactly this problem, but wasn't
a complete solution.
This commit generalizes this solution to both shrinking and growing
stacks. For stacks that fit in the stack pool, we simply don't free
the span, even if its reference count reaches zero. It's fine to reuse
the span for other stacks, and this enables that. At the end of GC, we
sweep for cached stack spans with a zero reference count and free
them. For larger stacks, we simply queue the stack span to be freed at
the end of GC. Ideally, we would reuse these large stack spans the way
we can small stack spans, but that's a more invasive change that will
have to wait until after the freeze.
Fixes #11267.
Change-Id: Ib7f2c5da4845cc0268e8dc098b08465116972a71
Reviewed-on: https://go-review.googlesource.com/11502
Reviewed-by: Russ Cox <rsc@golang.org>
2015-06-22 10:24:50 -04:00
if gcphase == _GCoff {
// Free the stack immediately if we're
// sweeping.
2018-06-29 14:56:48 -04:00
osStackFree ( s )
2017-03-16 14:46:53 -04:00
mheap_ . freeManual ( s , & memstats . stacks_inuse )
runtime: don't free stack spans during GC
Memory for stacks is manually managed by the runtime and, currently
(with one exception) we free stack spans immediately when the last
stack on a span is freed. However, the garbage collector assumes that
spans can never transition from non-free to free during scan or mark.
This disagreement makes it possible for the garbage collector to mark
uninitialized objects and is blocking us from re-enabling the bad
pointer test in the garbage collector (issue #9880).
For example, the following sequence will result in marking an
uninitialized object:
1. scanobject loads a pointer slot out of the object it's scanning.
This happens to be one of the special pointers from the heap into a
stack. Call the pointer p and suppose it points into X's stack.
2. X, running on another thread, grows its stack and frees its old
stack.
3. The old stack happens to be large or was the last stack in its
span, so X frees this span, setting it to state _MSpanFree.
4. The span gets reused as a heap span.
5. scanobject calls heapBitsForObject, which loads the span containing
p, which is now in state _MSpanInUse, but doesn't necessarily have
an object at p. The not-object at p gets marked, and at this point
all sorts of things can go wrong.
We already have a partial solution to this. When shrinking a stack, we
put the old stack on a queue to be freed at the end of garbage
collection. This was done to address exactly this problem, but wasn't
a complete solution.
This commit generalizes this solution to both shrinking and growing
stacks. For stacks that fit in the stack pool, we simply don't free
the span, even if its reference count reaches zero. It's fine to reuse
the span for other stacks, and this enables that. At the end of GC, we
sweep for cached stack spans with a zero reference count and free
them. For larger stacks, we simply queue the stack span to be freed at
the end of GC. Ideally, we would reuse these large stack spans the way
we can small stack spans, but that's a more invasive change that will
have to wait until after the freeze.
Fixes #11267.
Change-Id: Ib7f2c5da4845cc0268e8dc098b08465116972a71
Reviewed-on: https://go-review.googlesource.com/11502
Reviewed-by: Russ Cox <rsc@golang.org>
2015-06-22 10:24:50 -04:00
} else {
2015-12-14 14:30:25 -05:00
// If the GC is running, we can't return a
// stack span to the heap because it could be
// reused as a heap span, and this state
// change would race with GC. Add it to the
// large stack cache instead.
log2npage := stacklog2 ( s . npages )
lock ( & stackLarge . lock )
stackLarge . free [ log2npage ] . insert ( s )
unlock ( & stackLarge . lock )
runtime: don't free stack spans during GC
Memory for stacks is manually managed by the runtime and, currently
(with one exception) we free stack spans immediately when the last
stack on a span is freed. However, the garbage collector assumes that
spans can never transition from non-free to free during scan or mark.
This disagreement makes it possible for the garbage collector to mark
uninitialized objects and is blocking us from re-enabling the bad
pointer test in the garbage collector (issue #9880).
For example, the following sequence will result in marking an
uninitialized object:
1. scanobject loads a pointer slot out of the object it's scanning.
This happens to be one of the special pointers from the heap into a
stack. Call the pointer p and suppose it points into X's stack.
2. X, running on another thread, grows its stack and frees its old
stack.
3. The old stack happens to be large or was the last stack in its
span, so X frees this span, setting it to state _MSpanFree.
4. The span gets reused as a heap span.
5. scanobject calls heapBitsForObject, which loads the span containing
p, which is now in state _MSpanInUse, but doesn't necessarily have
an object at p. The not-object at p gets marked, and at this point
all sorts of things can go wrong.
We already have a partial solution to this. When shrinking a stack, we
put the old stack on a queue to be freed at the end of garbage
collection. This was done to address exactly this problem, but wasn't
a complete solution.
This commit generalizes this solution to both shrinking and growing
stacks. For stacks that fit in the stack pool, we simply don't free
the span, even if its reference count reaches zero. It's fine to reuse
the span for other stacks, and this enables that. At the end of GC, we
sweep for cached stack spans with a zero reference count and free
them. For larger stacks, we simply queue the stack span to be freed at
the end of GC. Ideally, we would reuse these large stack spans the way
we can small stack spans, but that's a more invasive change that will
have to wait until after the freeze.
Fixes #11267.
Change-Id: Ib7f2c5da4845cc0268e8dc098b08465116972a71
Reviewed-on: https://go-review.googlesource.com/11502
Reviewed-by: Russ Cox <rsc@golang.org>
2015-06-22 10:24:50 -04:00
}
2014-11-11 17:04:34 -05:00
}
}
// maxstacksize starts at 1 MiB (1 << 20); per the inline note this is a
// placeholder until runtime.main installs the real value.
// NOTE(review): presumably the upper bound on a goroutine's stack size —
// confirm against the code that reads it.
var maxstacksize uintptr = 1 << 20 // enough until runtime.main sets it for real
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss
The bitmaps were 2 bits per pointer because we needed to distinguish
scalar, pointer, multiword, and we used the leftover value to distinguish
uninitialized from scalar, even though the garbage collector (GC) didn't care.
Now that there are no multiword structures from the GC's point of view,
cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not.
The GC assumes the same layout for stack frames and for the maps
describing the global data and bss sections, so change them all in one CL.
The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since
the 2-bit representation lives (at least for now) in some of the reflect data.
Because these stack frame bitmaps are stored directly in the rodata in
the binary, this CL reduces the size of the 6g binary by about 1.1%.
Performance change is basically a wash, but using less memory,
and smaller binaries, and enables other bitmap reductions.
name old mean new mean delta
BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005)
BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001)
BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141)
BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001)
BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095)
BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008)
BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014)
BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364)
BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010)
BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368)
BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484)
BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543)
BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000)
BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023)
BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126)
BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975)
BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153)
BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597)
BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804)
BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881)
BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561)
BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000)
BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000)
BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000)
BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000)
BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019)
BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000)
BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025)
BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000)
BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000)
BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305)
BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000)
BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465)
BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075)
BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337)
BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291)
BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507)
BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313)
BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312)
BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000)
BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007)
BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670)
BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828)
BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392)
BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813)
BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000)
BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000)
BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000)
BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985)
BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320)
BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799)
BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667)
BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001)
BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000)
BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011)
BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185)
BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001)
BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000)
BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979)
BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777)
BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771)
BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000)
BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081)
BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027)
BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022)
BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064)
BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001)
BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007)
BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143)
BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278)
BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252)
BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003)
BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581)
BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002)
BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879)
BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257)
BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678)
BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000)
BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000)
BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000)
BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000)
BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000)
BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000)
BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767)
BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347)
BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793)
Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9
Reviewed-on: https://go-review.googlesource.com/9406
Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 22:45:57 -04:00
// ptrnames maps a pointer-bitmap bit value to a printable name:
// index 0 is a scalar word, index 1 is a pointer word.
// adjustpointers indexes it with bitvector.ptrbit in its debug output.
var ptrnames = [ ] string {
0 : "scalar" ,
1 : "ptr" ,
2014-11-11 17:04:34 -05:00
}
// Stack frame layout
//
// (x86)
// +------------------+
// | args from caller |
// +------------------+ <- frame->argp
// | return address |
2015-01-14 11:09:50 -05:00
// +------------------+
// | caller's BP (*) | (*) if framepointer_enabled && varp < sp
2014-11-11 17:04:34 -05:00
// +------------------+ <- frame->varp
// | locals |
// +------------------+
// | args to callee |
// +------------------+ <- frame->sp
//
// (arm)
// +------------------+
// | args from caller |
// +------------------+ <- frame->argp
// | caller's retaddr |
// +------------------+ <- frame->varp
// | locals |
// +------------------+
// | args to callee |
// +------------------+
// | return address |
// +------------------+ <- frame->sp
// adjustinfo carries the state used to relocate pointers while a
// goroutine's stack is being copied (see copystack).
type adjustinfo struct {
// old is the stack being copied from. A word p is treated as a
// pointer into it when old.lo <= p < old.hi.
old stack
delta uintptr // ptr distance from old to new stack (newbase - oldbase)
runtime: add pcvalue cache to improve stack scan speed
The cost of scanning large stacks is currently dominated by the time
spent looking up and decoding the pcvalue table. However, large stacks
are usually large not because they contain calls to many different
functions, but because they contain many calls to the same, small set
of recursive functions. Hence, walking large stacks tends to make the
same pcvalue queries many times.
Based on this observation, this commit adds a small, very simple, and
fast cache in front of pcvalue lookup. We thread this cache down from
operations that make many pcvalue calls, such as gentraceback, stack
scanning, and stack adjusting.
This simple cache works well because it has minimal overhead when it's
not effective. I also tried a hashed direct-map cache, CLOCK-based
replacement, round-robin replacement, and round-robin with lookups
disabled until there had been at least 16 probes, but none of these
approaches had obvious wins over the random replacement policy in this
commit.
This nearly doubles the overall performance of the deep stack test
program from issue #10898:
name old time/op new time/op delta
Issue10898 16.5s ±12% 9.2s ±12% -44.37% (p=0.008 n=5+5)
It's a very slight win on the garbage benchmark:
name old time/op new time/op delta
XBenchGarbage-12 4.92ms ± 1% 4.89ms ± 1% -0.75% (p=0.000 n=18+19)
It's a wash (but doesn't harm performance) on the go1 benchmarks,
which don't have particularly deep stacks:
name old time/op new time/op delta
BinaryTree17-12 3.11s ± 2% 3.20s ± 3% +2.83% (p=0.000 n=17+20)
Fannkuch11-12 2.51s ± 1% 2.51s ± 1% -0.22% (p=0.034 n=19+18)
FmtFprintfEmpty-12 50.8ns ± 3% 50.6ns ± 2% ~ (p=0.793 n=20+20)
FmtFprintfString-12 174ns ± 0% 174ns ± 1% +0.17% (p=0.048 n=15+20)
FmtFprintfInt-12 177ns ± 0% 165ns ± 1% -6.99% (p=0.000 n=17+19)
FmtFprintfIntInt-12 283ns ± 1% 284ns ± 0% +0.22% (p=0.000 n=18+15)
FmtFprintfPrefixedInt-12 243ns ± 1% 244ns ± 1% +0.40% (p=0.000 n=20+19)
FmtFprintfFloat-12 318ns ± 0% 319ns ± 0% +0.27% (p=0.001 n=19+20)
FmtManyArgs-12 1.12µs ± 0% 1.14µs ± 0% +1.74% (p=0.000 n=19+20)
GobDecode-12 8.69ms ± 0% 8.73ms ± 1% +0.46% (p=0.000 n=18+18)
GobEncode-12 6.64ms ± 1% 6.61ms ± 1% -0.46% (p=0.000 n=20+20)
Gzip-12 323ms ± 2% 319ms ± 1% -1.11% (p=0.000 n=20+20)
Gunzip-12 42.8ms ± 0% 42.9ms ± 0% ~ (p=0.158 n=18+20)
HTTPClientServer-12 63.3µs ± 1% 63.1µs ± 1% -0.35% (p=0.011 n=20+20)
JSONEncode-12 16.9ms ± 1% 17.3ms ± 1% +2.84% (p=0.000 n=19+20)
JSONDecode-12 59.7ms ± 0% 58.5ms ± 0% -2.05% (p=0.000 n=19+17)
Mandelbrot200-12 3.92ms ± 0% 3.91ms ± 0% -0.16% (p=0.003 n=19+19)
GoParse-12 3.79ms ± 2% 3.75ms ± 2% -0.91% (p=0.005 n=20+20)
RegexpMatchEasy0_32-12 102ns ± 1% 101ns ± 1% -0.80% (p=0.001 n=14+20)
RegexpMatchEasy0_1K-12 337ns ± 1% 346ns ± 1% +2.90% (p=0.000 n=20+19)
RegexpMatchEasy1_32-12 84.4ns ± 2% 84.3ns ± 2% ~ (p=0.743 n=20+20)
RegexpMatchEasy1_1K-12 502ns ± 1% 505ns ± 0% +0.64% (p=0.000 n=20+20)
RegexpMatchMedium_32-12 133ns ± 1% 132ns ± 1% -0.85% (p=0.000 n=20+19)
RegexpMatchMedium_1K-12 40.1µs ± 1% 39.8µs ± 1% -0.77% (p=0.000 n=18+18)
RegexpMatchHard_32-12 2.08µs ± 1% 2.07µs ± 1% -0.55% (p=0.001 n=18+19)
RegexpMatchHard_1K-12 62.4µs ± 1% 62.0µs ± 1% -0.74% (p=0.000 n=19+19)
Revcomp-12 545ms ± 2% 545ms ± 3% ~ (p=0.771 n=19+20)
Template-12 73.7ms ± 1% 72.0ms ± 0% -2.33% (p=0.000 n=20+18)
TimeParse-12 358ns ± 1% 351ns ± 1% -2.07% (p=0.000 n=20+20)
TimeFormat-12 369ns ± 1% 356ns ± 0% -3.53% (p=0.000 n=20+18)
[Geo mean] 63.5µs 63.2µs -0.41%
name old speed new speed delta
GobDecode-12 88.3MB/s ± 0% 87.9MB/s ± 0% -0.43% (p=0.000 n=18+17)
GobEncode-12 116MB/s ± 1% 116MB/s ± 1% +0.47% (p=0.000 n=20+20)
Gzip-12 60.2MB/s ± 2% 60.8MB/s ± 1% +1.13% (p=0.000 n=20+20)
Gunzip-12 453MB/s ± 0% 453MB/s ± 0% ~ (p=0.160 n=18+20)
JSONEncode-12 115MB/s ± 1% 112MB/s ± 1% -2.76% (p=0.000 n=19+20)
JSONDecode-12 32.5MB/s ± 0% 33.2MB/s ± 0% +2.09% (p=0.000 n=19+17)
GoParse-12 15.3MB/s ± 2% 15.4MB/s ± 2% +0.92% (p=0.004 n=20+20)
RegexpMatchEasy0_32-12 311MB/s ± 1% 314MB/s ± 1% +0.78% (p=0.000 n=15+19)
RegexpMatchEasy0_1K-12 3.04GB/s ± 1% 2.95GB/s ± 1% -2.90% (p=0.000 n=19+19)
RegexpMatchEasy1_32-12 379MB/s ± 2% 380MB/s ± 2% ~ (p=0.779 n=20+20)
RegexpMatchEasy1_1K-12 2.04GB/s ± 1% 2.02GB/s ± 0% -0.62% (p=0.000 n=20+20)
RegexpMatchMedium_32-12 7.46MB/s ± 1% 7.53MB/s ± 1% +0.86% (p=0.000 n=20+19)
RegexpMatchMedium_1K-12 25.5MB/s ± 1% 25.7MB/s ± 1% +0.78% (p=0.000 n=18+18)
RegexpMatchHard_32-12 15.4MB/s ± 1% 15.5MB/s ± 1% +0.62% (p=0.000 n=19+19)
RegexpMatchHard_1K-12 16.4MB/s ± 1% 16.5MB/s ± 1% +0.82% (p=0.000 n=20+19)
Revcomp-12 466MB/s ± 2% 466MB/s ± 3% ~ (p=0.765 n=19+20)
Template-12 26.3MB/s ± 1% 27.0MB/s ± 0% +2.38% (p=0.000 n=20+18)
[Geo mean] 97.8MB/s 98.0MB/s +0.23%
Change-Id: I281044ae0b24990ba46487cacbc1069493274bc4
Reviewed-on: https://go-review.googlesource.com/13614
Reviewed-by: Keith Randall <khr@golang.org>
2015-08-12 23:43:43 -04:00
// cache is the pcvalue cache that adjustframe passes to getStackMap.
cache pcvalueCache
2016-02-15 17:38:06 -05:00
// sghi is the end address (elem plus the channel's element size) of
// the highest sudog.elem slot that lies on the stack, as computed by
// findsghi, or 0 if there is none. adjustpointers uses CAS for
// memory that starts below this address.
sghi uintptr
2014-11-11 17:04:34 -05:00
}
// adjustpointer checks whether the word stored at vpp points into the old
// stack described by adjinfo, i.e. adjinfo.old.lo <= *vpp < adjinfo.old.hi.
// If so, it adds adjinfo.delta so that *vpp points at the corresponding
// location in the new stack. The word is read and written as a plain
// uintptr (no atomics).
func adjustpointer ( adjinfo * adjustinfo , vpp unsafe . Pointer ) {
2015-11-23 11:34:16 -05:00
pp := ( * uintptr ) ( vpp )
2014-11-11 17:04:34 -05:00
p := * pp
if stackDebug >= 4 {
2015-11-23 11:34:16 -05:00
print ( " " , pp , ":" , hex ( p ) , "\n" )
2014-11-11 17:04:34 -05:00
}
2015-11-23 11:34:16 -05:00
// Only values inside the old stack are rewritten; anything else
// (nil, heap pointers, scalars) is left untouched.
if adjinfo . old . lo <= p && p < adjinfo . old . hi {
* pp = p + adjinfo . delta
2014-11-11 17:04:34 -05:00
if stackDebug >= 3 {
2015-11-23 11:34:16 -05:00
print ( " adjust ptr " , pp , ":" , hex ( p ) , " -> " , hex ( * pp ) , "\n" )
2014-11-11 17:04:34 -05:00
}
}
}
2015-05-04 10:19:24 -04:00
// Information from the compiler about the layout of stack frames.
// A bitvector is a packed array of n bits; in this file a set bit marks
// a pointer-sized word that holds a pointer (see ptrnames).
type bitvector struct {
n int32 // # of bits
// bytedata points at the first byte of the packed bits. Bit i is
// stored in byte i/8 at bit position i%8, least-significant bit
// first (see ptrbit).
bytedata * uint8
}
2018-04-01 11:01:36 -07:00
// ptrbit returns the i'th bit in bv.
// ptrbit is less efficient than iterating directly over bitvector bits,
// and should only be used in non-performance-critical code.
// See adjustpointers for an example of a high-efficiency walk of a bitvector.
//
// The result is 0 or 1. i is not checked against bv.n.
func ( bv * bitvector ) ptrbit ( i uintptr ) uint8 {
// Bit i lives in byte i/8, at bit position i%8 counting from the
// least-significant bit.
b := * ( addb ( bv . bytedata , i / 8 ) )
return ( b >> ( i % 8 ) ) & 1
2014-11-11 17:04:34 -05:00
}
// adjustpointers relocates the stack pointers stored in the memory that
// starts at address scanp. bv describes that memory: bit k set means the
// k'th pointer-sized word holds a pointer. Every such word whose value
// lies in [adjinfo.old.lo, adjinfo.old.hi) has adjinfo.delta added to it.
//
// f is used only for diagnostics: it names the frame in debug and error
// output, and when f is valid and debug.invalidptr is nonzero, a nonzero
// pointer word below minLegalPointer causes a throw.
2018-04-01 11:01:36 -07:00
func adjustpointers ( scanp unsafe . Pointer , bv * bitvector , adjinfo * adjustinfo , f funcInfo ) {
2014-11-11 17:04:34 -05:00
minp := adjinfo . old . lo
maxp := adjinfo . old . hi
delta := adjinfo . delta
2018-04-01 11:01:36 -07:00
num := uintptr ( bv . n )
2016-02-15 17:38:06 -05:00
// If this frame might contain channel receive slots, use CAS
// to adjust pointers. If the slot hasn't been received into
// yet, it may contain stack pointers and a concurrent send
// could race with adjusting those pointers. (The sent value
// itself can never contain stack pointers.)
useCAS := uintptr ( scanp ) < adjinfo . sghi
2018-04-01 11:01:36 -07:00
// Walk the bitmap one byte (eight words) at a time.
// NOTE(review): the last bitmap byte is consumed whole, so bits at
// positions >= bv.n are presumably always zero — confirm with the
// code that produces these bitmaps.
for i := uintptr ( 0 ) ; i < num ; i += 8 {
2014-11-11 17:04:34 -05:00
if stackDebug >= 4 {
2018-04-01 11:01:36 -07:00
for j := uintptr ( 0 ) ; j < 8 ; j ++ {
print ( " " , add ( scanp , ( i + j ) * sys . PtrSize ) , ":" , ptrnames [ bv . ptrbit ( i + j ) ] , ":" , hex ( * ( * uintptr ) ( add ( scanp , ( i + j ) * sys . PtrSize ) ) ) , " # " , i , " " , * addb ( bv . bytedata , i / 8 ) , "\n" )
}
2017-08-17 15:51:35 +01:00
}
2018-04-01 11:01:36 -07:00
b := * ( addb ( bv . bytedata , i / 8 ) )
// Visit only the set bits of b, lowest first.
for b != 0 {
// Ctz8 (count trailing zeros) gives the position of the lowest
// set bit; b &= b - 1 then clears that bit.
j := uintptr ( sys . Ctz8 ( b ) )
b &= b - 1
pp := ( * uintptr ) ( add ( scanp , ( i + j ) * sys . PtrSize ) )
// retry is re-entered when the CAS below fails, so that the word
// is re-read and re-checked before another attempt.
retry :
p := * pp
if f . valid ( ) && 0 < p && p < minLegalPointer && debug . invalidptr != 0 {
// Looks like a junk value in a pointer slot.
// Live analysis wrong?
getg ( ) . m . traceback = 2
print ( "runtime: bad pointer in frame " , funcname ( f ) , " at " , pp , ": " , hex ( p ) , "\n" )
throw ( "invalid pointer found on stack" )
2014-11-11 17:04:34 -05:00
}
2018-04-01 11:01:36 -07:00
if minp <= p && p < maxp {
if stackDebug >= 3 {
print ( "adjust ptr " , hex ( p ) , " " , funcname ( f ) , "\n" )
}
if useCAS {
// Replace p with p+delta only if the slot still holds p.
ppu := ( * unsafe . Pointer ) ( unsafe . Pointer ( pp ) )
if ! atomic . Casp1 ( ppu , unsafe . Pointer ( p ) , unsafe . Pointer ( p + delta ) ) {
goto retry
}
} else {
* pp = p + delta
2016-02-15 17:38:06 -05:00
}
2014-11-11 17:04:34 -05:00
}
}
}
}
// adjustframe relocates the stack pointers held in a single frame. It is
// the per-frame callback handed to gentraceback (by copystack) and to
// tracebackdefers (by adjustdefers); arg is the *adjustinfo describing the
// copy in progress. In order, it adjusts the frame's locals, the saved
// base pointer (amd64 only), the arguments, and the frame's stack
// objects. It always returns true.
//
// Note: the argument/return area is adjusted by the callee.
func adjustframe ( frame * stkframe , arg unsafe . Pointer ) bool {
adjinfo := ( * adjustinfo ) ( arg )
2018-04-26 21:20:41 -04:00
if frame . continpc == 0 {
2014-11-11 17:04:34 -05:00
// Frame is dead.
return true
}
f := frame . fn
if stackDebug >= 2 {
print ( " adjusting " , funcname ( f ) , " frame=[" , hex ( frame . sp ) , "," , hex ( frame . fp ) , "] pc=" , hex ( frame . pc ) , " continpc=" , hex ( frame . continpc ) , "\n" )
}
2018-03-14 15:21:37 -07:00
if f . funcID == funcID_systemstack_switch {
[dev.cc] runtime: delete scalararg, ptrarg; rename onM to systemstack
Scalararg and ptrarg are not "signal safe".
Go code filling them out can be interrupted by a signal,
and then the signal handler runs, and if it also ends up
in Go code that uses scalararg or ptrarg, now the old
values have been smashed.
For the pieces of code that do need to run in a signal handler,
we introduced onM_signalok, which is really just onM
except that the _signalok is meant to convey that the caller
asserts that scalarg and ptrarg will be restored to their old
values after the call (instead of the usual behavior, zeroing them).
Scalararg and ptrarg are also untyped and therefore error-prone.
Go code can always pass a closure instead of using scalararg
and ptrarg; they were only really necessary for C code.
And there's no more C code.
For all these reasons, delete scalararg and ptrarg, converting
the few remaining references to use closures.
Once those are gone, there is no need for a distinction between
onM and onM_signalok, so replace both with a single function
equivalent to the current onM_signalok (that is, it can be called
on any of the curg, g0, and gsignal stacks).
The name onM and the phrase 'm stack' are misnomers,
because on most system an M has two system stacks:
the main thread stack and the signal handling stack.
Correct the misnomer by naming the replacement function systemstack.
Fix a few references to "M stack" in code.
The main motivation for this change is to eliminate scalararg/ptrarg.
Rick and I have already seen them cause problems because
the calling sequence m.ptrarg[0] = p is a heap pointer assignment,
so it gets a write barrier. The write barrier also uses onM, so it has
all the same problems as if it were being invoked by a signal handler.
We worked around this by saving and restoring the old values
and by calling onM_signalok, but there's no point in keeping this nice
home for bugs around any longer.
This CL also changes funcline to return the file name as a result
instead of filling in a passed-in *string. (The *string signature is
left over from when the code was written in and called from C.)
That's arguably an unrelated change, except that once I had done
the ptrarg/scalararg/onM cleanup I started getting false positives
about the *string argument escaping (not allowed in package runtime).
The compiler is wrong, but the easiest fix is to write the code like
Go code instead of like C code. I am a bit worried that the compiler
is wrong because of some use of uninitialized memory in the escape
analysis. If that's the reason, it will go away when we convert the
compiler to Go. (And if not, we'll debug it the next time.)
LGTM=khr
R=r, khr
CC=austin, golang-codereviews, iant, rlh
https://golang.org/cl/174950043
2014-11-12 14:54:31 -05:00
// A special routine at the bottom of the stack of a goroutine that makes a systemstack call.
2014-11-11 17:04:34 -05:00
// We will allow it to be copied even though we don't
// have full GC info for it (because it is written in asm).
return true
}
2018-04-26 21:20:41 -04:00
2018-09-01 20:16:39 -07:00
// locals and args are the pointer bitmaps for this frame; objs lists
// its stack objects.
// NOTE(review): the meaning of the final `true` argument is defined
// by getStackMap, which is not visible here.
locals , args , objs := getStackMap ( frame , & adjinfo . cache , true )
2014-11-11 17:04:34 -05:00
// Adjust local variables if stack frame has been allocated.
2018-04-26 21:20:41 -04:00
if locals . n > 0 {
// The bitmap covers the locals.n words immediately below varp.
size := uintptr ( locals . n ) * sys . PtrSize
adjustpointers ( unsafe . Pointer ( frame . varp - size ) , & locals , adjinfo , f )
2014-11-11 17:04:34 -05:00
}
2015-01-14 11:09:50 -05:00
// Adjust saved base pointer if there is one.
// A gap of exactly two registers between varp and argp is taken to
// mean the frame holds a saved base pointer as well as the return
// address.
2016-04-07 15:42:35 +09:00
if sys . ArchFamily == sys . AMD64 && frame . argp - frame . varp == 2 * sys . RegSize {
2015-01-14 11:09:50 -05:00
if ! framepointer_enabled {
2015-02-03 08:35:38 -05:00
print ( "runtime: found space for saved base pointer, but no framepointer experiment\n" )
2015-02-03 09:09:56 -05:00
print ( "argp=" , hex ( frame . argp ) , " varp=" , hex ( frame . varp ) , "\n" )
2015-01-14 11:09:50 -05:00
throw ( "bad frame layout" )
}
if stackDebug >= 3 {
print ( " saved bp\n" )
}
2016-12-02 15:17:52 -08:00
if debugCheckBP {
// Frame pointers should always point to the next higher frame on
// the Go stack (or be nil, for the top frame on the stack).
bp := * ( * uintptr ) ( unsafe . Pointer ( frame . varp ) )
if bp != 0 && ( bp < adjinfo . old . lo || bp >= adjinfo . old . hi ) {
println ( "runtime: found invalid frame pointer" )
print ( "bp=" , hex ( bp ) , " min=" , hex ( adjinfo . old . lo ) , " max=" , hex ( adjinfo . old . hi ) , "\n" )
throw ( "bad frame pointer" )
}
}
2015-01-14 11:09:50 -05:00
// The saved base pointer is the word at varp.
adjustpointer ( adjinfo , unsafe . Pointer ( frame . varp ) )
}
2014-11-11 17:04:34 -05:00
// Adjust arguments.
2018-04-26 21:20:41 -04:00
if args . n > 0 {
2014-11-11 17:04:34 -05:00
if stackDebug >= 3 {
print ( " args\n" )
}
2018-04-26 21:20:41 -04:00
// An empty funcInfo is passed here, so adjustpointers' f.valid()
// guard is given a zero value for the arguments area.
adjustpointers ( unsafe . Pointer ( frame . argp ) , & args , adjinfo , funcInfo { } )
2014-11-11 17:04:34 -05:00
}
2018-09-01 20:16:39 -07:00
// Adjust pointers in all stack objects (whether they are live or not).
// See comments in mgcmark.go:scanframeworker.
if frame . varp != 0 {
for _ , obj := range objs {
off := obj . off
// Negative offsets are relative to varp, non-negative ones to argp.
base := frame . varp // locals base pointer
if off >= 0 {
base = frame . argp // arguments and return values base pointer
}
p := base + uintptr ( off )
if p < frame . sp {
// Object hasn't been allocated in the frame yet.
// (Happens when the stack bounds check fails and
// we call into morestack.)
continue
}
t := obj . typ
gcdata := t . gcdata
var s * mspan
if t . kind & kindGCProg != 0 {
// See comments in mgcmark.go:scanstack
// materializeGCProg presumably expands the GC program into a
// plain bitmap held in span s; the bitmap is read from the
// span's start address and s is released below.
s = materializeGCProg ( t . ptrdata , gcdata )
gcdata = ( * byte ) ( unsafe . Pointer ( s . startAddr ) )
}
// i is a byte offset into the object. Word i/PtrSize has its bit in
// gcdata byte i/(8*PtrSize), at bit position (i/PtrSize)&7.
for i := uintptr ( 0 ) ; i < t . ptrdata ; i += sys . PtrSize {
if * addb ( gcdata , i / ( 8 * sys . PtrSize ) ) >> ( i / sys . PtrSize & 7 ) & 1 != 0 {
adjustpointer ( adjinfo , unsafe . Pointer ( p + i ) )
}
}
if s != nil {
dematerializeGCProg ( s )
}
}
}
2014-11-11 17:04:34 -05:00
return true
}
// adjustctxt relocates the saved context pointer gp.sched.ctxt and, when
// frame pointers are enabled, the saved frame pointer gp.sched.bp.
// With debugCheckBP set, it first throws if a nonzero gp.sched.bp lies
// outside the old stack.
func adjustctxt ( gp * g , adjinfo * adjustinfo ) {
2015-10-15 14:33:50 -07:00
adjustpointer ( adjinfo , unsafe . Pointer ( & gp . sched . ctxt ) )
2016-12-02 15:17:52 -08:00
// gp.sched.bp is only adjusted when frame pointers are in use.
if ! framepointer_enabled {
return
}
if debugCheckBP {
bp := gp . sched . bp
if bp != 0 && ( bp < adjinfo . old . lo || bp >= adjinfo . old . hi ) {
println ( "runtime: found invalid top frame pointer" )
print ( "bp=" , hex ( bp ) , " min=" , hex ( adjinfo . old . lo ) , " max=" , hex ( adjinfo . old . hi ) , "\n" )
throw ( "bad top frame pointer" )
}
}
adjustpointer ( adjinfo , unsafe . Pointer ( & gp . sched . bp ) )
2014-11-11 17:04:34 -05:00
}
// adjustdefers relocates stack pointers reachable from gp's deferred
// calls: first the defer argument blocks (by running adjustframe through
// tracebackdefers), then the fn, sp and _panic fields of every record on
// the gp._defer list.
func adjustdefers ( gp * g , adjinfo * adjustinfo ) {
2019-06-05 18:42:31 +00:00
// Adjust defer argument blocks the same way we adjust active stack frames.
tracebackdefers ( gp , adjustframe , noescape ( unsafe . Pointer ( adjinfo ) ) )
2014-11-11 17:04:34 -05:00
// Adjust pointers in the Defer structs.
2019-06-05 18:42:31 +00:00
// Defer structs themselves are never on the stack.
2014-11-11 17:04:34 -05:00
for d := gp . _defer ; d != nil ; d = d . link {
2015-10-15 14:33:50 -07:00
adjustpointer ( adjinfo , unsafe . Pointer ( & d . fn ) )
adjustpointer ( adjinfo , unsafe . Pointer ( & d . sp ) )
adjustpointer ( adjinfo , unsafe . Pointer ( & d . _panic ) )
2014-11-11 17:04:34 -05:00
}
}
// adjustpanics relocates gp._panic, the head of gp's panic list, if it
// points into the old stack. Only the head pointer is touched here.
func adjustpanics ( gp * g , adjinfo * adjustinfo ) {
// Panics are on stack and already adjusted.
// Update pointer to head of list in G.
2015-10-15 14:33:50 -07:00
adjustpointer ( adjinfo , unsafe . Pointer ( & gp . _panic ) )
2014-11-11 17:04:34 -05:00
}
// adjustsudogs relocates the elem pointer of every sudog on the
// gp.waiting list. It takes no locks itself.
func adjustsudogs ( gp * g , adjinfo * adjustinfo ) {
// The data elements pointed to by a sudog structure
// might be in the stack.
for s := gp . waiting ; s != nil ; s = s . waitlink {
2015-10-15 14:33:50 -07:00
adjustpointer ( adjinfo , unsafe . Pointer ( & s . elem ) )
2014-11-11 17:04:34 -05:00
}
}
// fillstack overwrites every byte of stk, from stk.lo up to but not
// including stk.hi, with the value b. It does nothing when
// stk.lo >= stk.hi.
func fillstack(stk stack, b byte) {
	addr := stk.lo
	for addr < stk.hi {
		*(*byte)(unsafe.Pointer(addr)) = b
		addr++
	}
}
2016-02-15 17:38:06 -05:00
// findsghi walks gp's waiting sudogs and returns the highest end address
// (sg.elem plus the channel's element size) that falls inside
// [stk.lo, stk.hi). It returns 0 when no sudog element ends inside stk.
func findsghi(gp *g, stk stack) uintptr {
	var highest uintptr
	sg := gp.waiting
	for sg != nil {
		// end is the address just past this sudog's element slot.
		end := uintptr(sg.elem) + uintptr(sg.c.elemsize)
		inStack := stk.lo <= end && end < stk.hi
		if inStack && end > highest {
			highest = end
		}
		sg = sg.waitlink
	}
	return highest
}
// syncadjustsudogs adjusts gp's sudogs and copies the part of gp's
// stack they refer to while synchronizing with concurrent channel
// operations. It returns the number of bytes of stack copied.
//
// used is the number of in-use bytes at the high end of the old stack
// (copystack passes old.hi - gp.sched.sp). adjinfo.sghi must already be
// set; if it is 0 nothing is copied and 0 is returned. It also returns 0
// immediately when gp has no waiting sudogs.
func syncadjustsudogs ( gp * g , used uintptr , adjinfo * adjustinfo ) uintptr {
if gp . waiting == nil {
return 0
}
// Lock channels to prevent concurrent send/receive.
// It's important that we *only* do this for async
// copystack; otherwise, gp may be in the middle of
// putting itself on wait queues and this would
// self-deadlock.
2016-07-07 17:43:08 -07:00
// lastc is the channel of the previous sudog: a channel is locked once
// per run of consecutive sudogs that share it.
// NOTE(review): this relies on sudogs for the same channel being
// adjacent in gp.waiting — confirm with the code that builds the list.
var lastc * hchan
2016-02-15 17:38:06 -05:00
for sg := gp . waiting ; sg != nil ; sg = sg . waitlink {
2016-07-07 17:43:08 -07:00
if sg . c != lastc {
lock ( & sg . c . lock )
}
lastc = sg . c
2016-02-15 17:38:06 -05:00
}
// Adjust sudogs.
adjustsudogs ( gp , adjinfo )
// Copy the part of the stack the sudogs point into
// while holding the lock to prevent races on
// send/receive slots.
var sgsize uintptr
if adjinfo . sghi != 0 {
// oldBot is the lowest in-use address of the old stack; the bytes
// [oldBot, sghi) are copied to the same offset in the new stack.
oldBot := adjinfo . old . hi - used
newBot := oldBot + adjinfo . delta
sgsize = adjinfo . sghi - oldBot
memmove ( unsafe . Pointer ( newBot ) , unsafe . Pointer ( oldBot ) , sgsize )
}
// Unlock channels.
2016-07-07 17:43:08 -07:00
// Same traversal and same lastc rule as the locking loop above, so
// each lock taken is released exactly once.
lastc = nil
2016-02-15 17:38:06 -05:00
for sg := gp . waiting ; sg != nil ; sg = sg . waitlink {
2016-07-07 17:43:08 -07:00
if sg . c != lastc {
unlock ( & sg . c . lock )
}
lastc = sg . c
2016-02-15 17:38:06 -05:00
}
return sgsize
}
2014-11-11 17:04:34 -05:00
// copystack copies gp's stack to a new stack of newsize bytes, rewrites
// every pointer into the old stack so it refers to the new one, installs
// the new stack in gp, and frees the old stack.
// Caller must have changed gp status to Gcopystack.
2014-11-15 08:00:38 -05:00
// It throws if gp is in a system call (gp.syscallsp != 0) or has no stack.
2016-02-15 17:38:06 -05:00
//
// If sync is true, this is a self-triggered stack growth and, in
// particular, no other G may be writing to gp's stack (e.g., via a
// channel operation). If sync is false, copystack protects against
// concurrent channel operations.
func copystack ( gp * g , newsize uintptr , sync bool ) {
2014-11-11 17:04:34 -05:00
if gp . syscallsp != 0 {
2014-12-27 20:58:00 -08:00
throw ( "stack growth not allowed in system call" )
2014-11-11 17:04:34 -05:00
}
old := gp . stack
if old . lo == 0 {
2014-12-27 20:58:00 -08:00
throw ( "nil stackbase" )
2014-11-11 17:04:34 -05:00
}
// used is the number of bytes in use, measured down from old.hi to the
// saved stack pointer.
used := old . hi - gp . sched . sp
// allocate new stack
2017-02-09 14:03:49 -05:00
// NOTE(review): newsize is narrowed to uint32 here — presumably
// callers never request 4 GiB or more; confirm.
new := stackalloc ( uint32 ( newsize ) )
2014-11-11 17:04:34 -05:00
if stackPoisonCopy != 0 {
fillstack ( new , 0xfd )
}
if stackDebug >= 1 {
2017-02-09 14:11:13 -05:00
print ( "copystack gp=" , gp , " [" , hex ( old . lo ) , " " , hex ( old . hi - used ) , " " , hex ( old . hi ) , "]" , " -> [" , hex ( new . lo ) , " " , hex ( new . hi - used ) , " " , hex ( new . hi ) , "]/" , newsize , "\n" )
2014-11-11 17:04:34 -05:00
}
2016-02-16 12:23:33 -05:00
// Compute adjustment.
2014-11-11 17:04:34 -05:00
// delta is measured between the high ends because the in-use part of
// the stack is copied so that it ends at new.hi.
var adjinfo adjustinfo
adjinfo . old = old
adjinfo . delta = new . hi - old . hi
2016-02-15 17:38:06 -05:00
// Adjust sudogs, synchronizing with channel ops if necessary.
// ncopy is how many bytes, counted down from the top of the stack,
// are still to be copied by the memmove below.
ncopy := used
if sync {
adjustsudogs ( gp , & adjinfo )
} else {
// sudogs can point into the stack. During concurrent
// shrinking, these areas may be written to. Find the
// highest such pointer so we can handle everything
// there and below carefully. (This shouldn't be far
// from the bottom of the stack, so there's little
// cost in handling everything below it carefully.)
adjinfo . sghi = findsghi ( gp , old )
// Synchronize with channel ops and copy the part of
// the stack they may interact with.
ncopy -= syncadjustsudogs ( gp , used , & adjinfo )
}
// Copy the stack (or the rest of it) to the new location
memmove ( unsafe . Pointer ( new . hi - ncopy ) , unsafe . Pointer ( old . hi - ncopy ) , ncopy )
2016-02-16 12:23:33 -05:00
2016-02-15 17:38:06 -05:00
// Adjust remaining structures that have pointers into stacks.
// We have to do most of these before we traceback the new
// stack because gentraceback uses them.
2014-11-11 17:04:34 -05:00
adjustctxt ( gp , & adjinfo )
adjustdefers ( gp , & adjinfo )
adjustpanics ( gp , & adjinfo )
2016-02-15 17:38:06 -05:00
// Translate sghi into a new-stack address: the traceback below runs
// after gp.stack and gp.sched.sp are switched, so adjustpointers
// presumably compares sghi against new-stack addresses.
if adjinfo . sghi != 0 {
adjinfo . sghi += adjinfo . delta
}
2014-11-11 17:04:34 -05:00
// Swap out old stack for new one
gp . stack = new
2015-01-05 16:29:21 +00:00
gp . stackguard0 = new . lo + _StackGuard // NOTE: might clobber a preempt request
2014-11-11 17:04:34 -05:00
gp . sched . sp = new . hi - used
2015-08-26 11:39:10 -04:00
gp . stktopsp += adjinfo . delta
2014-11-11 17:04:34 -05:00
2016-02-16 12:23:33 -05:00
// Adjust pointers in the new stack.
// adjustframe is invoked once per frame with &adjinfo as its argument.
gentraceback ( ^ uintptr ( 0 ) , ^ uintptr ( 0 ) , 0 , gp , 0 , nil , 0x7fffffff , adjustframe , noescape ( unsafe . Pointer ( & adjinfo ) ) , 0 )
2014-11-11 17:04:34 -05:00
// free old stack
if stackPoisonCopy != 0 {
fillstack ( old , 0xfc )
}
2017-02-09 14:11:13 -05:00
stackfree ( old )
2014-11-11 17:04:34 -05:00
}
// round2 returns the smallest power of two that is greater than or equal
// to x. For x <= 1 (including zero and negative values) the result is 1.
//
// x must not exceed 1<<30: no larger power of two fits in an int32, and
// for such inputs the doubling overflows and the loop never terminates.
func round2(x int32) int32 {
	pow := int32(1)
	for pow < x {
		pow <<= 1
	}
	return pow
}
// Called from runtime·morestack when more stack is needed.
// Allocate larger stack and relocate to new stack.
// Stack growth is multiplicative, for constant amortized cost.
//
// g->atomicstatus will be Grunning or Gscanrunning upon entry.
// If the GC is trying to stop this g then it will set preemptscan to true.
2016-10-19 18:27:39 -04:00
//
runtime: remove write barriers from newstack, gogo
Currently, newstack and gogo have write barriers for maintaining the
context register saved in g.sched.ctxt. This is troublesome, because
newstack can be called from go:nowritebarrierrec places that can't
allow write barriers. It happens to be benign because g.sched.ctxt
will always be nil on entry to newstack *and* it so happens the
incoming ctxt will also always be nil in these contexts (I
think/hope), but this is playing with fire. It's also desirable to
mark newstack go:nowritebarrierrec to prevent any other, non-benign
write barriers from creeping in, but we can't do that right now
because of this one write barrier.
Fix all of this by observing that g.sched.ctxt is really just a saved
live pointer register. Hence, we can shade it when we scan g's stack
and otherwise move it back and forth between the actual context
register and g.sched.ctxt without write barriers. This means we can
save it in morestack along with all of the other g.sched, eliminate
the save from newstack along with its troublesome write barrier, and
eliminate the shenanigans in gogo to invoke the write barrier when
restoring it.
Once we've done all of this, we can mark newstack
go:nowritebarrierrec.
Fixes #22385.
For #22460.
Change-Id: I43c24958e3f6785b53c1350e1e83c2844e0d1522
Reviewed-on: https://go-review.googlesource.com/72553
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Rick Hudson <rlh@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
2017-10-22 21:37:05 -04:00
// This must be nowritebarrierrec because it can be called as part of
// stack growth from other nowritebarrierrec functions, but the
// compiler doesn't check this.
//
//go:nowritebarrierrec
func newstack ( ) {
2014-11-11 17:04:34 -05:00
thisg := getg ( )
// TODO: double check all gp. shouldn't be getg().
2014-12-22 10:53:51 -05:00
if thisg . m . morebuf . g . ptr ( ) . stackguard0 == stackFork {
2014-12-27 20:58:00 -08:00
throw ( "stack growth after fork" )
2014-11-11 17:04:34 -05:00
}
2014-12-22 10:53:51 -05:00
if thisg . m . morebuf . g . ptr ( ) != thisg . m . curg {
2015-08-24 21:24:23 -04:00
print ( "runtime: newstack called from g=" , hex ( thisg . m . morebuf . g ) , "\n" + "\tm=" , thisg . m , " m->curg=" , thisg . m . curg , " m->g0=" , thisg . m . g0 , " m->gsignal=" , thisg . m . gsignal , "\n" )
2014-11-11 17:04:34 -05:00
morebuf := thisg . m . morebuf
2014-12-22 10:53:51 -05:00
traceback ( morebuf . pc , morebuf . sp , morebuf . lr , morebuf . g . ptr ( ) )
2014-12-27 20:58:00 -08:00
throw ( "runtime: wrong goroutine in newstack" )
2014-11-11 17:04:34 -05:00
}
2016-10-19 18:27:39 -04:00
gp := thisg . m . curg
2014-11-11 17:04:34 -05:00
if thisg . m . curg . throwsplit {
// Update syscallsp, syscallpc in case traceback uses them.
morebuf := thisg . m . morebuf
gp . syscallsp = morebuf . sp
gp . syscallpc = morebuf . pc
2017-12-14 15:32:12 -05:00
pcname , pcoff := "(unknown)" , uintptr ( 0 )
f := findfunc ( gp . sched . pc )
if f . valid ( ) {
pcname = funcname ( f )
pcoff = gp . sched . pc - f . entry
}
print ( "runtime: newstack at " , pcname , "+" , hex ( pcoff ) ,
" sp=" , hex ( gp . sched . sp ) , " stack=[" , hex ( gp . stack . lo ) , ", " , hex ( gp . stack . hi ) , "]\n" ,
2014-11-11 17:04:34 -05:00
"\tmorebuf={pc:" , hex ( morebuf . pc ) , " sp:" , hex ( morebuf . sp ) , " lr:" , hex ( morebuf . lr ) , "}\n" ,
"\tsched={pc:" , hex ( gp . sched . pc ) , " sp:" , hex ( gp . sched . sp ) , " lr:" , hex ( gp . sched . lr ) , " ctxt:" , gp . sched . ctxt , "}\n" )
2014-12-22 10:53:51 -05:00
2017-11-22 15:29:03 -05:00
thisg . m . traceback = 2 // Include runtime frames
2014-12-22 10:53:51 -05:00
traceback ( morebuf . pc , morebuf . sp , morebuf . lr , gp )
2014-12-27 20:58:00 -08:00
throw ( "runtime: stack split at bad time" )
2014-11-11 17:04:34 -05:00
}
morebuf := thisg . m . morebuf
thisg . m . morebuf . pc = 0
thisg . m . morebuf . lr = 0
thisg . m . morebuf . sp = 0
2014-12-22 10:53:51 -05:00
thisg . m . morebuf . g = 0
2015-01-13 15:55:16 -05:00
2015-01-14 16:36:41 -05:00
// NOTE: stackguard0 may change underfoot, if another thread
// is about to try to preempt gp. Read it just once and use that same
// value now and below.
2015-11-02 14:09:24 -05:00
preempt := atomic . Loaduintptr ( & gp . stackguard0 ) == stackPreempt
2015-01-14 16:36:41 -05:00
2015-01-13 15:55:16 -05:00
// Be conservative about where we preempt.
// We are interested in preempting user Go code, not runtime code.
2015-01-30 15:30:41 -05:00
// If we're holding locks, mallocing, or preemption is disabled, don't
// preempt.
2015-01-13 15:55:16 -05:00
// This check is very early in newstack so that even the status change
// from Grunning to Gwaiting and back doesn't happen in this case.
// That status change by itself can be viewed as a small preemption,
// because the GC might change Gwaiting to Gscanwaiting, and then
// this goroutine has to wait for the GC to finish before continuing.
// If the GC is in some way dependent on this goroutine (for example,
// it needs a lock held by the goroutine), that small preemption turns
// into a real deadlock.
2015-01-14 16:36:41 -05:00
if preempt {
2015-04-17 00:21:30 -04:00
if thisg . m . locks != 0 || thisg . m . mallocing != 0 || thisg . m . preemptoff != "" || thisg . m . p . ptr ( ) . status != _Prunning {
2015-01-13 15:55:16 -05:00
// Let the goroutine keep running for now.
// gp->preempt is set, so it will be preempted next time.
gp . stackguard0 = gp . stack . lo + _StackGuard
gogo ( & gp . sched ) // never return
}
}
2014-11-11 17:04:34 -05:00
if gp . stack . lo == 0 {
2014-12-27 20:58:00 -08:00
throw ( "missing stack in newstack" )
2014-11-11 17:04:34 -05:00
}
sp := gp . sched . sp
2018-06-12 23:22:03 +02:00
if sys . ArchFamily == sys . AMD64 || sys . ArchFamily == sys . I386 || sys . ArchFamily == sys . WASM {
2014-11-11 17:04:34 -05:00
// The call to morestack cost a word.
2015-11-11 12:39:30 -05:00
sp -= sys . PtrSize
2014-11-11 17:04:34 -05:00
}
if stackDebug >= 1 || sp < gp . stack . lo {
print ( "runtime: newstack sp=" , hex ( sp ) , " stack=[" , hex ( gp . stack . lo ) , ", " , hex ( gp . stack . hi ) , "]\n" ,
"\tmorebuf={pc:" , hex ( morebuf . pc ) , " sp:" , hex ( morebuf . sp ) , " lr:" , hex ( morebuf . lr ) , "}\n" ,
"\tsched={pc:" , hex ( gp . sched . pc ) , " sp:" , hex ( gp . sched . sp ) , " lr:" , hex ( gp . sched . lr ) , " ctxt:" , gp . sched . ctxt , "}\n" )
}
if sp < gp . stack . lo {
2018-03-13 21:21:25 -07:00
print ( "runtime: gp=" , gp , ", goid=" , gp . goid , ", gp->status=" , hex ( readgstatus ( gp ) ) , "\n " )
2014-11-11 17:04:34 -05:00
print ( "runtime: split stack overflow: " , hex ( sp ) , " < " , hex ( gp . stack . lo ) , "\n" )
2014-12-27 20:58:00 -08:00
throw ( "runtime: split stack overflow" )
2014-11-11 17:04:34 -05:00
}
2015-01-14 16:36:41 -05:00
if preempt {
2014-11-11 17:04:34 -05:00
if gp == thisg . m . g0 {
2014-12-27 20:58:00 -08:00
throw ( "runtime: preempt g0" )
2014-11-11 17:04:34 -05:00
}
2015-04-17 00:21:30 -04:00
if thisg . m . p == 0 && thisg . m . locks == 0 {
2014-12-27 20:58:00 -08:00
throw ( "runtime: g is running but p is not" )
2014-11-11 17:04:34 -05:00
}
2016-02-25 15:37:40 -05:00
// Synchronize with scang.
casgstatus ( gp , _Grunning , _Gwaiting )
2014-11-11 17:04:34 -05:00
if gp . preemptscan {
2014-11-21 16:46:27 -05:00
for ! castogscanstatus ( gp , _Gwaiting , _Gscanwaiting ) {
2015-05-28 12:37:12 -04:00
// Likely to be racing with the GC as
// it sees a _Gwaiting and does the
// stack scan. If so, gcworkdone will
// be set and gcphasework will simply
// return.
2014-11-21 16:46:27 -05:00
}
2015-06-16 19:20:18 -04:00
if ! gp . gcscandone {
runtime: pass gcWork to scanstack
Currently scanstack obtains its own gcWork from the P for the duration
of the stack scan and then, if called during mark termination,
disposes the gcWork.
However, this means that the number of workbufs allocated will be at
least the number of stacks scanned during mark termination, which may
be very high (especially during a STW GC). This happens because, in
steady state, each scanstack will obtain a fresh workbuf (either from
the empty list or by allocating it), fill it with the scan results,
and then dispose it to the full list. Nothing is consuming from the
full list during this (and hence nothing is recycling them to the
empty list), so the length of the full list by the time mark
termination starts draining it is at least the number of stacks
scanned.
Fix this by pushing the gcWork acquisition up the stack to either the
gcDrain that calls markroot that calls scanstack (which batches across
many stack scans and is the path taken during STW GC) or to newstack
(which is still a single scanstack call, but this is roughly bounded
by the number of Ps).
This fix reduces the workbuf allocation for the test program from
issue #15319 from 213 MB (roughly 2KB * 1e5 goroutines) to 10 MB.
Fixes #15319.
Note that there's potentially a similar issue in write barriers during
mark 2. Fixing that will be more difficult since there's no broader
non-preemptible context, but it should also be less of a problem since
the full list is being drained during mark 2.
Some overall improvements in the go1 benchmarks, plus the usual noise.
No significant change in the garbage benchmark (time/op or GC memory).
name old time/op new time/op delta
BinaryTree17-12 2.54s ± 1% 2.51s ± 1% -1.09% (p=0.000 n=20+19)
Fannkuch11-12 2.12s ± 0% 2.17s ± 0% +2.18% (p=0.000 n=19+18)
FmtFprintfEmpty-12 45.1ns ± 1% 45.2ns ± 0% ~ (p=0.078 n=19+18)
FmtFprintfString-12 127ns ± 0% 128ns ± 0% +1.08% (p=0.000 n=19+16)
FmtFprintfInt-12 125ns ± 0% 122ns ± 1% -2.71% (p=0.000 n=14+18)
FmtFprintfIntInt-12 196ns ± 0% 190ns ± 1% -2.91% (p=0.000 n=12+20)
FmtFprintfPrefixedInt-12 196ns ± 0% 194ns ± 1% -0.94% (p=0.000 n=13+18)
FmtFprintfFloat-12 253ns ± 1% 251ns ± 1% -0.86% (p=0.000 n=19+20)
FmtManyArgs-12 807ns ± 1% 784ns ± 1% -2.85% (p=0.000 n=20+20)
GobDecode-12 7.13ms ± 1% 7.12ms ± 1% ~ (p=0.351 n=19+20)
GobEncode-12 5.89ms ± 0% 5.95ms ± 0% +0.94% (p=0.000 n=19+19)
Gzip-12 219ms ± 1% 221ms ± 1% +1.35% (p=0.000 n=18+20)
Gunzip-12 37.5ms ± 1% 37.4ms ± 0% ~ (p=0.057 n=20+19)
HTTPClientServer-12 81.4µs ± 4% 81.9µs ± 3% ~ (p=0.118 n=17+18)
JSONEncode-12 15.7ms ± 1% 15.8ms ± 1% +0.73% (p=0.000 n=17+18)
JSONDecode-12 57.9ms ± 1% 57.2ms ± 1% -1.34% (p=0.000 n=19+19)
Mandelbrot200-12 4.12ms ± 1% 4.10ms ± 0% -0.33% (p=0.000 n=19+17)
GoParse-12 3.22ms ± 2% 3.25ms ± 1% +0.72% (p=0.000 n=18+20)
RegexpMatchEasy0_32-12 70.6ns ± 1% 71.1ns ± 2% +0.63% (p=0.005 n=19+20)
RegexpMatchEasy0_1K-12 240ns ± 0% 239ns ± 1% -0.59% (p=0.000 n=19+20)
RegexpMatchEasy1_32-12 71.3ns ± 1% 71.3ns ± 1% ~ (p=0.844 n=17+17)
RegexpMatchEasy1_1K-12 384ns ± 2% 371ns ± 1% -3.45% (p=0.000 n=19+20)
RegexpMatchMedium_32-12 109ns ± 1% 108ns ± 2% -0.48% (p=0.029 n=19+19)
RegexpMatchMedium_1K-12 34.3µs ± 1% 34.5µs ± 2% ~ (p=0.160 n=18+20)
RegexpMatchHard_32-12 1.79µs ± 9% 1.72µs ± 2% -3.83% (p=0.000 n=19+19)
RegexpMatchHard_1K-12 53.3µs ± 4% 51.8µs ± 1% -2.82% (p=0.000 n=19+20)
Revcomp-12 386ms ± 0% 388ms ± 0% +0.72% (p=0.000 n=17+20)
Template-12 62.9ms ± 1% 62.5ms ± 1% -0.57% (p=0.010 n=18+19)
TimeParse-12 325ns ± 0% 331ns ± 0% +1.84% (p=0.000 n=18+19)
TimeFormat-12 338ns ± 0% 343ns ± 0% +1.34% (p=0.000 n=18+20)
[Geo mean] 52.7µs 52.5µs -0.42%
Change-Id: Ib2d34736c4ae2ec329605b0fbc44636038d8d018
Reviewed-on: https://go-review.googlesource.com/23391
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Rick Hudson <rlh@golang.org>
2016-05-23 22:14:53 -04:00
// gcw is safe because we're on the
// system stack.
gcw := & gp . m . p . ptr ( ) . gcw
scanstack ( gp , gcw )
2015-06-16 19:20:18 -04:00
gp . gcscandone = true
}
gp . preemptscan = false
gp . preempt = false
2014-11-21 16:46:27 -05:00
casfrom_Gscanstatus ( gp , _Gscanwaiting , _Gwaiting )
runtime: don't clear gcscanvalid in casfrom_Gscanstatus
Currently we clear gcscanvalid in both casgstatus and
casfrom_Gscanstatus if the new status is _Grunning. This is very
important to do in casgstatus. However, this is potentially wrong in
casfrom_Gscanstatus because in this case the caller doesn't own gp and
hence the write is racy. Unlike the other _Gscan statuses, during
_Gscanrunning, the G is still running. This does not indicate that
it's transitioning into a running state. The scan simply hasn't
happened yet, so it's neither valid nor invalid.
Conveniently, this also means clearing gcscanvalid is unnecessary in
this case because the G was already in _Grunning, so we can simply
remove this code. What will happen instead is that the G will be
preempted to scan itself, that scan will set gcscanvalid to true, and
then the G will return to _Grunning via casgstatus, clearing
gcscanvalid.
This fix will become necessary shortly when we start keeping track of
the set of G's with dirty stacks, since it will no longer be
idempotent to simply set gcscanvalid to false.
Change-Id: I688c82e6fbf00d5dbbbff49efa66acb99ee86785
Reviewed-on: https://go-review.googlesource.com/20669
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-03-11 14:08:10 -05:00
// This clears gcscanvalid.
2014-11-11 17:04:34 -05:00
casgstatus ( gp , _Gwaiting , _Grunning )
2015-01-05 16:29:21 +00:00
gp . stackguard0 = gp . stack . lo + _StackGuard
2015-06-16 19:20:18 -04:00
gogo ( & gp . sched ) // never return
2014-11-11 17:04:34 -05:00
}
// Act like goroutine called runtime.Gosched.
casgstatus ( gp , _Gwaiting , _Grunning )
2014-12-12 18:41:57 +01:00
gopreempt_m ( gp ) // never return
2014-11-11 17:04:34 -05:00
}
// Allocate a bigger segment and move the stack.
2017-02-09 14:11:13 -05:00
oldsize := gp . stack . hi - gp . stack . lo
2014-11-11 17:04:34 -05:00
newsize := oldsize * 2
2017-02-09 14:11:13 -05:00
if newsize > maxstacksize {
2014-11-11 17:04:34 -05:00
print ( "runtime: goroutine stack exceeds " , maxstacksize , "-byte limit\n" )
2014-12-27 20:58:00 -08:00
throw ( "stack overflow" )
2014-11-11 17:04:34 -05:00
}
2016-02-25 15:37:40 -05:00
// The goroutine must be executing in order to call newstack,
// so it must be Grunning (or Gscanrunning).
casgstatus ( gp , _Grunning , _Gcopystack )
2014-11-15 08:00:38 -05:00
// The concurrent GC will not scan the stack while we are doing the copy since
// the gp is in a Gcopystack status.
2017-02-09 14:11:13 -05:00
copystack ( gp , newsize , true )
2014-11-11 17:04:34 -05:00
if stackDebug >= 1 {
print ( "stack grow done\n" )
}
2014-11-15 08:00:38 -05:00
casgstatus ( gp , _Gcopystack , _Grunning )
2014-11-11 17:04:34 -05:00
gogo ( & gp . sched )
}
// nilfunc faults on purpose: it stores a zero byte through a nil
// *uint8. gostartcallfn uses nilfunc's PC as the entry point when it is
// handed a nil funcval.
//
//go:nosplit
func nilfunc() {
	var sink *uint8 // left nil so that the store below faults
	*sink = 0
}
// gostartcallfn adjusts gobuf as if it executed a call to fn
// and then did an immediate gosave.
//
// fn is taken from fv.fn when fv is non-nil; otherwise the PC of nilfunc
// is used. The actual adjustment is delegated to gostartcall, which
// receives gobuf, the chosen fn, and fv itself as an unsafe.Pointer.
func gostartcallfn ( gobuf * gobuf , fv * funcval ) {
var fn unsafe . Pointer
if fv != nil {
2015-10-15 14:33:50 -07:00
// Use the code pointer stored in the function value.
fn = unsafe . Pointer ( fv . fn )
2014-11-11 17:04:34 -05:00
} else {
// No function value was supplied: fall back to nilfunc, whose body
// stores through a nil pointer.
// NOTE(review): presumably so that starting this gobuf faults in a
// recognizable function instead of jumping to address 0 - confirm.
fn = unsafe . Pointer ( funcPC ( nilfunc ) )
}
2015-10-15 14:33:50 -07:00
// fv is passed through unchanged (possibly nil) as the third argument.
// NOTE(review): presumably the closure context for fn - confirm against
// gostartcall.
gostartcall ( gobuf , fn , unsafe . Pointer ( fv ) )
2014-11-11 17:04:34 -05:00
}
// Maybe shrink the stack being used by gp.
// Called at garbage collection time.
2016-02-15 17:38:06 -05:00
// gp must be stopped, but the world need not be.
//
// shrinkstack throws if gp has no stack (gp.stack.lo == 0) or if gp's
// status does not have the _Gscan bit set. Otherwise it halves gp's stack
// with copystack, unless one of the following holds, in which case it
// returns without changing anything:
//   - debug.gcshrinkstackoff is greater than zero;
//   - gp.startpc resolves to a function whose funcID is
//     funcID_gcBgMarkWorker;
//   - the halved size would be smaller than _FixedStack;
//   - the bytes in use (stack.hi down to sched.sp, plus _StackLimit) are
//     at least a quarter of the current stack size;
//   - gp.syscallsp is nonzero;
//   - sys.GoosWindows is nonzero and gp.m is non-nil with a nonzero
//     libcallsp.
2014-11-11 17:04:34 -05:00
func shrinkstack ( gp * g ) {
2016-02-15 18:30:48 -05:00
// Read gp's status once; it is checked for the _Gscan bit below.
gstatus := readgstatus ( gp )
2014-11-11 17:04:34 -05:00
if gp . stack . lo == 0 {
2014-12-27 20:58:00 -08:00
throw ( "missing stack in shrinkstack" )
2014-11-11 17:04:34 -05:00
}
2016-02-15 18:30:48 -05:00
// The status must carry the _Gscan bit; anything else is a fatal error.
if gstatus & _Gscan == 0 {
throw ( "bad status in shrinkstack" )
}
2014-11-11 17:04:34 -05:00
2015-06-05 11:51:49 -04:00
// Stack shrinking can be turned off through debug.gcshrinkstackoff.
if debug . gcshrinkstackoff > 0 {
return
}
2018-03-14 15:21:37 -07:00
// Identify the function gp was started at from its start PC.
f := findfunc ( gp . startpc )
if f . valid ( ) && f . funcID == funcID_gcBgMarkWorker {
runtime: scan mark worker stacks like normal
Currently, markroot delays scanning mark worker stacks until mark
termination by putting the mark worker G directly on the rescan list
when it encounters one during the mark phase. Without this, since mark
workers are non-preemptible, two mark workers that attempt to scan
each other's stacks can deadlock.
However, this is annoyingly asymmetric and causes some real problems.
First, markroot does not own the G at that point, so it's not
technically safe to add it to the rescan list. I haven't been able to
find a specific problem this could cause, but I suspect it's the root
cause of issue #17099. Second, this will interfere with the hybrid
barrier, since there is no stack rescanning during mark termination
with the hybrid barrier.
This commit switches to a different approach. We move the mark
worker's call to gcDrain to the system stack and set the mark worker's
status to _Gwaiting for the duration of the drain to indicate that
it's preemptible. This lets another mark worker scan its G stack while
the drain is running on the system stack. We don't return to the G
stack until we can switch back to _Grunning, which ensures we don't
race with a stack scan. This lets us eliminate the special case for
mark worker stack scans and scan them just like any other goroutine.
The only subtlety to this approach is that we have to disable stack
shrinking for mark workers; they could be referring to captured
variables from the G stack, so it's not safe to move their stacks.
Updates #17099 and #17503.
Change-Id: Ia5213949ec470af63e24dfce01df357c12adbbea
Reviewed-on: https://go-review.googlesource.com/31820
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Rick Hudson <rlh@golang.org>
2016-10-24 14:20:07 -04:00
// We're not allowed to shrink the gcBgMarkWorker
// stack (see gcBgMarkWorker for explanation).
return
}
2015-06-05 11:51:49 -04:00
2017-02-09 14:11:13 -05:00
// The candidate new size is half of the current stack size.
oldsize := gp . stack . hi - gp . stack . lo
2014-11-11 17:04:34 -05:00
newsize := oldsize / 2
runtime: account for stack guard when shrinking the stack
Currently, when shrinkstack computes whether the halved stack
allocation will have enough room for the stack, it accounts for the
stack space that's actively in use but fails to leave extra room for
the stack guard space. As a result, *if* the minimum stack size is
small enough or the guard large enough, it may shrink the stack and
leave less than enough room to run nosplit functions. If the next
function called after the stack shrink is a nosplit function, it may
overflow the stack without noticing and overwrite non-stack memory.
We don't think this is happening under normal conditions right now.
The minimum stack allocation is 2K and the guard is 640 bytes. The
"worst case" stack shrink is from 4K (4048 bytes after stack barrier
array reservation) to 2K (2016 bytes after stack barrier array
reservation), which means the largest "used" size that will qualify
for shrinking is 4048/4 - 8 = 1004 bytes. After copying, that leaves
2016 - 1004 = 1012 bytes of available stack, which is significantly
more than the guard space.
If we were to reduce the minimum stack size to 1K or raise the guard
space above 1012 bytes, the logic in shrinkstack would no longer leave
enough space.
It's also possible to trigger this problem by setting
firstStackBarrierOffset to 0, which puts stack barriers in a debug
mode that steals away *half* of the stack for the stack barrier array
reservation. Then, the largest "used" size that qualifies for
shrinking is (4096/2)/4 - 8 = 504 bytes. After copying, that leaves
(2096/2) - 504 = 8 bytes of available stack; much less than the
required guard space. This causes failures like those in issue #11027
because func gc() shrinks its own stack and then immediately calls
casgstatus (a nosplit function), which overflows the stack and
overwrites a free list pointer in the neighboring span. However, since
this seems to require the special debug mode, we don't think it's
responsible for issue #11027.
To forestall all of these subtle issues, this commit modifies
shrinkstack to correctly account for the guard space when considering
whether to halve the stack allocation.
Change-Id: I7312584addc63b5bfe55cc384a1012f6181f1b9d
Reviewed-on: https://go-review.googlesource.com/10714
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Russ Cox <rsc@golang.org>
2015-06-04 17:28:02 -04:00
// Don't shrink the allocation below the minimum-sized stack
// allocation.
2014-11-11 17:04:34 -05:00
if newsize < _FixedStack {
runtime: account for stack guard when shrinking the stack
Currently, when shrinkstack computes whether the halved stack
allocation will have enough room for the stack, it accounts for the
stack space that's actively in use but fails to leave extra room for
the stack guard space. As a result, *if* the minimum stack size is
small enough or the guard large enough, it may shrink the stack and
leave less than enough room to run nosplit functions. If the next
function called after the stack shrink is a nosplit function, it may
overflow the stack without noticing and overwrite non-stack memory.
We don't think this is happening under normal conditions right now.
The minimum stack allocation is 2K and the guard is 640 bytes. The
"worst case" stack shrink is from 4K (4048 bytes after stack barrier
array reservation) to 2K (2016 bytes after stack barrier array
reservation), which means the largest "used" size that will qualify
for shrinking is 4048/4 - 8 = 1004 bytes. After copying, that leaves
2016 - 1004 = 1012 bytes of available stack, which is significantly
more than the guard space.
If we were to reduce the minimum stack size to 1K or raise the guard
space above 1012 bytes, the logic in shrinkstack would no longer leave
enough space.
It's also possible to trigger this problem by setting
firstStackBarrierOffset to 0, which puts stack barriers in a debug
mode that steals away *half* of the stack for the stack barrier array
reservation. Then, the largest "used" size that qualifies for
shrinking is (4096/2)/4 - 8 = 504 bytes. After copying, that leaves
(2096/2) - 504 = 8 bytes of available stack; much less than the
required guard space. This causes failures like those in issue #11027
because func gc() shrinks its own stack and then immediately calls
casgstatus (a nosplit function), which overflows the stack and
overwrites a free list pointer in the neighboring span. However, since
this seems to require the special debug mode, we don't think it's
responsible for issue #11027.
To forestall all of these subtle issues, this commit modifies
shrinkstack to correctly account for the guard space when considering
whether to halve the stack allocation.
Change-Id: I7312584addc63b5bfe55cc384a1012f6181f1b9d
Reviewed-on: https://go-review.googlesource.com/10714
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Russ Cox <rsc@golang.org>
2015-06-04 17:28:02 -04:00
return
2014-11-11 17:04:34 -05:00
}
runtime: account for stack guard when shrinking the stack
Currently, when shrinkstack computes whether the halved stack
allocation will have enough room for the stack, it accounts for the
stack space that's actively in use but fails to leave extra room for
the stack guard space. As a result, *if* the minimum stack size is
small enough or the guard large enough, it may shrink the stack and
leave less than enough room to run nosplit functions. If the next
function called after the stack shrink is a nosplit function, it may
overflow the stack without noticing and overwrite non-stack memory.
We don't think this is happening under normal conditions right now.
The minimum stack allocation is 2K and the guard is 640 bytes. The
"worst case" stack shrink is from 4K (4048 bytes after stack barrier
array reservation) to 2K (2016 bytes after stack barrier array
reservation), which means the largest "used" size that will qualify
for shrinking is 4048/4 - 8 = 1004 bytes. After copying, that leaves
2016 - 1004 = 1012 bytes of available stack, which is significantly
more than the guard space.
If we were to reduce the minimum stack size to 1K or raise the guard
space above 1012 bytes, the logic in shrinkstack would no longer leave
enough space.
It's also possible to trigger this problem by setting
firstStackBarrierOffset to 0, which puts stack barriers in a debug
mode that steals away *half* of the stack for the stack barrier array
reservation. Then, the largest "used" size that qualifies for
shrinking is (4096/2)/4 - 8 = 504 bytes. After copying, that leaves
(2096/2) - 504 = 8 bytes of available stack; much less than the
required guard space. This causes failures like those in issue #11027
because func gc() shrinks its own stack and then immediately calls
casgstatus (a nosplit function), which overflows the stack and
overwrites a free list pointer in the neighboring span. However, since
this seems to require the special debug mode, we don't think it's
responsible for issue #11027.
To forestall all of these subtle issues, this commit modifies
shrinkstack to correctly account for the guard space when considering
whether to halve the stack allocation.
Change-Id: I7312584addc63b5bfe55cc384a1012f6181f1b9d
Reviewed-on: https://go-review.googlesource.com/10714
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Russ Cox <rsc@golang.org>
2015-06-04 17:28:02 -04:00
// Compute how much of the stack is currently in use and only
// shrink the stack if gp is using less than a quarter of its
// current stack. The currently used stack includes everything
// down to the SP plus the stack guard space that ensures
// there's room for nosplit functions.
// Note that avail is computed the same way as oldsize above.
avail := gp . stack . hi - gp . stack . lo
if used := gp . stack . hi - gp . sched . sp + _StackLimit ; used >= avail / 4 {
return
2014-11-11 17:04:34 -05:00
}
// We can't copy the stack if we're in a syscall.
// The syscall might have pointers into the stack.
if gp . syscallsp != 0 {
return
}
2015-11-11 12:39:30 -05:00
// Likewise skip when sys.GoosWindows is set and gp's M has a nonzero
// libcallsp.
// NOTE(review): presumably for the same reason as the syscall case
// above - confirm.
if sys . GoosWindows != 0 && gp . m != nil && gp . m . libcallsp != 0 {
2014-11-11 17:04:34 -05:00
return
}
if stackDebug > 0 {
print ( "shrinking stack " , oldsize , "->" , newsize , "\n" )
}
2014-11-15 08:00:38 -05:00
2016-02-15 17:38:06 -05:00
// Move gp onto a stack of the halved size.
// NOTE(review): newstack passes true as the third argument when growing;
// confirm the meaning of false here against copystack.
copystack ( gp , newsize , false )
2014-11-11 17:04:34 -05:00
}
runtime: don't free stack spans during GC
Memory for stacks is manually managed by the runtime and, currently
(with one exception) we free stack spans immediately when the last
stack on a span is freed. However, the garbage collector assumes that
spans can never transition from non-free to free during scan or mark.
This disagreement makes it possible for the garbage collector to mark
uninitialized objects and is blocking us from re-enabling the bad
pointer test in the garbage collector (issue #9880).
For example, the following sequence will result in marking an
uninitialized object:
1. scanobject loads a pointer slot out of the object it's scanning.
This happens to be one of the special pointers from the heap into a
stack. Call the pointer p and suppose it points into X's stack.
2. X, running on another thread, grows its stack and frees its old
stack.
3. The old stack happens to be large or was the last stack in its
span, so X frees this span, setting it to state _MSpanFree.
4. The span gets reused as a heap span.
5. scanobject calls heapBitsForObject, which loads the span containing
p, which is now in state _MSpanInUse, but doesn't necessarily have
an object at p. The not-object at p gets marked, and at this point
all sorts of things can go wrong.
We already have a partial solution to this. When shrinking a stack, we
put the old stack on a queue to be freed at the end of garbage
collection. This was done to address exactly this problem, but wasn't
a complete solution.
This commit generalizes this solution to both shrinking and growing
stacks. For stacks that fit in the stack pool, we simply don't free
the span, even if its reference count reaches zero. It's fine to reuse
the span for other stacks, and this enables that. At the end of GC, we
sweep for cached stack spans with a zero reference count and free
them. For larger stacks, we simply queue the stack span to be freed at
the end of GC. Ideally, we would reuse these large stack spans the way
we can small stack spans, but that's a more invasive change that will
have to wait until after the freeze.
Fixes #11267.
Change-Id: Ib7f2c5da4845cc0268e8dc098b08465116972a71
Reviewed-on: https://go-review.googlesource.com/11502
Reviewed-by: Russ Cox <rsc@golang.org>
2015-06-22 10:24:50 -04:00
// freeStackSpans frees unused stack spans at the end of GC.
//
// It works in two passes, each under its own lock:
//  1. With stackpoolmu held, it walks every order of stackpool and, for
//     each span whose allocCount is zero, removes the span from its list,
//     clears its manualFreeList, and releases it with osStackFree and
//     mheap_.freeManual.
//  2. With stackLarge.lock held, it removes and releases every span on
//     every stackLarge.free list in the same way, without any allocCount
//     check.
//
// Released spans are accounted against memstats.stacks_inuse via the
// pointer passed to mheap_.freeManual.
func freeStackSpans ( ) {
2014-11-11 17:04:34 -05:00
lock ( & stackpoolmu )
runtime: don't free stack spans during GC
Memory for stacks is manually managed by the runtime and, currently
(with one exception) we free stack spans immediately when the last
stack on a span is freed. However, the garbage collector assumes that
spans can never transition from non-free to free during scan or mark.
This disagreement makes it possible for the garbage collector to mark
uninitialized objects and is blocking us from re-enabling the bad
pointer test in the garbage collector (issue #9880).
For example, the following sequence will result in marking an
uninitialized object:
1. scanobject loads a pointer slot out of the object it's scanning.
This happens to be one of the special pointers from the heap into a
stack. Call the pointer p and suppose it points into X's stack.
2. X, running on another thread, grows its stack and frees its old
stack.
3. The old stack happens to be large or was the last stack in its
span, so X frees this span, setting it to state _MSpanFree.
4. The span gets reused as a heap span.
5. scanobject calls heapBitsForObject, which loads the span containing
p, which is now in state _MSpanInUse, but doesn't necessarily have
an object at p. The not-object at p gets marked, and at this point
all sorts of things can go wrong.
We already have a partial solution to this. When shrinking a stack, we
put the old stack on a queue to be freed at the end of garbage
collection. This was done to address exactly this problem, but wasn't
a complete solution.
This commit generalizes this solution to both shrinking and growing
stacks. For stacks that fit in the stack pool, we simply don't free
the span, even if its reference count reaches zero. It's fine to reuse
the span for other stacks, and this enables that. At the end of GC, we
sweep for cached stack spans with a zero reference count and free
them. For larger stacks, we simply queue the stack span to be freed at
the end of GC. Ideally, we would reuse these large stack spans the way
we can small stack spans, but that's a more invasive change that will
have to wait until after the freeze.
Fixes #11267.
Change-Id: Ib7f2c5da4845cc0268e8dc098b08465116972a71
Reviewed-on: https://go-review.googlesource.com/11502
Reviewed-by: Russ Cox <rsc@golang.org>
2015-06-22 10:24:50 -04:00
// Scan stack pools for empty stack spans.
for order := range stackpool {
list := & stackpool [ order ]
2015-10-15 15:59:49 -07:00
// The loop has no post statement: s is advanced by hand at the bottom
// of the body.
for s := list . first ; s != nil ; {
runtime: don't free stack spans during GC
Memory for stacks is manually managed by the runtime and, currently
(with one exception) we free stack spans immediately when the last
stack on a span is freed. However, the garbage collector assumes that
spans can never transition from non-free to free during scan or mark.
This disagreement makes it possible for the garbage collector to mark
uninitialized objects and is blocking us from re-enabling the bad
pointer test in the garbage collector (issue #9880).
For example, the following sequence will result in marking an
uninitialized object:
1. scanobject loads a pointer slot out of the object it's scanning.
This happens to be one of the special pointers from the heap into a
stack. Call the pointer p and suppose it points into X's stack.
2. X, running on another thread, grows its stack and frees its old
stack.
3. The old stack happens to be large or was the last stack in its
span, so X frees this span, setting it to state _MSpanFree.
4. The span gets reused as a heap span.
5. scanobject calls heapBitsForObject, which loads the span containing
p, which is now in state _MSpanInUse, but doesn't necessarily have
an object at p. The not-object at p gets marked, and at this point
all sorts of things can go wrong.
We already have a partial solution to this. When shrinking a stack, we
put the old stack on a queue to be freed at the end of garbage
collection. This was done to address exactly this problem, but wasn't
a complete solution.
This commit generalizes this solution to both shrinking and growing
stacks. For stacks that fit in the stack pool, we simply don't free
the span, even if its reference count reaches zero. It's fine to reuse
the span for other stacks, and this enables that. At the end of GC, we
sweep for cached stack spans with a zero reference count and free
them. For larger stacks, we simply queue the stack span to be freed at
the end of GC. Ideally, we would reuse these large stack spans the way
we can small stack spans, but that's a more invasive change that will
have to wait until after the freeze.
Fixes #11267.
Change-Id: Ib7f2c5da4845cc0268e8dc098b08465116972a71
Reviewed-on: https://go-review.googlesource.com/11502
Reviewed-by: Russ Cox <rsc@golang.org>
2015-06-22 10:24:50 -04:00
// Capture the successor first: s may be removed from the list and
// freed below.
next := s . next
2016-02-16 17:16:43 -05:00
// Only spans with no allocations outstanding are released.
if s . allocCount == 0 {
2015-11-11 16:13:51 -08:00
list . remove ( s )
2017-03-16 15:02:02 -04:00
s . manualFreeList = 0
2018-06-29 14:56:48 -04:00
osStackFree ( s )
2017-03-16 14:46:53 -04:00
mheap_ . freeManual ( s , & memstats . stacks_inuse )
runtime: don't free stack spans during GC
Memory for stacks is manually managed by the runtime and, currently
(with one exception) we free stack spans immediately when the last
stack on a span is freed. However, the garbage collector assumes that
spans can never transition from non-free to free during scan or mark.
This disagreement makes it possible for the garbage collector to mark
uninitialized objects and is blocking us from re-enabling the bad
pointer test in the garbage collector (issue #9880).
For example, the following sequence will result in marking an
uninitialized object:
1. scanobject loads a pointer slot out of the object it's scanning.
This happens to be one of the special pointers from the heap into a
stack. Call the pointer p and suppose it points into X's stack.
2. X, running on another thread, grows its stack and frees its old
stack.
3. The old stack happens to be large or was the last stack in its
span, so X frees this span, setting it to state _MSpanFree.
4. The span gets reused as a heap span.
5. scanobject calls heapBitsForObject, which loads the span containing
p, which is now in state _MSpanInUse, but doesn't necessarily have
an object at p. The not-object at p gets marked, and at this point
all sorts of things can go wrong.
We already have a partial solution to this. When shrinking a stack, we
put the old stack on a queue to be freed at the end of garbage
collection. This was done to address exactly this problem, but wasn't
a complete solution.
This commit generalizes this solution to both shrinking and growing
stacks. For stacks that fit in the stack pool, we simply don't free
the span, even if its reference count reaches zero. It's fine to reuse
the span for other stacks, and this enables that. At the end of GC, we
sweep for cached stack spans with a zero reference count and free
them. For larger stacks, we simply queue the stack span to be freed at
the end of GC. Ideally, we would reuse these large stack spans the way
we can small stack spans, but that's a more invasive change that will
have to wait until after the freeze.
Fixes #11267.
Change-Id: Ib7f2c5da4845cc0268e8dc098b08465116972a71
Reviewed-on: https://go-review.googlesource.com/11502
Reviewed-by: Russ Cox <rsc@golang.org>
2015-06-22 10:24:50 -04:00
}
s = next
}
2014-11-11 17:04:34 -05:00
}
runtime: don't free stack spans during GC
Memory for stacks is manually managed by the runtime and, currently
(with one exception) we free stack spans immediately when the last
stack on a span is freed. However, the garbage collector assumes that
spans can never transition from non-free to free during scan or mark.
This disagreement makes it possible for the garbage collector to mark
uninitialized objects and is blocking us from re-enabling the bad
pointer test in the garbage collector (issue #9880).
For example, the following sequence will result in marking an
uninitialized object:
1. scanobject loads a pointer slot out of the object it's scanning.
This happens to be one of the special pointers from the heap into a
stack. Call the pointer p and suppose it points into X's stack.
2. X, running on another thread, grows its stack and frees its old
stack.
3. The old stack happens to be large or was the last stack in its
span, so X frees this span, setting it to state _MSpanFree.
4. The span gets reused as a heap span.
5. scanobject calls heapBitsForObject, which loads the span containing
p, which is now in state _MSpanInUse, but doesn't necessarily have
an object at p. The not-object at p gets marked, and at this point
all sorts of things can go wrong.
We already have a partial solution to this. When shrinking a stack, we
put the old stack on a queue to be freed at the end of garbage
collection. This was done to address exactly this problem, but wasn't
a complete solution.
This commit generalizes this solution to both shrinking and growing
stacks. For stacks that fit in the stack pool, we simply don't free
the span, even if its reference count reaches zero. It's fine to reuse
the span for other stacks, and this enables that. At the end of GC, we
sweep for cached stack spans with a zero reference count and free
them. For larger stacks, we simply queue the stack span to be freed at
the end of GC. Ideally, we would reuse these large stack spans the way
we can small stack spans, but that's a more invasive change that will
have to wait until after the freeze.
Fixes #11267.
Change-Id: Ib7f2c5da4845cc0268e8dc098b08465116972a71
Reviewed-on: https://go-review.googlesource.com/11502
Reviewed-by: Russ Cox <rsc@golang.org>
2015-06-22 10:24:50 -04:00
unlock ( & stackpoolmu )
2015-12-14 14:30:25 -05:00
// Free large stack spans.
// Unlike the pool pass above, every span found on these lists is
// released unconditionally.
lock ( & stackLarge . lock )
for i := range stackLarge . free {
for s := stackLarge . free [ i ] . first ; s != nil ; {
// Capture the successor before s is unlinked and freed.
next := s . next
stackLarge . free [ i ] . remove ( s )
2018-06-29 14:56:48 -04:00
osStackFree ( s )
2017-03-16 14:46:53 -04:00
mheap_ . freeManual ( s , & memstats . stacks_inuse )
2015-12-14 14:30:25 -05:00
s = next
}
}
unlock ( & stackLarge . lock )
2014-11-11 17:04:34 -05:00
}
2015-01-05 16:29:21 +00:00
2018-09-01 20:16:39 -07:00
// getStackMap returns the locals and arguments live pointer maps, and
// stack object list for frame.
//
// If frame.continpc is zero the frame is treated as dead and all three
// results are returned empty. cache is passed through to pcdatavalue.
// When debug is set and stackDebug >= 3, extra information about the
// locals map is printed. The function throws if a required stack map is
// missing or if the computed stack map index is out of range.
func getStackMap ( frame * stkframe , cache * pcvalueCache , debug bool ) ( locals , args bitvector , objs [ ] stackObjectRecord ) {
2018-04-26 21:20:41 -04:00
targetpc := frame . continpc
if targetpc == 0 {
// Frame is dead. Return empty bitvectors.
return
}
f := frame . fn
// pcdata is the stack map index; -1 selects the function entry map.
pcdata := int32 ( - 1 )
if targetpc != f . entry {
// Back up to the CALL. If we're at the function entry
// point, we want to use the entry map (-1), even if
// the first instruction of the function changes the
// stack map.
targetpc --
pcdata = pcdatavalue ( f , _PCDATA_StackMapIndex , targetpc , cache )
}
if pcdata == - 1 {
// We do not have a valid pcdata value but there might be a
// stackmap for this function. It is likely that we are looking
// at the function prologue, assume so and hope for the best.
pcdata = 0
}
// Local variables.
// size is the distance from sp up to varp. The locals map is only
// consulted when size exceeds the per-architecture minimum chosen
// below (sys.SpAlign on ARM64, sys.MinFrameSize otherwise).
size := frame . varp - frame . sp
var minsize uintptr
switch sys . ArchFamily {
case sys . ARM64 :
minsize = sys . SpAlign
default :
minsize = sys . MinFrameSize
}
if size > minsize {
2018-04-26 21:43:19 -04:00
var stkmap * stackmap
// stackid is the index into stkmap. It starts as this function's
// own stack map index and is recomputed from the caller's register
// map index in the debugCallV1 case.
stackid := pcdata
if f . funcID != funcID_debugCallV1 {
stkmap = ( * stackmap ) ( funcdata ( f , _FUNCDATA_LocalsPointerMaps ) )
} else {
// debugCallV1's stack map is the register map
// at its call site.
callerPC := frame . lr
caller := findfunc ( callerPC )
if ! caller . valid ( ) {
println ( "runtime: debugCallV1 called by unknown caller" , hex ( callerPC ) )
throw ( "bad debugCallV1" )
}
// Same back-up-to-the-CALL logic as above, applied to the
// caller and using the register map index instead.
stackid = int32 ( - 1 )
if callerPC != caller . entry {
callerPC --
stackid = pcdatavalue ( caller , _PCDATA_RegMapIndex , callerPC , cache )
}
if stackid == - 1 {
stackid = 0 // in prologue
}
stkmap = ( * stackmap ) ( funcdata ( caller , _FUNCDATA_RegPointerMaps ) )
}
if stkmap == nil || stkmap . n <= 0 {
2018-04-26 21:20:41 -04:00
print ( "runtime: frame " , funcname ( f ) , " untyped locals " , hex ( frame . varp - size ) , "+" , hex ( size ) , "\n" )
throw ( "missing stackmap" )
}
// If nbit == 0, there's no work to do.
2018-04-26 21:43:19 -04:00
if stkmap . nbit > 0 {
if stackid < 0 || stackid >= stkmap . n {
2018-04-26 21:20:41 -04:00
// don't know where we are
2018-04-26 21:43:19 -04:00
print ( "runtime: pcdata is " , stackid , " and " , stkmap . n , " locals stack map entries for " , funcname ( f ) , " (targetpc=" , hex ( targetpc ) , ")\n" )
2018-04-26 21:20:41 -04:00
throw ( "bad symbol table" )
}
2018-04-26 21:43:19 -04:00
locals = stackmapdata ( stkmap , stackid )
2018-04-26 21:20:41 -04:00
if stackDebug >= 3 && debug {
2018-04-26 21:43:19 -04:00
print ( " locals " , stackid , "/" , stkmap . n , " " , locals . n , " words " , locals . bytedata , "\n" )
2018-04-26 21:20:41 -04:00
}
} else if stackDebug >= 3 && debug {
print ( " no locals to adjust\n" )
}
}
// Arguments.
if frame . arglen > 0 {
if frame . argmap != nil {
2018-11-12 15:49:09 -08:00
// argmap is set when the function is reflect.makeFuncStub or reflect.methodValueCall.
// In this case, arglen specifies how much of the args section is actually live.
// (It could be either all the args + results, or just the args.)
2018-04-26 21:20:41 -04:00
args = * frame . argmap
2018-11-12 15:49:09 -08:00
n := int32 ( frame . arglen / sys . PtrSize )
if n < args . n {
args . n = n // Don't use more of the arguments than arglen.
}
2018-04-26 21:20:41 -04:00
} else {
// No explicit argmap: look up the function's own args pointer
// maps and index them with pcdata.
stackmap := ( * stackmap ) ( funcdata ( f , _FUNCDATA_ArgsPointerMaps ) )
if stackmap == nil || stackmap . n <= 0 {
print ( "runtime: frame " , funcname ( f ) , " untyped args " , hex ( frame . argp ) , "+" , hex ( frame . arglen ) , "\n" )
throw ( "missing stackmap" )
}
if pcdata < 0 || pcdata >= stackmap . n {
// don't know where we are
print ( "runtime: pcdata is " , pcdata , " and " , stackmap . n , " args stack map entries for " , funcname ( f ) , " (targetpc=" , hex ( targetpc ) , ")\n" )
throw ( "bad symbol table" )
}
// With nbit == 0, args is left as its zero value.
if stackmap . nbit > 0 {
args = stackmapdata ( stackmap , pcdata )
}
}
}
2018-09-01 20:16:39 -07:00
// stack objects.
// The funcdata starts with a uintptr count n; the records begin one
// pointer-size past it. objs is made to refer to that data in place
// by writing its slice header directly.
p := funcdata ( f , _FUNCDATA_StackObjects )
if p != nil {
n := * ( * uintptr ) ( p )
p = add ( p , sys . PtrSize )
* ( * slice ) ( unsafe . Pointer ( & objs ) ) = slice { array : noescape ( p ) , len : int ( n ) , cap : int ( n ) }
// Note: the noescape above is needed to keep
2018-10-06 06:10:25 +00:00
// getStackMap from "leaking param content:
2018-09-01 20:16:39 -07:00
// frame". That leak propagates up to getgcmask, then
// GCMask, then verifyGCInfo, which converts the stack
// gcinfo tests into heap gcinfo tests :(
}
2018-04-26 21:20:41 -04:00
return
}
2018-09-01 20:16:39 -07:00
// A stackObjectRecord is generated by the compiler for each stack object in a stack frame.
// This record must match the generator code in cmd/compile/internal/gc/ssa.go:emitStackObjects,
// so the field order and types here must not be changed independently of it.
type stackObjectRecord struct {
// off is the object's offset in the frame.
// If negative, it is an offset from varp.
// If non-negative, it is an offset from argp.
off int
// typ points to a _type; presumably the type of the object at off
// (confirm against emitStackObjects).
typ * _type
}
2019-05-31 16:38:56 -04:00
// morestackc unconditionally throws. Per its message, reaching it means
// an attempt was made to execute system stack code on a user stack.
//
// This is exported as ABI0 via linkname so obj can call it.
//
2015-01-05 16:29:21 +00:00
//go:nosplit
2019-05-31 16:38:56 -04:00
//go:linkname morestackc
2015-01-05 16:29:21 +00:00
func morestackc ( ) {
2018-01-12 12:39:22 -05:00
throw ( "attempt to execute system stack code on user stack" )
2015-01-05 16:29:21 +00:00
}