2014-11-11 17:04:34 -05:00
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package runtime
import "unsafe"
const (
	// stackDebug selects how much stack-management logging is printed.
	// stackDebug == 0: no logging
	//            == 1: logging of per-stack operations
	//            == 2: logging of per-frame operations
	//            == 3: logging of per-word updates
	//            == 4: logging of per-word reads
	stackDebug = 0

	stackFromSystem  = 0 // allocate stacks from system memory instead of the heap
	stackFaultOnFree = 0 // old stacks are mapped noaccess to detect use after free
	stackPoisonCopy  = 0 // fill stack that should not be accessed with garbage, to detect bad dereferences during copy

	// stackCache, when non-zero, lets stackalloc and stackfree serve small
	// stacks from the fixed-size free-list allocator instead of always
	// using a dedicated span.
	stackCache = 1
)
const (
uintptrMask = 1 << ( 8 * ptrSize ) - 1
poisonStack = uintptrMask & 0x6868686868686868
// Goroutine preemption request.
2015-01-05 16:29:21 +00:00
// Stored into g->stackguard0 to cause split stack check failure.
2014-11-11 17:04:34 -05:00
// Must be greater than any real sp.
// 0xfffffade in hex.
stackPreempt = uintptrMask & - 1314
// Thread is forking.
2015-01-05 16:29:21 +00:00
// Stored into g->stackguard0 to cause split stack check failure.
2014-11-11 17:04:34 -05:00
// Must be greater than any real sp.
stackFork = uintptrMask & - 1234
)
// Global pool of spans that have free stacks.
// Stacks are assigned an order according to size.
//     order = log_2(size/FixedStack)
// There is a free list for each order.
// TODO: one lock per order?
var (
	// stackpool holds one span list per stack order.
	stackpool [_NumStackOrders]mspan

	// stackpoolmu must be held while calling stackpoolalloc or stackpoolfree.
	stackpoolmu mutex

	// NOTE(review): stackfreequeue is not referenced in this part of the
	// file; its purpose should be confirmed against its users.
	stackfreequeue stack
)
2015-01-14 11:09:50 -05:00
// Cached value of haveexperiment("framepointer")
var framepointer_enabled bool
2014-11-11 17:04:34 -05:00
func stackinit ( ) {
if _StackCacheSize & _PageMask != 0 {
2014-12-27 20:58:00 -08:00
throw ( "cache size must be a multiple of page size" )
2014-11-11 17:04:34 -05:00
}
for i := range stackpool {
mSpanList_Init ( & stackpool [ i ] )
}
}
// Allocates a stack from the free pool. Must be called with
// stackpoolmu held.
2014-11-20 12:08:13 -05:00
func stackpoolalloc ( order uint8 ) gclinkptr {
2014-11-11 17:04:34 -05:00
list := & stackpool [ order ]
s := list . next
if s == list {
// no free stacks. Allocate another span worth.
s = mHeap_AllocStack ( & mheap_ , _StackCacheSize >> _PageShift )
if s == nil {
2014-12-27 20:58:00 -08:00
throw ( "out of memory" )
2014-11-11 17:04:34 -05:00
}
if s . ref != 0 {
2014-12-27 20:58:00 -08:00
throw ( "bad ref" )
2014-11-11 17:04:34 -05:00
}
2014-11-20 12:08:13 -05:00
if s . freelist . ptr ( ) != nil {
2014-12-27 20:58:00 -08:00
throw ( "bad freelist" )
2014-11-11 17:04:34 -05:00
}
for i := uintptr ( 0 ) ; i < _StackCacheSize ; i += _FixedStack << order {
2014-11-20 12:08:13 -05:00
x := gclinkptr ( uintptr ( s . start ) << _PageShift + i )
x . ptr ( ) . next = s . freelist
2014-11-11 17:04:34 -05:00
s . freelist = x
}
mSpanList_Insert ( list , s )
}
x := s . freelist
2014-11-20 12:08:13 -05:00
if x . ptr ( ) == nil {
2014-12-27 20:58:00 -08:00
throw ( "span has no free stacks" )
2014-11-11 17:04:34 -05:00
}
2014-11-20 12:08:13 -05:00
s . freelist = x . ptr ( ) . next
2014-11-11 17:04:34 -05:00
s . ref ++
2014-11-20 12:08:13 -05:00
if s . freelist . ptr ( ) == nil {
2014-11-11 17:04:34 -05:00
// all stacks in s are allocated.
mSpanList_Remove ( s )
}
return x
}
// Adds stack x to the free pool. Must be called with stackpoolmu held.
2014-11-20 12:08:13 -05:00
func stackpoolfree ( x gclinkptr , order uint8 ) {
2014-11-11 17:04:34 -05:00
s := mHeap_Lookup ( & mheap_ , ( unsafe . Pointer ) ( x ) )
if s . state != _MSpanStack {
2014-12-27 20:58:00 -08:00
throw ( "freeing stack not in a stack span" )
2014-11-11 17:04:34 -05:00
}
2014-11-20 12:08:13 -05:00
if s . freelist . ptr ( ) == nil {
2014-11-11 17:04:34 -05:00
// s will now have a free stack
mSpanList_Insert ( & stackpool [ order ] , s )
}
2014-11-20 12:08:13 -05:00
x . ptr ( ) . next = s . freelist
2014-11-11 17:04:34 -05:00
s . freelist = x
s . ref --
if s . ref == 0 {
// span is completely free - return to heap
mSpanList_Remove ( s )
2014-11-20 12:08:13 -05:00
s . freelist = 0
2014-11-11 17:04:34 -05:00
mHeap_FreeStack ( & mheap_ , s )
}
}
// stackcacherefill/stackcacherelease implement a global pool of stack segments.
// The pool is required to prevent unlimited growth of per-thread caches.
func stackcacherefill ( c * mcache , order uint8 ) {
if stackDebug >= 1 {
print ( "stackcacherefill order=" , order , "\n" )
}
// Grab some stacks from the global cache.
// Grab half of the allowed capacity (to prevent thrashing).
2014-11-20 12:08:13 -05:00
var list gclinkptr
2014-11-11 17:04:34 -05:00
var size uintptr
lock ( & stackpoolmu )
for size < _StackCacheSize / 2 {
x := stackpoolalloc ( order )
2014-11-20 12:08:13 -05:00
x . ptr ( ) . next = list
2014-11-11 17:04:34 -05:00
list = x
size += _FixedStack << order
}
unlock ( & stackpoolmu )
c . stackcache [ order ] . list = list
c . stackcache [ order ] . size = size
}
func stackcacherelease ( c * mcache , order uint8 ) {
if stackDebug >= 1 {
print ( "stackcacherelease order=" , order , "\n" )
}
x := c . stackcache [ order ] . list
size := c . stackcache [ order ] . size
lock ( & stackpoolmu )
for size > _StackCacheSize / 2 {
2014-11-20 12:08:13 -05:00
y := x . ptr ( ) . next
2014-11-11 17:04:34 -05:00
stackpoolfree ( x , order )
x = y
size -= _FixedStack << order
}
unlock ( & stackpoolmu )
c . stackcache [ order ] . list = x
c . stackcache [ order ] . size = size
}
func stackcache_clear ( c * mcache ) {
if stackDebug >= 1 {
print ( "stackcache clear\n" )
}
lock ( & stackpoolmu )
for order := uint8 ( 0 ) ; order < _NumStackOrders ; order ++ {
x := c . stackcache [ order ] . list
2014-11-20 12:08:13 -05:00
for x . ptr ( ) != nil {
y := x . ptr ( ) . next
2014-11-11 17:04:34 -05:00
stackpoolfree ( x , order )
x = y
}
2014-11-20 12:08:13 -05:00
c . stackcache [ order ] . list = 0
2014-11-11 17:04:34 -05:00
c . stackcache [ order ] . size = 0
}
unlock ( & stackpoolmu )
}
func stackalloc ( n uint32 ) stack {
// Stackalloc must be called on scheduler stack, so that we
// never try to grow the stack during the code that stackalloc runs.
// Doing so would cause a deadlock (issue 1547).
thisg := getg ( )
if thisg != thisg . m . g0 {
2014-12-27 20:58:00 -08:00
throw ( "stackalloc not on scheduler stack" )
2014-11-11 17:04:34 -05:00
}
if n & ( n - 1 ) != 0 {
2014-12-27 20:58:00 -08:00
throw ( "stack size not a power of 2" )
2014-11-11 17:04:34 -05:00
}
if stackDebug >= 1 {
print ( "stackalloc " , n , "\n" )
}
if debug . efence != 0 || stackFromSystem != 0 {
v := sysAlloc ( round ( uintptr ( n ) , _PageSize ) , & memstats . stacks_sys )
if v == nil {
2014-12-27 20:58:00 -08:00
throw ( "out of memory (stackalloc)" )
2014-11-11 17:04:34 -05:00
}
return stack { uintptr ( v ) , uintptr ( v ) + uintptr ( n ) }
}
// Small stacks are allocated with a fixed-size free-list allocator.
// If we need a stack of a bigger size, we fall back on allocating
// a dedicated span.
var v unsafe . Pointer
if stackCache != 0 && n < _FixedStack << _NumStackOrders && n < _StackCacheSize {
order := uint8 ( 0 )
n2 := n
for n2 > _FixedStack {
order ++
n2 >>= 1
}
2014-11-20 12:08:13 -05:00
var x gclinkptr
2014-11-11 17:04:34 -05:00
c := thisg . m . mcache
2015-01-30 15:30:41 -05:00
if c == nil || thisg . m . preemptoff != "" || thisg . m . helpgc != 0 {
2014-11-11 17:04:34 -05:00
// c == nil can happen in the guts of exitsyscall or
// procresize. Just get a stack from the global pool.
// Also don't touch stackcache during gc
// as it's flushed concurrently.
lock ( & stackpoolmu )
x = stackpoolalloc ( order )
unlock ( & stackpoolmu )
} else {
x = c . stackcache [ order ] . list
2014-11-20 12:08:13 -05:00
if x . ptr ( ) == nil {
2014-11-11 17:04:34 -05:00
stackcacherefill ( c , order )
x = c . stackcache [ order ] . list
}
2014-11-20 12:08:13 -05:00
c . stackcache [ order ] . list = x . ptr ( ) . next
2014-11-11 17:04:34 -05:00
c . stackcache [ order ] . size -= uintptr ( n )
}
v = ( unsafe . Pointer ) ( x )
} else {
s := mHeap_AllocStack ( & mheap_ , round ( uintptr ( n ) , _PageSize ) >> _PageShift )
if s == nil {
2014-12-27 20:58:00 -08:00
throw ( "out of memory" )
2014-11-11 17:04:34 -05:00
}
v = ( unsafe . Pointer ) ( s . start << _PageShift )
}
if raceenabled {
racemalloc ( v , uintptr ( n ) )
}
if stackDebug >= 1 {
print ( " allocated " , v , "\n" )
}
return stack { uintptr ( v ) , uintptr ( v ) + uintptr ( n ) }
}
func stackfree ( stk stack ) {
gp := getg ( )
n := stk . hi - stk . lo
v := ( unsafe . Pointer ) ( stk . lo )
if n & ( n - 1 ) != 0 {
2014-12-27 20:58:00 -08:00
throw ( "stack not a power of 2" )
2014-11-11 17:04:34 -05:00
}
if stackDebug >= 1 {
println ( "stackfree" , v , n )
memclr ( v , n ) // for testing, clobber stack data
}
if debug . efence != 0 || stackFromSystem != 0 {
if debug . efence != 0 || stackFaultOnFree != 0 {
sysFault ( v , n )
} else {
sysFree ( v , n , & memstats . stacks_sys )
}
return
}
if stackCache != 0 && n < _FixedStack << _NumStackOrders && n < _StackCacheSize {
order := uint8 ( 0 )
n2 := n
for n2 > _FixedStack {
order ++
n2 >>= 1
}
2014-11-20 12:08:13 -05:00
x := gclinkptr ( v )
2014-11-11 17:04:34 -05:00
c := gp . m . mcache
2015-01-30 15:30:41 -05:00
if c == nil || gp . m . preemptoff != "" || gp . m . helpgc != 0 {
2014-11-11 17:04:34 -05:00
lock ( & stackpoolmu )
stackpoolfree ( x , order )
unlock ( & stackpoolmu )
} else {
if c . stackcache [ order ] . size >= _StackCacheSize {
stackcacherelease ( c , order )
}
2014-11-20 12:08:13 -05:00
x . ptr ( ) . next = c . stackcache [ order ] . list
2014-11-11 17:04:34 -05:00
c . stackcache [ order ] . list = x
c . stackcache [ order ] . size += n
}
} else {
s := mHeap_Lookup ( & mheap_ , v )
if s . state != _MSpanStack {
println ( hex ( s . start << _PageShift ) , v )
2014-12-27 20:58:00 -08:00
throw ( "bad span state" )
2014-11-11 17:04:34 -05:00
}
mHeap_FreeStack ( & mheap_ , s )
}
}
var maxstacksize uintptr = 1 << 20 // enough until runtime.main sets it for real
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss
The bitmaps were 2 bits per pointer because we needed to distinguish
scalar, pointer, multiword, and we used the leftover value to distinguish
uninitialized from scalar, even though the garbage collector (GC) didn't care.
Now that there are no multiword structures from the GC's point of view,
cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not.
The GC assumes the same layout for stack frames and for the maps
describing the global data and bss sections, so change them all in one CL.
The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since
the 2-bit representation lives (at least for now) in some of the reflect data.
Because these stack frame bitmaps are stored directly in the rodata in
the binary, this CL reduces the size of the 6g binary by about 1.1%.
Performance change is basically a wash, but using less memory,
and smaller binaries, and enables other bitmap reductions.
name old mean new mean delta
BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005)
BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001)
BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141)
BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001)
BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095)
BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008)
BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014)
BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364)
BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010)
BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368)
BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484)
BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543)
BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000)
BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023)
BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126)
BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975)
BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153)
BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597)
BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804)
BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881)
BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561)
BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000)
BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000)
BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000)
BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000)
BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019)
BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000)
BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025)
BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000)
BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000)
BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305)
BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000)
BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465)
BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075)
BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337)
BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291)
BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507)
BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313)
BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312)
BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000)
BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007)
BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670)
BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828)
BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392)
BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813)
BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000)
BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000)
BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000)
BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985)
BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320)
BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799)
BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667)
BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001)
BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000)
BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011)
BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185)
BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001)
BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000)
BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979)
BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777)
BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771)
BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000)
BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081)
BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027)
BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022)
BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064)
BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001)
BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007)
BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143)
BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278)
BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252)
BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003)
BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581)
BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002)
BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879)
BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257)
BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678)
BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000)
BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000)
BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000)
BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000)
BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000)
BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000)
BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767)
BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347)
BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793)
Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9
Reviewed-on: https://go-review.googlesource.com/9406
Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 22:45:57 -04:00
// ptrnames gives a printable name for each value of a 1-bit pointer
// bitmap entry: 0 is a scalar word, 1 is a pointer word.
// NOTE(review): presumably indexed by the result of ptrbit in debug
// output; confirm against its users.
var ptrnames = []string{
	0: "scalar",
	1: "ptr",
}
// Stack frame layout
//
// (x86)
// +------------------+
// | args from caller |
// +------------------+ <- frame->argp
// |  return address  |
// +------------------+
// |  caller's BP (*) | (*) if framepointer_enabled && varp < sp
// +------------------+ <- frame->varp
// |     locals       |
// +------------------+
// |  args to callee  |
// +------------------+ <- frame->sp
//
// (arm)
// +------------------+
// | args from caller |
// +------------------+ <- frame->argp
// | caller's retaddr |
// +------------------+ <- frame->varp
// |     locals       |
// +------------------+
// |  args to callee  |
// +------------------+
// |  return address  |
// +------------------+ <- frame->sp
// adjustinfo carries what adjustpointer needs to relocate a pointer:
// the bounds of the old stack and the offset to the new one.
type adjustinfo struct {
	// old is the stack being moved from; pointers in [old.lo, old.hi)
	// are the ones that get rewritten.
	old stack

	// delta is the ptr distance from old to new stack (newbase - oldbase).
	delta uintptr
}
// adjustpointer treats vpp as the address of a pointer-sized slot.
// If the pointer stored there lies inside the old stack described by
// adjinfo (old.lo <= p < old.hi), the slot is rewritten to point at the
// same offset in the new stack; otherwise it is left untouched.
func adjustpointer(adjinfo *adjustinfo, vpp unsafe.Pointer) {
	slot := (*unsafe.Pointer)(vpp)
	orig := *slot
	if stackDebug >= 4 {
		print(" ", slot, ":", orig, "\n")
	}

	// Pointers outside the old stack need no adjustment.
	addr := uintptr(orig)
	if addr < adjinfo.old.lo || addr >= adjinfo.old.hi {
		return
	}

	*slot = add(orig, adjinfo.delta)
	if stackDebug >= 3 {
		print(" adjust ptr ", slot, ":", orig, " -> ", *slot, "\n")
	}
}
2015-05-04 10:19:24 -04:00
// Information from the compiler about the layout of stack frames.
// The bits are stored in the bytes starting at bytedata.
type bitvector struct {
	n        int32 // # of bits
	bytedata *uint8
}

// gobitvector is a bitvector whose storage is exposed as a Go slice,
// so individual bytes can be indexed with bounds checking.
type gobitvector struct {
	n        uintptr // # of bits
	bytedata []uint8 // (n+7)/8 bytes when built by gobv
}

// gobv converts bv into a gobitvector. The returned slice aliases the
// memory at bv.bytedata (nothing is copied) and is (bv.n+7)/8 bytes long,
// i.e. just enough bytes to hold bv.n bits.
func gobv(bv bitvector) gobitvector {
	return gobitvector{
		uintptr(bv.n),
		(*[1 << 30]byte)(unsafe.Pointer(bv.bytedata))[:(bv.n+7)/8],
	}
}
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss
The bitmaps were 2 bits per pointer because we needed to distinguish
scalar, pointer, multiword, and we used the leftover value to distinguish
uninitialized from scalar, even though the garbage collector (GC) didn't care.
Now that there are no multiword structures from the GC's point of view,
cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not.
The GC assumes the same layout for stack frames and for the maps
describing the global data and bss sections, so change them all in one CL.
The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since
the 2-bit representation lives (at least for now) in some of the reflect data.
Because these stack frame bitmaps are stored directly in the rodata in
the binary, this CL reduces the size of the 6g binary by about 1.1%.
Performance change is basically a wash, but using less memory,
and smaller binaries, and enables other bitmap reductions.
name old mean new mean delta
BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005)
BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001)
BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141)
BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001)
BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095)
BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008)
BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014)
BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364)
BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010)
BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368)
BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484)
BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543)
BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000)
BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023)
BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126)
BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975)
BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153)
BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597)
BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804)
BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881)
BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561)
BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000)
BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000)
BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000)
BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000)
BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019)
BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000)
BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025)
BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000)
BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000)
BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305)
BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000)
BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465)
BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075)
BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337)
BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291)
BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507)
BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313)
BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312)
BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000)
BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007)
BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670)
BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828)
BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392)
BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813)
BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000)
BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000)
BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000)
BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985)
BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320)
BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799)
BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667)
BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001)
BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000)
BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011)
BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185)
BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001)
BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000)
BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979)
BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777)
BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771)
BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000)
BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081)
BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027)
BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022)
BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064)
BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001)
BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007)
BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143)
BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278)
BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252)
BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003)
BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581)
BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002)
BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879)
BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257)
BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678)
BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000)
BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000)
BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000)
BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000)
BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000)
BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000)
BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767)
BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347)
BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793)
Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9
Reviewed-on: https://go-review.googlesource.com/9406
Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 22:45:57 -04:00
func ptrbit ( bv * gobitvector , i uintptr ) uint8 {
return ( bv . bytedata [ i / 8 ] >> ( i % 8 ) ) & 1
2014-11-11 17:04:34 -05:00
}
// bv describes the memory starting at address scanp.
// Adjust any pointers contained therein.
func adjustpointers ( scanp unsafe . Pointer , cbv * bitvector , adjinfo * adjustinfo , f * _func ) {
bv := gobv ( * cbv )
minp := adjinfo . old . lo
maxp := adjinfo . old . hi
delta := adjinfo . delta
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss
The bitmaps were 2 bits per pointer because we needed to distinguish
scalar, pointer, multiword, and we used the leftover value to distinguish
uninitialized from scalar, even though the garbage collector (GC) didn't care.
Now that there are no multiword structures from the GC's point of view,
cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not.
The GC assumes the same layout for stack frames and for the maps
describing the global data and bss sections, so change them all in one CL.
The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since
the 2-bit representation lives (at least for now) in some of the reflect data.
Because these stack frame bitmaps are stored directly in the rodata in
the binary, this CL reduces the size of the 6g binary by about 1.1%.
Performance change is basically a wash, but this uses less memory,
produces smaller binaries, and enables other bitmap reductions.
name old mean new mean delta
BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005)
BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001)
BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141)
BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001)
BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095)
BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008)
BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014)
BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364)
BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010)
BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368)
BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484)
BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543)
BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000)
BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023)
BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126)
BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975)
BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153)
BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597)
BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804)
BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881)
BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561)
BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000)
BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000)
BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000)
BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000)
BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019)
BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000)
BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025)
BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000)
BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000)
BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305)
BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000)
BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465)
BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075)
BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337)
BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291)
BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507)
BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313)
BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312)
BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000)
BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007)
BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670)
BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828)
BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392)
BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813)
BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000)
BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000)
BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000)
BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985)
BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320)
BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799)
BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667)
BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001)
BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000)
BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011)
BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185)
BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001)
BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000)
BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979)
BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777)
BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771)
BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000)
BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081)
BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027)
BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022)
BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064)
BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001)
BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007)
BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143)
BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278)
BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252)
BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003)
BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581)
BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002)
BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879)
BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257)
BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678)
BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000)
BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000)
BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000)
BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000)
BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000)
BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000)
BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767)
BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347)
BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793)
Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9
Reviewed-on: https://go-review.googlesource.com/9406
Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 22:45:57 -04:00
num := uintptr ( bv . n )
2014-11-11 17:04:34 -05:00
for i := uintptr ( 0 ) ; i < num ; i ++ {
if stackDebug >= 4 {
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss
The bitmaps were 2 bits per pointer because we needed to distinguish
scalar, pointer, multiword, and we used the leftover value to distinguish
uninitialized from scalar, even though the garbage collector (GC) didn't care.
Now that there are no multiword structures from the GC's point of view,
cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not.
The GC assumes the same layout for stack frames and for the maps
describing the global data and bss sections, so change them all in one CL.
The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since
the 2-bit representation lives (at least for now) in some of the reflect data.
Because these stack frame bitmaps are stored directly in the rodata in
the binary, this CL reduces the size of the 6g binary by about 1.1%.
Performance change is basically a wash, but this uses less memory,
produces smaller binaries, and enables other bitmap reductions.
name old mean new mean delta
BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005)
BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001)
BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141)
BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001)
BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095)
BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008)
BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014)
BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364)
BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010)
BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368)
BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484)
BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543)
BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000)
BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023)
BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126)
BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975)
BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153)
BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597)
BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804)
BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881)
BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561)
BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000)
BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000)
BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000)
BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000)
BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019)
BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000)
BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025)
BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000)
BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000)
BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305)
BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000)
BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465)
BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075)
BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337)
BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291)
BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507)
BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313)
BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312)
BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000)
BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007)
BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670)
BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828)
BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392)
BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813)
BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000)
BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000)
BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000)
BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985)
BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320)
BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799)
BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667)
BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001)
BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000)
BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011)
BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185)
BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001)
BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000)
BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979)
BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777)
BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771)
BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000)
BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081)
BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027)
BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022)
BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064)
BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001)
BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007)
BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143)
BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278)
BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252)
BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003)
BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581)
BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002)
BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879)
BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257)
BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678)
BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000)
BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000)
BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000)
BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000)
BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000)
BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000)
BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767)
BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347)
BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793)
Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9
Reviewed-on: https://go-review.googlesource.com/9406
Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 22:45:57 -04:00
print ( " " , add ( scanp , i * ptrSize ) , ":" , ptrnames [ ptrbit ( & bv , i ) ] , ":" , hex ( * ( * uintptr ) ( add ( scanp , i * ptrSize ) ) ) , " # " , i , " " , bv . bytedata [ i / 4 ] , "\n" )
2014-11-11 17:04:34 -05:00
}
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss
The bitmaps were 2 bits per pointer because we needed to distinguish
scalar, pointer, multiword, and we used the leftover value to distinguish
uninitialized from scalar, even though the garbage collector (GC) didn't care.
Now that there are no multiword structures from the GC's point of view,
cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not.
The GC assumes the same layout for stack frames and for the maps
describing the global data and bss sections, so change them all in one CL.
The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since
the 2-bit representation lives (at least for now) in some of the reflect data.
Because these stack frame bitmaps are stored directly in the rodata in
the binary, this CL reduces the size of the 6g binary by about 1.1%.
Performance change is basically a wash, but this uses less memory,
produces smaller binaries, and enables other bitmap reductions.
name old mean new mean delta
BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005)
BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001)
BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141)
BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001)
BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095)
BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008)
BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014)
BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364)
BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010)
BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368)
BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484)
BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543)
BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000)
BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023)
BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126)
BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975)
BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153)
BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597)
BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804)
BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881)
BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561)
BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000)
BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000)
BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000)
BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000)
BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019)
BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000)
BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025)
BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000)
BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000)
BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305)
BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000)
BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465)
BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075)
BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337)
BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291)
BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507)
BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313)
BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312)
BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000)
BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007)
BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670)
BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828)
BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392)
BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813)
BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000)
BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000)
BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000)
BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985)
BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320)
BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799)
BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667)
BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001)
BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000)
BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011)
BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185)
BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001)
BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000)
BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979)
BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777)
BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771)
BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000)
BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081)
BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027)
BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022)
BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064)
BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001)
BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007)
BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143)
BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278)
BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252)
BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003)
BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581)
BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002)
BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879)
BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257)
BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678)
BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000)
BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000)
BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000)
BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000)
BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000)
BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000)
BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767)
BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347)
BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793)
Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9
Reviewed-on: https://go-review.googlesource.com/9406
Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 22:45:57 -04:00
if ptrbit ( & bv , i ) == 1 {
runtime: eliminate write barrier from adjustpointers
Currently adjustpointers invokes a write barrier for every stack slot
it updates. This is safe---the write barrier always does nothing
because the new value is never a heap pointer---but it's unnecessary
overhead in performance and complexity.
Fix this by rewriting adjustpointers to work with *uintptrs instead of
*unsafe.Pointers. As an added bonus, this makes the code cleaner.
name old mean new mean delta
BinaryTree17 3.35s × (0.98,1.01) 3.33s × (0.99,1.02) ~ (p=0.095 n=20+19)
Fannkuch11 2.49s × (1.00,1.01) 2.52s × (0.99,1.01) +1.23% (p=0.000 n=19+20)
FmtFprintfEmpty 52.2ns × (0.99,1.02) 52.2ns × (0.99,1.02) ~ (p=0.766 n=19+19)
FmtFprintfString 181ns × (0.99,1.02) 179ns × (0.99,1.01) -1.06% (p=0.000 n=20+19)
FmtFprintfInt 177ns × (0.99,1.01) 173ns × (0.99,1.02) -2.26% (p=0.000 n=17+20)
FmtFprintfIntInt 300ns × (0.99,1.01) 302ns × (0.99,1.01) +0.76% (p=0.000 n=19+20)
FmtFprintfPrefixedInt 253ns × (0.99,1.02) 256ns × (0.99,1.01) +0.96% (p=0.000 n=20+19)
FmtFprintfFloat 334ns × (0.99,1.02) 334ns × (1.00,1.01) ~ (p=0.243 n=20+19)
FmtManyArgs 1.16µs × (0.99,1.01) 1.17µs × (0.99,1.02) +0.88% (p=0.000 n=20+20)
GobDecode 9.16ms × (0.99,1.02) 9.18ms × (1.00,1.00) +0.21% (p=0.048 n=20+17)
GobEncode 7.03ms × (0.99,1.01) 7.05ms × (0.99,1.01) ~ (p=0.091 n=19+19)
Gzip 374ms × (0.99,1.01) 372ms × (0.99,1.02) -0.50% (p=0.008 n=18+20)
Gunzip 92.9ms × (0.99,1.01) 92.5ms × (1.00,1.01) -0.47% (p=0.002 n=19+19)
HTTPClientServer 53.1µs × (0.98,1.01) 52.5µs × (0.99,1.01) -0.98% (p=0.000 n=20+19)
JSONEncode 17.4ms × (0.99,1.02) 17.5ms × (0.99,1.01) ~ (p=0.061 n=19+20)
JSONDecode 66.0ms × (0.99,1.02) 64.7ms × (0.99,1.01) -1.87% (p=0.000 n=20+20)
Mandelbrot200 3.94ms × (1.00,1.01) 3.95ms × (1.00,1.01) ~ (p=0.799 n=18+19)
GoParse 3.89ms × (0.99,1.02) 3.86ms × (0.99,1.01) -0.70% (p=0.016 n=20+19)
RegexpMatchEasy0_32 102ns × (0.99,1.02) 102ns × (1.00,1.01) ~ (p=0.557 n=20+18)
RegexpMatchEasy0_1K 353ns × (0.99,1.02) 341ns × (0.99,1.01) -3.38% (p=0.000 n=20+20)
RegexpMatchEasy1_32 85.0ns × (0.99,1.02) 85.0ns × (0.99,1.01) ~ (p=0.851 n=19+20)
RegexpMatchEasy1_1K 521ns × (0.99,1.02) 506ns × (1.00,1.01) -2.85% (p=0.000 n=20+18)
RegexpMatchMedium_32 142ns × (0.99,1.02) 141ns × (1.00,1.01) -1.17% (p=0.000 n=20+19)
RegexpMatchMedium_1K 42.8µs × (0.99,1.01) 42.3µs × (0.99,1.01) -1.07% (p=0.000 n=20+19)
RegexpMatchHard_32 2.17µs × (0.99,1.01) 2.16µs × (1.00,1.01) -0.51% (p=0.042 n=20+18)
RegexpMatchHard_1K 65.6µs × (0.99,1.01) 64.8µs × (1.00,1.00) -1.21% (p=0.000 n=20+17)
Revcomp 581ms × (0.99,1.04) 536ms × (1.00,1.01) -7.71% (p=0.000 n=20+18)
Template 77.2ms × (0.99,1.01) 76.8ms × (0.99,1.01) ~ (p=0.426 n=20+18)
TimeParse 369ns × (0.99,1.02) 371ns × (1.00,1.01) ~ (p=0.117 n=20+19)
TimeFormat 371ns × (0.99,1.02) 391ns × (0.99,1.01) +5.33% (p=0.000 n=20+19)
Change-Id: I5b952ba577ac4365c8c87db837c5804a1e30b7be
Reviewed-on: https://go-review.googlesource.com/10293
Reviewed-by: Russ Cox <rsc@golang.org>
2015-05-20 11:57:02 -04:00
pp := ( * uintptr ) ( add ( scanp , i * ptrSize ) )
p := * pp
if f != nil && 0 < p && p < _PageSize && debug . invalidptr != 0 || p == poisonStack {
2014-11-11 17:04:34 -05:00
// Looks like a junk value in a pointer slot.
// Live analysis wrong?
getg ( ) . m . traceback = 2
runtime: eliminate write barrier from adjustpointers
Currently adjustpointers invokes a write barrier for every stack slot
it updates. This is safe---the write barrier always does nothing
because the new value is never a heap pointer---but it's unnecessary
overhead in performance and complexity.
Fix this by rewriting adjustpointers to work with *uintptrs instead of
*unsafe.Pointers. As an added bonus, this makes the code cleaner.
name old mean new mean delta
BinaryTree17 3.35s × (0.98,1.01) 3.33s × (0.99,1.02) ~ (p=0.095 n=20+19)
Fannkuch11 2.49s × (1.00,1.01) 2.52s × (0.99,1.01) +1.23% (p=0.000 n=19+20)
FmtFprintfEmpty 52.2ns × (0.99,1.02) 52.2ns × (0.99,1.02) ~ (p=0.766 n=19+19)
FmtFprintfString 181ns × (0.99,1.02) 179ns × (0.99,1.01) -1.06% (p=0.000 n=20+19)
FmtFprintfInt 177ns × (0.99,1.01) 173ns × (0.99,1.02) -2.26% (p=0.000 n=17+20)
FmtFprintfIntInt 300ns × (0.99,1.01) 302ns × (0.99,1.01) +0.76% (p=0.000 n=19+20)
FmtFprintfPrefixedInt 253ns × (0.99,1.02) 256ns × (0.99,1.01) +0.96% (p=0.000 n=20+19)
FmtFprintfFloat 334ns × (0.99,1.02) 334ns × (1.00,1.01) ~ (p=0.243 n=20+19)
FmtManyArgs 1.16µs × (0.99,1.01) 1.17µs × (0.99,1.02) +0.88% (p=0.000 n=20+20)
GobDecode 9.16ms × (0.99,1.02) 9.18ms × (1.00,1.00) +0.21% (p=0.048 n=20+17)
GobEncode 7.03ms × (0.99,1.01) 7.05ms × (0.99,1.01) ~ (p=0.091 n=19+19)
Gzip 374ms × (0.99,1.01) 372ms × (0.99,1.02) -0.50% (p=0.008 n=18+20)
Gunzip 92.9ms × (0.99,1.01) 92.5ms × (1.00,1.01) -0.47% (p=0.002 n=19+19)
HTTPClientServer 53.1µs × (0.98,1.01) 52.5µs × (0.99,1.01) -0.98% (p=0.000 n=20+19)
JSONEncode 17.4ms × (0.99,1.02) 17.5ms × (0.99,1.01) ~ (p=0.061 n=19+20)
JSONDecode 66.0ms × (0.99,1.02) 64.7ms × (0.99,1.01) -1.87% (p=0.000 n=20+20)
Mandelbrot200 3.94ms × (1.00,1.01) 3.95ms × (1.00,1.01) ~ (p=0.799 n=18+19)
GoParse 3.89ms × (0.99,1.02) 3.86ms × (0.99,1.01) -0.70% (p=0.016 n=20+19)
RegexpMatchEasy0_32 102ns × (0.99,1.02) 102ns × (1.00,1.01) ~ (p=0.557 n=20+18)
RegexpMatchEasy0_1K 353ns × (0.99,1.02) 341ns × (0.99,1.01) -3.38% (p=0.000 n=20+20)
RegexpMatchEasy1_32 85.0ns × (0.99,1.02) 85.0ns × (0.99,1.01) ~ (p=0.851 n=19+20)
RegexpMatchEasy1_1K 521ns × (0.99,1.02) 506ns × (1.00,1.01) -2.85% (p=0.000 n=20+18)
RegexpMatchMedium_32 142ns × (0.99,1.02) 141ns × (1.00,1.01) -1.17% (p=0.000 n=20+19)
RegexpMatchMedium_1K 42.8µs × (0.99,1.01) 42.3µs × (0.99,1.01) -1.07% (p=0.000 n=20+19)
RegexpMatchHard_32 2.17µs × (0.99,1.01) 2.16µs × (1.00,1.01) -0.51% (p=0.042 n=20+18)
RegexpMatchHard_1K 65.6µs × (0.99,1.01) 64.8µs × (1.00,1.00) -1.21% (p=0.000 n=20+17)
Revcomp 581ms × (0.99,1.04) 536ms × (1.00,1.01) -7.71% (p=0.000 n=20+18)
Template 77.2ms × (0.99,1.01) 76.8ms × (0.99,1.01) ~ (p=0.426 n=20+18)
TimeParse 369ns × (0.99,1.02) 371ns × (1.00,1.01) ~ (p=0.117 n=20+19)
TimeFormat 371ns × (0.99,1.02) 391ns × (0.99,1.01) +5.33% (p=0.000 n=20+19)
Change-Id: I5b952ba577ac4365c8c87db837c5804a1e30b7be
Reviewed-on: https://go-review.googlesource.com/10293
Reviewed-by: Russ Cox <rsc@golang.org>
2015-05-20 11:57:02 -04:00
print ( "runtime: bad pointer in frame " , funcname ( f ) , " at " , pp , ": " , hex ( p ) , "\n" )
2014-12-27 20:58:00 -08:00
throw ( "invalid stack pointer" )
2014-11-11 17:04:34 -05:00
}
runtime: eliminate write barrier from adjustpointers
Currently adjustpointers invokes a write barrier for every stack slot
it updates. This is safe---the write barrier always does nothing
because the new value is never a heap pointer---but it's unnecessary
overhead in performance and complexity.
Fix this by rewriting adjustpointers to work with *uintptrs instead of
*unsafe.Pointers. As an added bonus, this makes the code cleaner.
name old mean new mean delta
BinaryTree17 3.35s × (0.98,1.01) 3.33s × (0.99,1.02) ~ (p=0.095 n=20+19)
Fannkuch11 2.49s × (1.00,1.01) 2.52s × (0.99,1.01) +1.23% (p=0.000 n=19+20)
FmtFprintfEmpty 52.2ns × (0.99,1.02) 52.2ns × (0.99,1.02) ~ (p=0.766 n=19+19)
FmtFprintfString 181ns × (0.99,1.02) 179ns × (0.99,1.01) -1.06% (p=0.000 n=20+19)
FmtFprintfInt 177ns × (0.99,1.01) 173ns × (0.99,1.02) -2.26% (p=0.000 n=17+20)
FmtFprintfIntInt 300ns × (0.99,1.01) 302ns × (0.99,1.01) +0.76% (p=0.000 n=19+20)
FmtFprintfPrefixedInt 253ns × (0.99,1.02) 256ns × (0.99,1.01) +0.96% (p=0.000 n=20+19)
FmtFprintfFloat 334ns × (0.99,1.02) 334ns × (1.00,1.01) ~ (p=0.243 n=20+19)
FmtManyArgs 1.16µs × (0.99,1.01) 1.17µs × (0.99,1.02) +0.88% (p=0.000 n=20+20)
GobDecode 9.16ms × (0.99,1.02) 9.18ms × (1.00,1.00) +0.21% (p=0.048 n=20+17)
GobEncode 7.03ms × (0.99,1.01) 7.05ms × (0.99,1.01) ~ (p=0.091 n=19+19)
Gzip 374ms × (0.99,1.01) 372ms × (0.99,1.02) -0.50% (p=0.008 n=18+20)
Gunzip 92.9ms × (0.99,1.01) 92.5ms × (1.00,1.01) -0.47% (p=0.002 n=19+19)
HTTPClientServer 53.1µs × (0.98,1.01) 52.5µs × (0.99,1.01) -0.98% (p=0.000 n=20+19)
JSONEncode 17.4ms × (0.99,1.02) 17.5ms × (0.99,1.01) ~ (p=0.061 n=19+20)
JSONDecode 66.0ms × (0.99,1.02) 64.7ms × (0.99,1.01) -1.87% (p=0.000 n=20+20)
Mandelbrot200 3.94ms × (1.00,1.01) 3.95ms × (1.00,1.01) ~ (p=0.799 n=18+19)
GoParse 3.89ms × (0.99,1.02) 3.86ms × (0.99,1.01) -0.70% (p=0.016 n=20+19)
RegexpMatchEasy0_32 102ns × (0.99,1.02) 102ns × (1.00,1.01) ~ (p=0.557 n=20+18)
RegexpMatchEasy0_1K 353ns × (0.99,1.02) 341ns × (0.99,1.01) -3.38% (p=0.000 n=20+20)
RegexpMatchEasy1_32 85.0ns × (0.99,1.02) 85.0ns × (0.99,1.01) ~ (p=0.851 n=19+20)
RegexpMatchEasy1_1K 521ns × (0.99,1.02) 506ns × (1.00,1.01) -2.85% (p=0.000 n=20+18)
RegexpMatchMedium_32 142ns × (0.99,1.02) 141ns × (1.00,1.01) -1.17% (p=0.000 n=20+19)
RegexpMatchMedium_1K 42.8µs × (0.99,1.01) 42.3µs × (0.99,1.01) -1.07% (p=0.000 n=20+19)
RegexpMatchHard_32 2.17µs × (0.99,1.01) 2.16µs × (1.00,1.01) -0.51% (p=0.042 n=20+18)
RegexpMatchHard_1K 65.6µs × (0.99,1.01) 64.8µs × (1.00,1.00) -1.21% (p=0.000 n=20+17)
Revcomp 581ms × (0.99,1.04) 536ms × (1.00,1.01) -7.71% (p=0.000 n=20+18)
Template 77.2ms × (0.99,1.01) 76.8ms × (0.99,1.01) ~ (p=0.426 n=20+18)
TimeParse 369ns × (0.99,1.02) 371ns × (1.00,1.01) ~ (p=0.117 n=20+19)
TimeFormat 371ns × (0.99,1.02) 391ns × (0.99,1.01) +5.33% (p=0.000 n=20+19)
Change-Id: I5b952ba577ac4365c8c87db837c5804a1e30b7be
Reviewed-on: https://go-review.googlesource.com/10293
Reviewed-by: Russ Cox <rsc@golang.org>
2015-05-20 11:57:02 -04:00
if minp <= p && p < maxp {
2014-11-11 17:04:34 -05:00
if stackDebug >= 3 {
2014-12-28 23:16:32 -08:00
print ( "adjust ptr " , p , " " , funcname ( f ) , "\n" )
2014-11-11 17:04:34 -05:00
}
runtime: eliminate write barrier from adjustpointers
Currently adjustpointers invokes a write barrier for every stack slot
it updates. This is safe---the write barrier always does nothing
because the new value is never a heap pointer---but it's unnecessary
overhead in performance and complexity.
Fix this by rewriting adjustpointers to work with *uintptrs instead of
*unsafe.Pointers. As an added bonus, this makes the code cleaner.
name old mean new mean delta
BinaryTree17 3.35s × (0.98,1.01) 3.33s × (0.99,1.02) ~ (p=0.095 n=20+19)
Fannkuch11 2.49s × (1.00,1.01) 2.52s × (0.99,1.01) +1.23% (p=0.000 n=19+20)
FmtFprintfEmpty 52.2ns × (0.99,1.02) 52.2ns × (0.99,1.02) ~ (p=0.766 n=19+19)
FmtFprintfString 181ns × (0.99,1.02) 179ns × (0.99,1.01) -1.06% (p=0.000 n=20+19)
FmtFprintfInt 177ns × (0.99,1.01) 173ns × (0.99,1.02) -2.26% (p=0.000 n=17+20)
FmtFprintfIntInt 300ns × (0.99,1.01) 302ns × (0.99,1.01) +0.76% (p=0.000 n=19+20)
FmtFprintfPrefixedInt 253ns × (0.99,1.02) 256ns × (0.99,1.01) +0.96% (p=0.000 n=20+19)
FmtFprintfFloat 334ns × (0.99,1.02) 334ns × (1.00,1.01) ~ (p=0.243 n=20+19)
FmtManyArgs 1.16µs × (0.99,1.01) 1.17µs × (0.99,1.02) +0.88% (p=0.000 n=20+20)
GobDecode 9.16ms × (0.99,1.02) 9.18ms × (1.00,1.00) +0.21% (p=0.048 n=20+17)
GobEncode 7.03ms × (0.99,1.01) 7.05ms × (0.99,1.01) ~ (p=0.091 n=19+19)
Gzip 374ms × (0.99,1.01) 372ms × (0.99,1.02) -0.50% (p=0.008 n=18+20)
Gunzip 92.9ms × (0.99,1.01) 92.5ms × (1.00,1.01) -0.47% (p=0.002 n=19+19)
HTTPClientServer 53.1µs × (0.98,1.01) 52.5µs × (0.99,1.01) -0.98% (p=0.000 n=20+19)
JSONEncode 17.4ms × (0.99,1.02) 17.5ms × (0.99,1.01) ~ (p=0.061 n=19+20)
JSONDecode 66.0ms × (0.99,1.02) 64.7ms × (0.99,1.01) -1.87% (p=0.000 n=20+20)
Mandelbrot200 3.94ms × (1.00,1.01) 3.95ms × (1.00,1.01) ~ (p=0.799 n=18+19)
GoParse 3.89ms × (0.99,1.02) 3.86ms × (0.99,1.01) -0.70% (p=0.016 n=20+19)
RegexpMatchEasy0_32 102ns × (0.99,1.02) 102ns × (1.00,1.01) ~ (p=0.557 n=20+18)
RegexpMatchEasy0_1K 353ns × (0.99,1.02) 341ns × (0.99,1.01) -3.38% (p=0.000 n=20+20)
RegexpMatchEasy1_32 85.0ns × (0.99,1.02) 85.0ns × (0.99,1.01) ~ (p=0.851 n=19+20)
RegexpMatchEasy1_1K 521ns × (0.99,1.02) 506ns × (1.00,1.01) -2.85% (p=0.000 n=20+18)
RegexpMatchMedium_32 142ns × (0.99,1.02) 141ns × (1.00,1.01) -1.17% (p=0.000 n=20+19)
RegexpMatchMedium_1K 42.8µs × (0.99,1.01) 42.3µs × (0.99,1.01) -1.07% (p=0.000 n=20+19)
RegexpMatchHard_32 2.17µs × (0.99,1.01) 2.16µs × (1.00,1.01) -0.51% (p=0.042 n=20+18)
RegexpMatchHard_1K 65.6µs × (0.99,1.01) 64.8µs × (1.00,1.00) -1.21% (p=0.000 n=20+17)
Revcomp 581ms × (0.99,1.04) 536ms × (1.00,1.01) -7.71% (p=0.000 n=20+18)
Template 77.2ms × (0.99,1.01) 76.8ms × (0.99,1.01) ~ (p=0.426 n=20+18)
TimeParse 369ns × (0.99,1.02) 371ns × (1.00,1.01) ~ (p=0.117 n=20+19)
TimeFormat 371ns × (0.99,1.02) 391ns × (0.99,1.01) +5.33% (p=0.000 n=20+19)
Change-Id: I5b952ba577ac4365c8c87db837c5804a1e30b7be
Reviewed-on: https://go-review.googlesource.com/10293
Reviewed-by: Russ Cox <rsc@golang.org>
2015-05-20 11:57:02 -04:00
* pp = p + delta
2014-11-11 17:04:34 -05:00
}
}
}
}
// adjustframe is the per-frame callback used while relocating a stack
// (it is passed to gentraceback and to tracebackdefers in this file).
// arg is an *adjustinfo passed through as an unsafe.Pointer.
// For a live frame it adjusts, in order: the local variables described by
// the locals stack map, the saved base pointer slot when the frame layout
// has room for one (only checked when thechar == '6'), and the arguments
// described by frame.argmap or, failing that, the args stack map.
// It throws if a required stack map is missing or pcdata is out of range.
// Every return path visible here returns true.
// Note: the argument/return area is adjusted by the callee.
func adjustframe ( frame * stkframe , arg unsafe . Pointer ) bool {
adjinfo := ( * adjustinfo ) ( arg )
targetpc := frame . continpc
if targetpc == 0 {
// Frame is dead.
return true
}
f := frame . fn
if stackDebug >= 2 {
print ( " adjusting " , funcname ( f ) , " frame=[" , hex ( frame . sp ) , "," , hex ( frame . fp ) , "] pc=" , hex ( frame . pc ) , " continpc=" , hex ( frame . continpc ) , "\n" )
}
[dev.cc] runtime: delete scalararg, ptrarg; rename onM to systemstack
Scalararg and ptrarg are not "signal safe".
Go code filling them out can be interrupted by a signal,
and then the signal handler runs, and if it also ends up
in Go code that uses scalararg or ptrarg, now the old
values have been smashed.
For the pieces of code that do need to run in a signal handler,
we introduced onM_signalok, which is really just onM
except that the _signalok is meant to convey that the caller
asserts that scalarg and ptrarg will be restored to their old
values after the call (instead of the usual behavior, zeroing them).
Scalararg and ptrarg are also untyped and therefore error-prone.
Go code can always pass a closure instead of using scalararg
and ptrarg; they were only really necessary for C code.
And there's no more C code.
For all these reasons, delete scalararg and ptrarg, converting
the few remaining references to use closures.
Once those are gone, there is no need for a distinction between
onM and onM_signalok, so replace both with a single function
equivalent to the current onM_signalok (that is, it can be called
on any of the curg, g0, and gsignal stacks).
The name onM and the phrase 'm stack' are misnomers,
because on most system an M has two system stacks:
the main thread stack and the signal handling stack.
Correct the misnomer by naming the replacement function systemstack.
Fix a few references to "M stack" in code.
The main motivation for this change is to eliminate scalararg/ptrarg.
Rick and I have already seen them cause problems because
the calling sequence m.ptrarg[0] = p is a heap pointer assignment,
so it gets a write barrier. The write barrier also uses onM, so it has
all the same problems as if it were being invoked by a signal handler.
We worked around this by saving and restoring the old values
and by calling onM_signalok, but there's no point in keeping this nice
home for bugs around any longer.
This CL also changes funcline to return the file name as a result
instead of filling in a passed-in *string. (The *string signature is
left over from when the code was written in and called from C.)
That's arguably an unrelated change, except that once I had done
the ptrarg/scalararg/onM cleanup I started getting false positives
about the *string argument escaping (not allowed in package runtime).
The compiler is wrong, but the easiest fix is to write the code like
Go code instead of like C code. I am a bit worried that the compiler
is wrong because of some use of uninitialized memory in the escape
analysis. If that's the reason, it will go away when we convert the
compiler to Go. (And if not, we'll debug it the next time.)
LGTM=khr
R=r, khr
CC=austin, golang-codereviews, iant, rlh
https://golang.org/cl/174950043
2014-11-12 14:54:31 -05:00
if f . entry == systemstack_switchPC {
// A special routine at the bottom of stack of a goroutine that does a systemstack call.
2014-11-11 17:04:34 -05:00
// We will allow it to be copied even though we don't
// have full GC info for it (because it is written in asm).
return true
}
// Unless the pc is exactly the function entry, look up pcdata for targetpc-1.
// NOTE(review): presumably so the lookup falls inside the instruction that
// was executing rather than the one after it; confirm.
if targetpc != f . entry {
targetpc --
}
pcdata := pcdatavalue ( f , _PCDATA_StackMapIndex , targetpc )
if pcdata == - 1 {
pcdata = 0 // in prologue
}
// Adjust local variables if stack frame has been allocated.
size := frame . varp - frame . sp
// minsize is the architecture-dependent threshold: the frame is treated as
// having locals only when varp - sp is strictly greater than it.
var minsize uintptr
2015-03-08 14:20:20 +01:00
switch thechar {
case '6' , '8' :
2014-11-11 17:04:34 -05:00
minsize = 0
2015-03-08 14:20:20 +01:00
case '7' :
minsize = spAlign
default :
minsize = ptrSize
2014-11-11 17:04:34 -05:00
}
if size > minsize {
var bv bitvector
stackmap := ( * stackmap ) ( funcdata ( f , _FUNCDATA_LocalsPointerMaps ) )
if stackmap == nil || stackmap . n <= 0 {
print ( "runtime: frame " , funcname ( f ) , " untyped locals " , hex ( frame . varp - size ) , "+" , hex ( size ) , "\n" )
2014-12-27 20:58:00 -08:00
throw ( "missing stackmap" )
2014-11-11 17:04:34 -05:00
}
// Locals bitmap information, scan just the pointers in locals.
if pcdata < 0 || pcdata >= stackmap . n {
// don't know where we are
print ( "runtime: pcdata is " , pcdata , " and " , stackmap . n , " locals stack map entries for " , funcname ( f ) , " (targetpc=" , targetpc , ")\n" )
2014-12-27 20:58:00 -08:00
throw ( "bad symbol table" )
2014-11-11 17:04:34 -05:00
}
bv = stackmapdata ( stackmap , pcdata )
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss
The bitmaps were 2 bits per pointer because we needed to distinguish
scalar, pointer, multiword, and we used the leftover value to distinguish
uninitialized from scalar, even though the garbage collector (GC) didn't care.
Now that there are no multiword structures from the GC's point of view,
cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not.
The GC assumes the same layout for stack frames and for the maps
describing the global data and bss sections, so change them all in one CL.
The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since
the 2-bit representation lives (at least for now) in some of the reflect data.
Because these stack frame bitmaps are stored directly in the rodata in
the binary, this CL reduces the size of the 6g binary by about 1.1%.
Performance change is basically a wash, but using less memory,
and smaller binaries, and enables other bitmap reductions.
name old mean new mean delta
BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005)
BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001)
BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141)
BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001)
BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095)
BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008)
BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014)
BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364)
BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010)
BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368)
BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484)
BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543)
BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000)
BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023)
BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126)
BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975)
BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153)
BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597)
BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804)
BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881)
BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561)
BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000)
BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000)
BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000)
BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000)
BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019)
BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000)
BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025)
BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000)
BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000)
BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305)
BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000)
BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465)
BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075)
BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337)
BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291)
BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507)
BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313)
BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312)
BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000)
BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007)
BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670)
BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828)
BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392)
BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813)
BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000)
BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000)
BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000)
BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985)
BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320)
BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799)
BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667)
BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001)
BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000)
BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011)
BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185)
BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001)
BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000)
BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979)
BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777)
BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771)
BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000)
BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081)
BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027)
BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022)
BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064)
BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001)
BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007)
BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143)
BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278)
BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252)
BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003)
BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581)
BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002)
BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879)
BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257)
BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678)
BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000)
BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000)
BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000)
BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000)
BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000)
BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000)
BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767)
BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347)
BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793)
Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9
Reviewed-on: https://go-review.googlesource.com/9406
Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 22:45:57 -04:00
// Recompute size from the bitmap: bv.n words of ptrSize bytes each.
size = uintptr ( bv . n ) * ptrSize
2014-11-11 17:04:34 -05:00
if stackDebug >= 3 {
print ( " locals " , pcdata , "/" , stackmap . n , " " , size / ptrSize , " words " , bv . bytedata , "\n" )
}
// The bitmap covers the size bytes that end at frame.varp.
adjustpointers ( unsafe . Pointer ( frame . varp - size ) , & bv , adjinfo , f )
}
2015-01-14 11:09:50 -05:00
// Adjust saved base pointer if there is one.
2015-02-03 09:09:56 -05:00
if thechar == '6' && frame . argp - frame . varp == 2 * regSize {
2015-01-14 11:09:50 -05:00
if ! framepointer_enabled {
2015-02-03 08:35:38 -05:00
print ( "runtime: found space for saved base pointer, but no framepointer experiment\n" )
2015-02-03 09:09:56 -05:00
print ( "argp=" , hex ( frame . argp ) , " varp=" , hex ( frame . varp ) , "\n" )
2015-01-14 11:09:50 -05:00
throw ( "bad frame layout" )
}
if stackDebug >= 3 {
print ( " saved bp\n" )
}
adjustpointer ( adjinfo , unsafe . Pointer ( frame . varp ) )
}
2014-11-11 17:04:34 -05:00
// Adjust arguments.
if frame . arglen > 0 {
var bv bitvector
// Prefer the argument bitmap supplied with the frame; otherwise fall
// back to the args stack map of the function, indexed by pcdata.
if frame . argmap != nil {
bv = * frame . argmap
} else {
stackmap := ( * stackmap ) ( funcdata ( f , _FUNCDATA_ArgsPointerMaps ) )
if stackmap == nil || stackmap . n <= 0 {
print ( "runtime: frame " , funcname ( f ) , " untyped args " , frame . argp , "+" , uintptr ( frame . arglen ) , "\n" )
2014-12-27 20:58:00 -08:00
throw ( "missing stackmap" )
2014-11-11 17:04:34 -05:00
}
if pcdata < 0 || pcdata >= stackmap . n {
// don't know where we are
print ( "runtime: pcdata is " , pcdata , " and " , stackmap . n , " args stack map entries for " , funcname ( f ) , " (targetpc=" , targetpc , ")\n" )
2014-12-27 20:58:00 -08:00
throw ( "bad symbol table" )
2014-11-11 17:04:34 -05:00
}
bv = stackmapdata ( stackmap , pcdata )
}
if stackDebug >= 3 {
print ( " args\n" )
}
// Unlike the locals call above, the function argument is nil here.
adjustpointers ( unsafe . Pointer ( frame . argp ) , & bv , adjinfo , nil )
}
return true
}
// adjustctxt hands the address of gp's saved context slot (gp.sched.ctxt)
// to adjustpointer together with adjinfo.
// NOTE(review): adjustpointer is defined outside this view; presumably it
// relocates the stored value when it points into the old stack - confirm.
func adjustctxt(gp *g, adjinfo *adjustinfo) {
	ctxtSlot := unsafe.Pointer(&gp.sched.ctxt)
	adjustpointer(adjinfo, ctxtSlot)
}
// adjustdefers updates the stack-related pointers reachable from the
// deferred calls of gp, using the relocation described by adjinfo.
// It first runs adjustframe over each defer argument block via
// tracebackdefers, then hands the addresses of the fn, sp and _panic
// fields of every record on the gp._defer list to adjustpointer.
func adjustdefers ( gp * g , adjinfo * adjustinfo ) {
// Adjust defer argument blocks the same way we adjust active stack frames.
tracebackdefers ( gp , adjustframe , noescape ( unsafe . Pointer ( adjinfo ) ) )
// Adjust pointers in the Defer structs.
// Defer structs themselves are never on the stack.
for d := gp . _defer ; d != nil ; d = d . link {
adjustpointer ( adjinfo , ( unsafe . Pointer ) ( & d . fn ) )
2014-12-08 14:18:58 -08:00
adjustpointer ( adjinfo , ( unsafe . Pointer ) ( & d . sp ) )
2014-11-11 17:04:34 -05:00
adjustpointer ( adjinfo , ( unsafe . Pointer ) ( & d . _panic ) )
}
}
// adjustpanics fixes up the head-of-list panic pointer stored in gp.
// The panic records themselves live on the stack and have already been
// adjusted, so only gp._panic is passed to adjustpointer here.
func adjustpanics(gp *g, adjinfo *adjustinfo) {
	headSlot := unsafe.Pointer(&gp._panic)
	adjustpointer(adjinfo, headSlot)
}
// adjustsudogs walks the gp.waiting list (linked through waitlink) and
// passes the elem and selectdone fields of each entry to adjustpointer,
// because the data those fields point to might be on the stack.
func adjustsudogs(gp *g, adjinfo *adjustinfo) {
	sg := gp.waiting
	for sg != nil {
		adjustpointer(adjinfo, unsafe.Pointer(&sg.elem))
		adjustpointer(adjinfo, unsafe.Pointer(&sg.selectdone))
		sg = sg.waitlink
	}
}
// fillstack stores the byte b at every address in the half-open range
// [stk.lo, stk.hi).
func fillstack(stk stack, b byte) {
	addr := stk.lo
	for addr < stk.hi {
		*(*byte)(unsafe.Pointer(addr)) = b
		addr++
	}
}
// Copies gp's stack to a new stack of a different size.
// The in-use part of the old stack (from gp.sched.sp up to old.hi) is copied
// to the top of a freshly allocated stack of newsize bytes, after pointers
// into the old stack have been adjusted by new.hi - old.hi.
// Throws if gp.syscallsp is nonzero or if gp has no stack (stack.lo == 0).
// The old stack is freed immediately when growing and queued on
// stackfreequeue when shrinking.
2014-11-15 08:00:38 -05:00
// Caller must have changed gp status to Gcopystack.
2014-11-11 17:04:34 -05:00
func copystack ( gp * g , newsize uintptr ) {
if gp . syscallsp != 0 {
2014-12-27 20:58:00 -08:00
throw ( "stack growth not allowed in system call" )
2014-11-11 17:04:34 -05:00
}
old := gp . stack
if old . lo == 0 {
2014-12-27 20:58:00 -08:00
throw ( "nil stackbase" )
2014-11-11 17:04:34 -05:00
}
// used is the number of bytes between the saved sp and the top of the stack.
used := old . hi - gp . sched . sp
// allocate new stack
// Note that newsize is converted to uint32 for stackalloc.
new := stackalloc ( uint32 ( newsize ) )
if stackPoisonCopy != 0 {
fillstack ( new , 0xfd )
}
if stackDebug >= 1 {
print ( "copystack gp=" , gp , " [" , hex ( old . lo ) , " " , hex ( old . hi - used ) , " " , hex ( old . hi ) , "]/" , old . hi - old . lo , " -> [" , hex ( new . lo ) , " " , hex ( new . hi - used ) , " " , hex ( new . hi ) , "]/" , newsize , "\n" )
}
// adjust pointers in the to-be-copied frames
// delta is measured between the two stack tops, since the used region
// is placed so that it ends at new.hi.
var adjinfo adjustinfo
adjinfo . old = old
adjinfo . delta = new . hi - old . hi
gentraceback ( ^ uintptr ( 0 ) , ^ uintptr ( 0 ) , 0 , gp , 0 , nil , 0x7fffffff , adjustframe , noescape ( unsafe . Pointer ( & adjinfo ) ) , 0 )
// adjust other miscellaneous things that have pointers into stacks.
adjustctxt ( gp , & adjinfo )
adjustdefers ( gp , & adjinfo )
adjustpanics ( gp , & adjinfo )
adjustsudogs ( gp , & adjinfo )
// copy the stack to the new location
if stackPoisonCopy != 0 {
fillstack ( new , 0xfb )
}
memmove ( unsafe . Pointer ( new . hi - used ) , unsafe . Pointer ( old . hi - used ) , used )
// Swap out old stack for new one
gp . stack = new
2015-01-05 16:29:21 +00:00
gp . stackguard0 = new . lo + _StackGuard // NOTE: might clobber a preempt request
2014-11-11 17:04:34 -05:00
gp . sched . sp = new . hi - used
// free old stack
if stackPoisonCopy != 0 {
fillstack ( old , 0xfc )
}
if newsize > old . hi - old . lo {
// growing, free stack immediately
stackfree ( old )
} else {
// shrinking, queue up free operation. We can't actually free the stack
// just yet because we might run into the following situation:
// 1) GC starts, scans a SudoG but does not yet mark the SudoG.elem pointer
// 2) The stack that pointer points to is shrunk
// 3) The old stack is freed
// 4) The containing span is marked free
// 5) GC attempts to mark the SudoG.elem pointer. The marking fails because
// the pointer looks like a pointer into a free span.
// By not freeing, we prevent step #4 until GC is done.
// The queue is an intrusive list: the previous head is written into the
// memory at old.lo, and old becomes the new head, under stackpoolmu.
lock ( & stackpoolmu )
* ( * stack ) ( unsafe . Pointer ( old . lo ) ) = stackfreequeue
stackfreequeue = old
unlock ( & stackpoolmu )
}
}
// round2 returns the smallest power of two that is >= x.
// Any x <= 1 (including zero and negative values) yields 1.
// x must not exceed 1<<30: no larger power of two fits in an int32, and
// for such inputs the doubling overflows and the loop never terminates.
func round2(x int32) int32 {
	pow := int32(1)
	for pow < x {
		pow <<= 1
	}
	return pow
}
// Called from runtime·morestack when more stack is needed.
// Allocate larger stack and relocate to new stack.
// Stack growth is multiplicative, for constant amortized cost.
//
// g->atomicstatus will be Grunning or Gscanrunning upon entry.
// If the GC is trying to stop this g then it will set preemptscan to true.
func newstack ( ) {
thisg := getg ( )
// TODO: double check all gp. shouldn't be getg().
2014-12-22 10:53:51 -05:00
if thisg . m . morebuf . g . ptr ( ) . stackguard0 == stackFork {
2014-12-27 20:58:00 -08:00
throw ( "stack growth after fork" )
2014-11-11 17:04:34 -05:00
}
2014-12-22 10:53:51 -05:00
if thisg . m . morebuf . g . ptr ( ) != thisg . m . curg {
2014-11-11 17:04:34 -05:00
print ( "runtime: newstack called from g=" , thisg . m . morebuf . g , "\n" + "\tm=" , thisg . m , " m->curg=" , thisg . m . curg , " m->g0=" , thisg . m . g0 , " m->gsignal=" , thisg . m . gsignal , "\n" )
morebuf := thisg . m . morebuf
2014-12-22 10:53:51 -05:00
traceback ( morebuf . pc , morebuf . sp , morebuf . lr , morebuf . g . ptr ( ) )
2014-12-27 20:58:00 -08:00
throw ( "runtime: wrong goroutine in newstack" )
2014-11-11 17:04:34 -05:00
}
if thisg . m . curg . throwsplit {
gp := thisg . m . curg
// Update syscallsp, syscallpc in case traceback uses them.
morebuf := thisg . m . morebuf
gp . syscallsp = morebuf . sp
gp . syscallpc = morebuf . pc
print ( "runtime: newstack sp=" , hex ( gp . sched . sp ) , " stack=[" , hex ( gp . stack . lo ) , ", " , hex ( gp . stack . hi ) , "]\n" ,
"\tmorebuf={pc:" , hex ( morebuf . pc ) , " sp:" , hex ( morebuf . sp ) , " lr:" , hex ( morebuf . lr ) , "}\n" ,
"\tsched={pc:" , hex ( gp . sched . pc ) , " sp:" , hex ( gp . sched . sp ) , " lr:" , hex ( gp . sched . lr ) , " ctxt:" , gp . sched . ctxt , "}\n" )
2014-12-22 10:53:51 -05:00
traceback ( morebuf . pc , morebuf . sp , morebuf . lr , gp )
2014-12-27 20:58:00 -08:00
throw ( "runtime: stack split at bad time" )
2014-11-11 17:04:34 -05:00
}
gp := thisg . m . curg
morebuf := thisg . m . morebuf
thisg . m . morebuf . pc = 0
thisg . m . morebuf . lr = 0
thisg . m . morebuf . sp = 0
2014-12-22 10:53:51 -05:00
thisg . m . morebuf . g = 0
2015-01-13 15:55:16 -05:00
rewindmorestack ( & gp . sched )
2015-01-14 16:36:41 -05:00
// NOTE: stackguard0 may change underfoot, if another thread
// is about to try to preempt gp. Read it just once and use that same
// value now and below.
preempt := atomicloaduintptr ( & gp . stackguard0 ) == stackPreempt
2015-01-13 15:55:16 -05:00
// Be conservative about where we preempt.
// We are interested in preempting user Go code, not runtime code.
2015-01-30 15:30:41 -05:00
// If we're holding locks, mallocing, or preemption is disabled, don't
// preempt.
2015-01-13 15:55:16 -05:00
// This check is very early in newstack so that even the status change
// from Grunning to Gwaiting and back doesn't happen in this case.
// That status change by itself can be viewed as a small preemption,
// because the GC might change Gwaiting to Gscanwaiting, and then
// this goroutine has to wait for the GC to finish before continuing.
// If the GC is in some way dependent on this goroutine (for example,
// it needs a lock held by the goroutine), that small preemption turns
// into a real deadlock.
2015-01-14 16:36:41 -05:00
if preempt {
2015-04-17 00:21:30 -04:00
if thisg . m . locks != 0 || thisg . m . mallocing != 0 || thisg . m . preemptoff != "" || thisg . m . p . ptr ( ) . status != _Prunning {
2015-01-13 15:55:16 -05:00
// Let the goroutine keep running for now.
// gp->preempt is set, so it will be preempted next time.
gp . stackguard0 = gp . stack . lo + _StackGuard
gogo ( & gp . sched ) // never return
}
}
2014-11-11 17:04:34 -05:00
2015-01-13 15:55:16 -05:00
// The goroutine must be executing in order to call newstack,
// so it must be Grunning (or Gscanrunning).
2014-11-11 17:04:34 -05:00
casgstatus ( gp , _Grunning , _Gwaiting )
gp . waitreason = "stack growth"
if gp . stack . lo == 0 {
2014-12-27 20:58:00 -08:00
throw ( "missing stack in newstack" )
2014-11-11 17:04:34 -05:00
}
sp := gp . sched . sp
if thechar == '6' || thechar == '8' {
// The call to morestack cost a word.
sp -= ptrSize
}
if stackDebug >= 1 || sp < gp . stack . lo {
print ( "runtime: newstack sp=" , hex ( sp ) , " stack=[" , hex ( gp . stack . lo ) , ", " , hex ( gp . stack . hi ) , "]\n" ,
"\tmorebuf={pc:" , hex ( morebuf . pc ) , " sp:" , hex ( morebuf . sp ) , " lr:" , hex ( morebuf . lr ) , "}\n" ,
"\tsched={pc:" , hex ( gp . sched . pc ) , " sp:" , hex ( gp . sched . sp ) , " lr:" , hex ( gp . sched . lr ) , " ctxt:" , gp . sched . ctxt , "}\n" )
}
if sp < gp . stack . lo {
print ( "runtime: gp=" , gp , ", gp->status=" , hex ( readgstatus ( gp ) ) , "\n " )
print ( "runtime: split stack overflow: " , hex ( sp ) , " < " , hex ( gp . stack . lo ) , "\n" )
2014-12-27 20:58:00 -08:00
throw ( "runtime: split stack overflow" )
2014-11-11 17:04:34 -05:00
}
2014-11-15 08:00:38 -05:00
if gp . sched . ctxt != nil {
// morestack wrote sched.ctxt on its way in here,
// without a write barrier. Run the write barrier now.
// It is not possible to be preempted between then
// and now, so it's okay.
writebarrierptr_nostore ( ( * uintptr ) ( unsafe . Pointer ( & gp . sched . ctxt ) ) , uintptr ( gp . sched . ctxt ) )
}
2015-01-14 16:36:41 -05:00
if preempt {
2014-11-11 17:04:34 -05:00
if gp == thisg . m . g0 {
2014-12-27 20:58:00 -08:00
throw ( "runtime: preempt g0" )
2014-11-11 17:04:34 -05:00
}
2015-04-17 00:21:30 -04:00
if thisg . m . p == 0 && thisg . m . locks == 0 {
2014-12-27 20:58:00 -08:00
throw ( "runtime: g is running but p is not" )
2014-11-11 17:04:34 -05:00
}
if gp . preemptscan {
2014-11-21 16:46:27 -05:00
for ! castogscanstatus ( gp , _Gwaiting , _Gscanwaiting ) {
// Likely to be racing with the GC as it sees a _Gwaiting and does the stack scan.
// If so this stack will be scanned twice which does not change correctness.
}
2014-11-11 17:04:34 -05:00
gcphasework ( gp )
2014-11-21 16:46:27 -05:00
casfrom_Gscanstatus ( gp , _Gscanwaiting , _Gwaiting )
2014-11-11 17:04:34 -05:00
casgstatus ( gp , _Gwaiting , _Grunning )
2015-01-05 16:29:21 +00:00
gp . stackguard0 = gp . stack . lo + _StackGuard
2014-11-11 17:04:34 -05:00
gp . preempt = false
gp . preemptscan = false // Tells the GC premption was successful.
gogo ( & gp . sched ) // never return
}
// Act like goroutine called runtime.Gosched.
casgstatus ( gp , _Gwaiting , _Grunning )
2014-12-12 18:41:57 +01:00
gopreempt_m ( gp ) // never return
2014-11-11 17:04:34 -05:00
}
// Allocate a bigger segment and move the stack.
oldsize := int ( gp . stack . hi - gp . stack . lo )
newsize := oldsize * 2
if uintptr ( newsize ) > maxstacksize {
print ( "runtime: goroutine stack exceeds " , maxstacksize , "-byte limit\n" )
2014-12-27 20:58:00 -08:00
throw ( "stack overflow" )
2014-11-11 17:04:34 -05:00
}
2014-12-05 11:40:41 -05:00
casgstatus ( gp , _Gwaiting , _Gcopystack )
2014-11-15 08:00:38 -05:00
// The concurrent GC will not scan the stack while we are doing the copy since
// the gp is in a Gcopystack status.
2014-11-11 17:04:34 -05:00
copystack ( gp , uintptr ( newsize ) )
if stackDebug >= 1 {
print ( "stack grow done\n" )
}
2014-11-15 08:00:38 -05:00
casgstatus ( gp , _Gcopystack , _Grunning )
2014-11-11 17:04:34 -05:00
gogo ( & gp . sched )
}
// nilfunc stores a byte through a nil pointer, so calling it always faults.
// gostartcallfn uses it as the function to start when it is handed a nil
// funcval.
//
//go:nosplit
func nilfunc() {
	var nowhere *uint8
	*nowhere = 0
}
// gostartcallfn adjusts gobuf so that it looks as if it had executed a call
// to the function described by fv and then immediately done a gosave.
//
// When fv is nil the address of nilfunc is used as the function to call.
// In both cases fv itself, converted to an unsafe.Pointer, is forwarded to
// gostartcall as its third argument.
func gostartcallfn(gobuf *gobuf, fv *funcval) {
	closure := unsafe.Pointer(fv)
	if fv == nil {
		// No function supplied: start at nilfunc instead.
		gostartcall(gobuf, unsafe.Pointer(funcPC(nilfunc)), closure)
		return
	}
	gostartcall(gobuf, unsafe.Pointer(fv.fn), closure)
}
// Maybe shrink the stack being used by gp.
// Called at garbage collection time.
func shrinkstack ( gp * g ) {
if readgstatus ( gp ) == _Gdead {
if gp . stack . lo != 0 {
// Free whole stack - it will get reallocated
// if G is used again.
stackfree ( gp . stack )
gp . stack . lo = 0
gp . stack . hi = 0
}
return
}
if gp . stack . lo == 0 {
2014-12-27 20:58:00 -08:00
throw ( "missing stack in shrinkstack" )
2014-11-11 17:04:34 -05:00
}
oldsize := gp . stack . hi - gp . stack . lo
newsize := oldsize / 2
if newsize < _FixedStack {
return // don't shrink below the minimum-sized stack
}
used := gp . stack . hi - gp . sched . sp
if used >= oldsize / 4 {
return // still using at least 1/4 of the segment.
}
// We can't copy the stack if we're in a syscall.
// The syscall might have pointers into the stack.
if gp . syscallsp != 0 {
return
}
2014-11-24 12:07:11 -05:00
if goos_windows != 0 && gp . m != nil && gp . m . libcallsp != 0 {
2014-11-11 17:04:34 -05:00
return
}
if stackDebug > 0 {
print ( "shrinking stack " , oldsize , "->" , newsize , "\n" )
}
2014-11-15 08:00:38 -05:00
2014-12-05 11:40:41 -05:00
oldstatus := casgcopystack ( gp )
2014-11-11 17:04:34 -05:00
copystack ( gp , newsize )
2014-11-15 08:00:38 -05:00
casgstatus ( gp , _Gcopystack , oldstatus )
2014-11-11 17:04:34 -05:00
}
// shrinkfinish performs the stack frees that were deferred during GC by
// being queued on stackfreequeue.
//
// The queue is detached while holding stackpoolmu and then walked without
// the lock. Each queued stack holds the descriptor of the next one in the
// memory at its own lo address, and a descriptor with lo == 0 ends the walk.
func shrinkfinish() {
	lock(&stackpoolmu)
	queued := stackfreequeue
	stackfreequeue = stack{}
	unlock(&stackpoolmu)

	for cur := queued; cur.lo != 0; {
		// Fetch the link before freeing: it is stored inside the
		// very stack that is about to be handed to stackfree.
		next := *(*stack)(unsafe.Pointer(cur.lo))
		stackfree(cur)
		cur = next
	}
}
2015-01-05 16:29:21 +00:00
//go:nosplit
func morestackc ( ) {
systemstack ( func ( ) {
throw ( "attempt to execute C code on Go stack" )
} )
}