// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package runtime

import (
	"internal/abi"
	"internal/cpu"
	"internal/goarch"
	"internal/goexperiment"
	"internal/goos"
	"internal/runtime/atomic"
	"internal/runtime/gc"
	"internal/runtime/sys"
	"math/bits"
	"unsafe"
)

/*
Stack layout parameters.
Included both by runtime (compiled via 6c) and linkers (compiled via gcc).

The per-goroutine g->stackguard is set to point StackGuard bytes
above the bottom of the stack. Each function compares its stack
pointer against g->stackguard to check for overflow. To cut one
instruction from the check sequence for functions with tiny frames,
the stack is allowed to protrude StackSmall bytes below the stack
guard. Functions with large frames don't bother with the check and
always call morestack. The sequences are (for amd64, others are
similar):

	guard = g->stackguard
	frame = function's stack frame size
	argsize = size of function arguments (call + return)

	stack frame size <= StackSmall:
		CMPQ guard, SP
		JHI 3(PC)
		MOVQ m->morearg, $(argsize << 32)
		CALL morestack(SB)

	stack frame size > StackSmall but < StackBig
		LEAQ (frame-StackSmall)(SP), R0
		CMPQ guard, R0
		JHI 3(PC)
		MOVQ m->morearg, $(argsize << 32)
		CALL morestack(SB)

	stack frame size >= StackBig:
		MOVQ m->morearg, $((argsize << 32) | frame)
		CALL morestack(SB)

The bottom StackGuard - StackSmall bytes are important: there has
to be enough room to execute functions that refuse to check for
stack overflow, either because they need to be adjacent to the
actual caller's frame (deferproc) or because they handle the imminent
stack overflow (morestack).

For example, deferproc might call malloc, which does one of the
above checks (without allocating a full frame), which might trigger
a call to morestack. This sequence needs to fit in the bottom
section of the stack. On amd64, morestack's frame is 40 bytes, and
deferproc's frame is 56 bytes. That fits well within the
StackGuard - StackSmall bytes at the bottom.

The linkers explore all possible call traces involving non-splitting
functions to make sure that this limit cannot be violated.
*/

const (
	// stackSystem is a number of additional bytes to add
	// to each stack below the usual guard area for OS-specific
	// purposes like signal handling. Used on Windows, Plan 9,
	// and iOS because they do not use a separate stack.
	stackSystem = goos.IsWindows*4096 + goos.IsPlan9*512 + goos.IsIos*goarch.IsArm64*1024

	// The minimum size of stack used by Go code
	stackMin = 2048

	// The minimum stack size to allocate.
	// The hackery here rounds fixedStack0 up to a power of 2.
	fixedStack0 = stackMin + stackSystem
	fixedStack1 = fixedStack0 - 1
	fixedStack2 = fixedStack1 | (fixedStack1 >> 1)
	fixedStack3 = fixedStack2 | (fixedStack2 >> 2)
	fixedStack4 = fixedStack3 | (fixedStack3 >> 4)
	fixedStack5 = fixedStack4 | (fixedStack4 >> 8)
	fixedStack6 = fixedStack5 | (fixedStack5 >> 16)
	fixedStack  = fixedStack6 + 1
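	// For example, on Windows (stackSystem = 4096) fixedStack0 is
	// 2048+4096 = 6144; the OR cascade above smears the top bit of 6143
	// (0x17ff) into every lower bit, giving 0x1fff, so fixedStack becomes
	// 8192. Where stackSystem is 0, fixedStack0 is already a power of 2
	// and fixedStack stays 2048.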

	// stackNosplit is the maximum number of bytes that a chain of NOSPLIT
	// functions can use.
	// This arithmetic must match that in cmd/internal/objabi/stack.go:StackNosplit.
	stackNosplit = abi.StackNosplitBase * sys.StackGuardMultiplier

	// The stack guard is a pointer this many bytes above the
	// bottom of the stack.
	//
	// The guard leaves enough room for a stackNosplit chain of NOSPLIT calls
	// plus one stackSmall frame plus stackSystem bytes for the OS.
	//
	// This arithmetic must match that in cmd/internal/objabi/stack.go:StackLimit.
	stackGuard = stackNosplit + stackSystem + abi.StackSmall
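	// For a rough sense of scale (values at the time of writing): on
	// linux/amd64, abi.StackNosplitBase is 800, sys.StackGuardMultiplier
	// is 1, stackSystem is 0, and abi.StackSmall is 128, so stackGuard
	// works out to 928 bytes.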
)

const (
	// stackDebug == 0: no logging
	//            == 1: logging of per-stack operations
	//            == 2: logging of per-frame operations
	//            == 3: logging of per-word updates
	//            == 4: logging of per-word reads
	stackDebug       = 0
	stackFromSystem  = 0 // allocate stacks from system memory instead of the heap
	stackFaultOnFree = 0 // old stacks are mapped noaccess to detect use after free
	stackNoCache     = 0 // disable per-P small stack caches

	// check the BP links during traceback.
	debugCheckBP = false
)

var (
	stackPoisonCopy = 0 // fill stack that should not be accessed with garbage, to detect bad dereferences during copy
)

const (
	uintptrMask = 1<<(8*goarch.PtrSize) - 1

	// The values below can be stored to g.stackguard0 to force
	// the next stack check to fail.
	// These are all larger than any real SP.
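	// Because every function prologue compares SP against stackguard0,
	// storing one of these sentinels makes the very next check fail and
	// diverts the goroutine into morestack, which then inspects the value
	// to decide what to do.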

	// Goroutine preemption request.
	// 0xfffffade in hex.
	stackPreempt = uintptrMask & -1314

	// Thread is forking. Causes a split stack check failure.
	// 0xfffffb2e in hex.
	stackFork = uintptrMask & -1234

	// Force a stack movement. Used for debugging.
	// 0xfffffeed in hex.
	stackForceMove = uintptrMask & -275

	// stackPoisonMin is the lowest allowed stack poison value.
	stackPoisonMin = uintptrMask & -4096
)

// Global pool of spans that have free stacks.
// Stacks are assigned an order according to size.
//
//	order = log_2(size/FixedStack)
//
// There is a free list for each order.
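// For example, on platforms where fixedStack is 2 KiB, order 0 holds
// 2 KiB stacks, order 1 holds 4 KiB stacks, and so on.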
var stackpool [_NumStackOrders]struct {
	item stackpoolItem
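	// Pad each entry out to a full cache line so that the per-order
	// stackpoolItem mutexes don't share a cache line (avoiding false
	// sharing between Ps working on different stack orders).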
	_ [(cpu.CacheLinePadSize - unsafe.Sizeof(stackpoolItem{})%cpu.CacheLinePadSize) % cpu.CacheLinePadSize]byte
}

type stackpoolItem struct {
	_    sys.NotInHeap
	mu   mutex
	span mSpanList
}

// Global pool of large stack spans.
var stackLarge struct {
	lock mutex
	free [heapAddrBits - gc.PageShift]mSpanList // free lists by log_2(s.npages)
}

func stackinit() {
	if _StackCacheSize&pageMask != 0 {
		throw("cache size must be a multiple of page size")
	}
	for i := range stackpool {
		stackpool[i].item.span.init()
		lockInit(&stackpool[i].item.mu, lockRankStackpool)
	}
	for i := range stackLarge.free {
		stackLarge.free[i].init()
		lockInit(&stackLarge.lock, lockRankStackLarge)
	}
}

// stacklog2 returns ⌊log_2(n)⌋.
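// For example, stacklog2(1) == 0, stacklog2(2) == 1, and stacklog2(8) == 3.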
func stacklog2(n uintptr) int {
	if n == 0 {
		return 0
	}
	return bits.Len64(uint64(n)) - 1
}

// Allocates a stack from the free pool. Must be called with
// stackpool[order].item.mu held.
func stackpoolalloc(order uint8) gclinkptr {
	list := &stackpool[order].item.span
	s := list.first
	lockWithRankMayAcquire(&mheap_.lock, lockRankMheap)
	if s == nil {
		// no free stacks. Allocate another span worth.
		s = mheap_.allocManual(_StackCacheSize>>gc.PageShift, spanAllocStack)
		if s == nil {
			throw("out of memory")
		}
		if s.allocCount != 0 {
			throw("bad allocCount")
		}
		if s.manualFreeList.ptr() != nil {
			throw("bad manualFreeList")
		}
		osStackAlloc(s)
		s.elemsize = fixedStack << order
		for i := uintptr(0); i < _StackCacheSize; i += s.elemsize {
			x := gclinkptr(s.base() + i)
			if valgrindenabled {
				// The address of x.ptr() becomes the base of stacks. We need to
				// mark it allocated here and in stackfree and stackpoolfree, and free'd in
				// stackalloc in order to avoid overlapping allocations and
				// uninitialized memory errors in valgrind.
				valgrindMalloc(unsafe.Pointer(x.ptr()), unsafe.Sizeof(x.ptr()))
			}
			x.ptr().next = s.manualFreeList
			s.manualFreeList = x
		}
		list.insert(s)
	}
	x := s.manualFreeList
	if x.ptr() == nil {
		throw("span has no free stacks")
	}
	s.manualFreeList = x.ptr().next
	s.allocCount++
	if s.manualFreeList.ptr() == nil {
		// all stacks in s are allocated.
		list.remove(s)
	}
	return x
}

// Adds stack x to the free pool. Must be called with stackpool[order].item.mu held.
func stackpoolfree(x gclinkptr, order uint8) {
	s := spanOfUnchecked(uintptr(x))
	if s.state.get() != mSpanManual {
		throw("freeing stack not in a stack span")
	}
	if s.manualFreeList.ptr() == nil {
		// s will now have a free stack
		stackpool[order].item.span.insert(s)
	}
	x.ptr().next = s.manualFreeList
	s.manualFreeList = x
	s.allocCount--
	if gcphase == _GCoff && s.allocCount == 0 {
		// Span is completely free. Return it to the heap
		// immediately if we're sweeping.
		//
		// If GC is active, we delay the free until the end of
		// GC to avoid the following type of situation:
		//
		// 1) GC starts, scans a SudoG but does not yet mark the SudoG.elem pointer
		// 2) The stack that pointer points to is copied
		// 3) The old stack is freed
		// 4) The containing span is marked free
		// 5) GC attempts to mark the SudoG.elem pointer. The
		//    marking fails because the pointer looks like a
		//    pointer into a free span.
		//
		// By not freeing, we prevent step #4 until GC is done.
		stackpool[order].item.span.remove(s)
		s.manualFreeList = 0
		osStackFree(s)
		mheap_.freeManual(s, spanAllocStack)
	}
}

// stackcacherefill/stackcacherelease implement a global pool of stack segments.
// The pool is required to prevent unlimited growth of per-thread caches.
//
//go:systemstack
func stackcacherefill(c *mcache, order uint8) {
	if stackDebug >= 1 {
		print("stackcacherefill order=", order, "\n")
	}

	// Grab some stacks from the global cache.
	// Grab half of the allowed capacity (to prevent thrashing).
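	// (_StackCacheSize is 32 KiB at the time of writing, so each refill
	// moves roughly 16 KiB of stacks into this P's cache.)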
	var list gclinkptr
	var size uintptr
	lock(&stackpool[order].item.mu)
	for size < _StackCacheSize/2 {
		x := stackpoolalloc(order)
		x.ptr().next = list
		list = x
		size += fixedStack << order
	}
	unlock(&stackpool[order].item.mu)
	c.stackcache[order].list = list
	c.stackcache[order].size = size
}

//go:systemstack
func stackcacherelease(c *mcache, order uint8) {
	if stackDebug >= 1 {
		print("stackcacherelease order=", order, "\n")
	}
	x := c.stackcache[order].list
	size := c.stackcache[order].size
	lock(&stackpool[order].item.mu)
	for size > _StackCacheSize/2 {
		y := x.ptr().next
		stackpoolfree(x, order)
		x = y
		size -= fixedStack << order
	}
	unlock(&stackpool[order].item.mu)
	c.stackcache[order].list = x
	c.stackcache[order].size = size
}

//go:systemstack
func stackcache_clear(c *mcache) {
	if stackDebug >= 1 {
		print("stackcache clear\n")
	}
	for order := uint8(0); order < _NumStackOrders; order++ {
		lock(&stackpool[order].item.mu)
		x := c.stackcache[order].list
		for x.ptr() != nil {
			y := x.ptr().next
			stackpoolfree(x, order)
			x = y
		}
		c.stackcache[order].list = 0
		c.stackcache[order].size = 0
		unlock(&stackpool[order].item.mu)
	}
}

// stackalloc allocates an n byte stack.
//
// stackalloc must run on the system stack because it uses per-P
// resources and must not split the stack.
//
//go:systemstack
func stackalloc(n uint32) stack {
	// Stackalloc must be called on scheduler stack, so that we
	// never try to grow the stack during the code that stackalloc runs.
	// Doing so would cause a deadlock (issue 1547).
	thisg := getg()
	if thisg != thisg.m.g0 {
		throw("stackalloc not on scheduler stack")
	}
	if n&(n-1) != 0 {
		throw("stack size not a power of 2")
	}
	if stackDebug >= 1 {
		print("stackalloc ", n, "\n")
	}

	if debug.efence != 0 || stackFromSystem != 0 {
		n = uint32(alignUp(uintptr(n), physPageSize))
		v := sysAlloc(uintptr(n), &memstats.stacks_sys, "goroutine stack (system)")
		if v == nil {
			throw("out of memory (stackalloc)")
		}
		return stack{uintptr(v), uintptr(v) + uintptr(n)}
	}

	// Small stacks are allocated with a fixed-size free-list allocator.
	// If we need a stack of a bigger size, we fall back on allocating
	// a dedicated span.
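	// (On a typical 64-bit Linux build, with fixedStack = 2 KiB and
	// _NumStackOrders = 4, the cutoff below works out to 32 KiB.)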
	var v unsafe.Pointer
	if n < fixedStack<<_NumStackOrders && n < _StackCacheSize {
		order := uint8(0)
		n2 := n
		for n2 > fixedStack {
			order++
			n2 >>= 1
		}
		var x gclinkptr
		if stackNoCache != 0 || thisg.m.p == 0 || thisg.m.preemptoff != "" {
			// thisg.m.p == 0 can happen in the guts of exitsyscall
			// or procresize. Just get a stack from the global pool.
			// Also don't touch stackcache during gc
			// as it's flushed concurrently.
			lock(&stackpool[order].item.mu)
			x = stackpoolalloc(order)
			unlock(&stackpool[order].item.mu)
		} else {
			c := thisg.m.p.ptr().mcache
			x = c.stackcache[order].list
			if x.ptr() == nil {
				stackcacherefill(c, order)
				x = c.stackcache[order].list
			}
			c.stackcache[order].list = x.ptr().next
			c.stackcache[order].size -= uintptr(n)
		}
		if valgrindenabled {
			// We're about to allocate the stack region starting at x.ptr().
			// To prevent valgrind from complaining about overlapping allocations,
			// we need to mark the (previously allocated) memory as free'd.
			valgrindFree(unsafe.Pointer(x.ptr()))
		}
		v = unsafe.Pointer(x)
	} else {
		var s *mspan
		npage := uintptr(n) >> gc.PageShift
		log2npage := stacklog2(npage)
		// Try to get a stack from the large stack cache.
		lock(&stackLarge.lock)
		if !stackLarge.free[log2npage].isEmpty() {
			s = stackLarge.free[log2npage].first
			stackLarge.free[log2npage].remove(s)
		}
		unlock(&stackLarge.lock)
		lockWithRankMayAcquire(&mheap_.lock, lockRankMheap)
		if s == nil {
			// Allocate a new stack from the heap.
			s = mheap_.allocManual(npage, spanAllocStack)
			if s == nil {
				throw("out of memory")
			}
			osStackAlloc(s)
			s.elemsize = uintptr(n)
		}
		v = unsafe.Pointer(s.base())
	}

	if traceAllocFreeEnabled() {
		trace := traceAcquire()
		if trace.ok() {
			trace.GoroutineStackAlloc(uintptr(v), uintptr(n))
			traceRelease(trace)
		}
	}
	if raceenabled {
		racemalloc(v, uintptr(n))
	}
	if msanenabled {
		msanmalloc(v, uintptr(n))
	}
	if asanenabled {
		asanunpoison(v, uintptr(n))
	}
	if valgrindenabled {
		valgrindMalloc(v, uintptr(n))
	}
	if stackDebug >= 1 {
		print(" allocated ", v, "\n")
	}
	return stack{uintptr(v), uintptr(v) + uintptr(n)}
}

// stackfree frees an n byte stack allocation at stk.
//
// stackfree must run on the system stack because it uses per-P
// resources and must not split the stack.
//
//go:systemstack
func stackfree(stk stack) {
	gp := getg()
	v := unsafe.Pointer(stk.lo)
	n := stk.hi - stk.lo
	if n&(n-1) != 0 {
		throw("stack not a power of 2")
	}
	if stk.lo+n < stk.hi {
		throw("bad stack size")
	}
	if stackDebug >= 1 {
		println("stackfree", v, n)
		memclrNoHeapPointers(v, n) // for testing, clobber stack data
	}
	if debug.efence != 0 || stackFromSystem != 0 {
		if debug.efence != 0 || stackFaultOnFree != 0 {
			sysFault(v, n)
		} else {
			sysFree(v, n, &memstats.stacks_sys)
		}
		return
	}

	if traceAllocFreeEnabled() {
		trace := traceAcquire()
		if trace.ok() {
			trace.GoroutineStackFree(uintptr(v))
			traceRelease(trace)
		}
	}
	if msanenabled {
		msanfree(v, n)
	}
	if asanenabled {
		asanpoison(v, n)
	}
	if valgrindenabled {
		valgrindFree(v)
	}
	if n < fixedStack<<_NumStackOrders && n < _StackCacheSize {
		order := uint8(0)
		n2 := n
		for n2 > fixedStack {
			order++
			n2 >>= 1
		}
		x := gclinkptr(v)
		if stackNoCache != 0 || gp.m.p == 0 || gp.m.preemptoff != "" {
			lock(&stackpool[order].item.mu)
			if valgrindenabled {
				// x.ptr() is the head of the list of free stacks, and will be used
				// when allocating a new stack, so it has to be marked allocated.
				valgrindMalloc(unsafe.Pointer(x.ptr()), unsafe.Sizeof(x.ptr()))
			}
			stackpoolfree(x, order)
			unlock(&stackpool[order].item.mu)
		} else {
			c := gp.m.p.ptr().mcache
			if c.stackcache[order].size >= _StackCacheSize {
				stackcacherelease(c, order)
			}
			if valgrindenabled {
				// x.ptr() is the head of the list of free stacks, and will
				// be used when allocating a new stack, so it has to be
				// marked allocated.
				valgrindMalloc(unsafe.Pointer(x.ptr()), unsafe.Sizeof(x.ptr()))
			}
			x.ptr().next = c.stackcache[order].list
			c.stackcache[order].list = x
			c.stackcache[order].size += n
		}
	} else {
		s := spanOfUnchecked(uintptr(v))
		if s.state.get() != mSpanManual {
			println(hex(s.base()), v)
			throw("bad span state")
		}
		if gcphase == _GCoff {
			// Free the stack immediately if we're
			// sweeping.
			osStackFree(s)
			mheap_.freeManual(s, spanAllocStack)
} else {
2015-12-14 14:30:25 -05:00
// If the GC is running, we can't return a
// stack span to the heap because it could be
// reused as a heap span, and this state
// change would race with GC. Add it to the
// large stack cache instead.
log2npage := stacklog2 ( s . npages )
lock ( & stackLarge . lock )
stackLarge . free [ log2npage ] . insert ( s )
unlock ( & stackLarge . lock )
2015-06-22 10:24:50 -04:00
}
2014-11-11 17:04:34 -05:00
}
}
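To make the deferred-free policy concrete, here is a small self-contained sketch. The names below (span, stackCache, release, gcDone) are hypothetical stand-ins rather than the runtime's real types; in the real code the immediate path goes through mheap_.freeManual and the deferred path is swept once the GC cycle ends.

package main

import "fmt"

// Toy stand-ins for the runtime's span bookkeeping.
type span struct {
    id  int
    ref int // stacks still living on this span
}

type stackCache struct {
    gcRunning bool    // true while the GC is scanning or marking
    cached    []*span // spans parked until the GC cycle ends
}

// release is called when the last stack on a span is freed.
func (c *stackCache) release(s *span) {
    if !c.gcRunning {
        fmt.Println("free span", s.id, "immediately") // sweep phase: safe to free now
        return
    }
    // The GC assumes spans never go from non-free to free during
    // scan or mark, so park the span instead of freeing it.
    c.cached = append(c.cached, s)
}

// gcDone mimics the end-of-GC sweep over cached stack spans.
func (c *stackCache) gcDone() {
    c.gcRunning = false
    for _, s := range c.cached {
        if s.ref == 0 {
            fmt.Println("free cached span", s.id, "at end of GC")
        }
    }
    c.cached = nil
}

func main() {
    c := &stackCache{gcRunning: true}
    c.release(&span{id: 1})
    c.gcDone()
}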
var maxstacksize uintptr = 1 << 20 // enough until runtime.main sets it for real
2020-09-18 19:15:41 +03:00
var maxstackceiling = maxstacksize
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss
The bitmaps were 2 bits per pointer because we needed to distinguish
scalar, pointer, multiword, and we used the leftover value to distinguish
uninitialized from scalar, even though the garbage collector (GC) didn't care.
Now that there are no multiword structures from the GC's point of view,
cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not.
The GC assumes the same layout for stack frames and for the maps
describing the global data and bss sections, so change them all in one CL.
The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since
the 2-bit representation lives (at least for now) in some of the reflect data.
Because these stack frame bitmaps are stored directly in the rodata in
the binary, this CL reduces the size of the 6g binary by about 1.1%.
Performance change is basically a wash, but using less memory,
and smaller binaries, and enables other bitmap reductions.
name old mean new mean delta
BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005)
BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001)
BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141)
BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001)
BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095)
BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008)
BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014)
BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364)
BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010)
BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368)
BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484)
BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543)
BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000)
BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023)
BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126)
BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975)
BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153)
BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597)
BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804)
BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881)
BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561)
BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000)
BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000)
BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000)
BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000)
BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019)
BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000)
BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025)
BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000)
BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000)
BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305)
BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000)
BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465)
BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075)
BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337)
BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291)
BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507)
BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313)
BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312)
BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000)
BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007)
BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670)
BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828)
BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392)
BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813)
BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000)
BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000)
BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000)
BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985)
BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320)
BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799)
BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667)
BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001)
BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000)
BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011)
BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185)
BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001)
BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000)
BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979)
BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777)
BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771)
BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000)
BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081)
BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027)
BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022)
BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064)
BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001)
BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007)
BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143)
BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278)
BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252)
BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003)
BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581)
BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002)
BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879)
BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257)
BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678)
BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000)
BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000)
BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000)
BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000)
BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000)
BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000)
BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767)
BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347)
BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793)
Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9
Reviewed-on: https://go-review.googlesource.com/9406
Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 22:45:57 -04:00
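The halving described above is easy to quantify: a frame with n pointer-width slots needs ceil(2n/8) bitmap bytes at 2 bits per slot but only ceil(n/8) at 1 bit per slot. A quick standalone sketch (the frame sizes are illustrative only):

package main

import "fmt"

func main() {
    // Bytes of bitmap needed for a frame with n pointer-width slots.
    bytesFor := func(bitsPerSlot, n int) int {
        return (bitsPerSlot*n + 7) / 8 // round up to whole bytes
    }
    for _, n := range []int{4, 16, 128} { // hypothetical frame sizes, in slots
        fmt.Printf("%3d slots: 2-bit map = %2d bytes, 1-bit map = %2d bytes\n",
            n, bytesFor(2, n), bytesFor(1, n))
    }
}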
var ptrnames = [ ] string {
0 : "scalar" ,
1 : "ptr" ,
2014-11-11 17:04:34 -05:00
}
// Stack frame layout
//
// (x86)
// +------------------+
// | args from caller |
// +------------------+ <- frame->argp
// | return address |
2015-01-14 11:09:50 -05:00
// +------------------+
2020-07-06 16:03:33 -04:00
// | caller's BP (*) | (*) if framepointer_enabled && varp > sp
2014-11-11 17:04:34 -05:00
// +------------------+ <- frame->varp
// | locals |
// +------------------+
// | args to callee |
// +------------------+ <- frame->sp
//
2025-10-07 07:58:50 -07:00
// (arm)
2014-11-11 17:04:34 -05:00
// +------------------+
// | args from caller |
// +------------------+ <- frame->argp
2025-10-07 07:58:50 -07:00
// | caller's retaddr |
2020-07-06 16:03:33 -04:00
// +------------------+
2025-10-07 07:58:50 -07:00
// | caller's FP (*) | (*) on ARM64, if framepointer_enabled && varp > sp
2014-11-11 17:04:34 -05:00
// +------------------+ <- frame->varp
// | locals |
// +------------------+
// | args to callee |
// +------------------+
2025-10-07 07:58:50 -07:00
// | return address |
2014-11-11 17:04:34 -05:00
// +------------------+ <- frame->sp
2020-07-06 16:03:33 -04:00
//
// varp > sp means that the function has a frame;
// varp == sp means frameless function.
2014-11-11 17:04:34 -05:00
type adjustinfo struct {
old stack
delta uintptr // ptr distance from old to new stack (newbase - oldbase)
2016-02-15 17:38:06 -05:00
// sghi is the highest sudog.elem on the stack.
sghi uintptr
2014-11-11 17:04:34 -05:00
}
2022-11-03 15:39:41 +08:00
// adjustpointer checks whether *vpp is in the old stack described by adjinfo.
2014-11-11 17:04:34 -05:00
// If so, it rewrites *vpp to point into the new stack.
func adjustpointer ( adjinfo * adjustinfo , vpp unsafe . Pointer ) {
2015-11-23 11:34:16 -05:00
pp := ( * uintptr ) ( vpp )
2014-11-11 17:04:34 -05:00
p := * pp
if stackDebug >= 4 {
2015-11-23 11:34:16 -05:00
print ( " " , pp , ":" , hex ( p ) , "\n" )
2014-11-11 17:04:34 -05:00
}
runtime: add valgrind instrumentation
Add build tag gated Valgrind annotations to the runtime which let it
understand how the runtime manages memory. This allows for Go binaries
to be run under Valgrind without emitting spurious errors.
Instead of adding the Valgrind headers to the tree, and using cgo to
call the various Valgrind client request macros, we just add an assembly
function which emits the necessary instructions to trigger client
requests.
In particular we add instrumentation of the memory allocator, using a
two-level mempool structure (as described in the Valgrind manual [0]).
We also add annotations which allow Valgrind to track which memory we
use for stacks, which seems necessary to let it properly function.
We describe the memory model to Valgrind as follows: we treat heap
arenas as a "pool" created with VALGRIND_CREATE_MEMPOOL_EXT (so that we
can use VALGRIND_MEMPOOL_METAPOOL and VALGRIND_MEMPOOL_AUTO_FREE).
Within the pool we treat spans as "superblocks", annotated with
VALGRIND_MEMPOOL_ALLOC. We then allocate individual objects within spans
with VALGRIND_MALLOCLIKE_BLOCK.
It should be noted that running binaries under Valgrind can be _quite
slow_, and certain operations, such as running the GC, can be _very
slow_. It is recommended to run programs with GOGC=off. Additionally,
async preemption should be turned off, since it'll cause strange
behavior (GODEBUG=asyncpreemptoff=1).
Running Valgrind with --leak-check=yes will report some errors
caused by objects that are not marked as fully freed. These likely
need more annotations to rectify, but for now it is recommended to run
with --leak-check=off.
Updates #73602
[0] https://valgrind.org/docs/manual/mc-manual.html#mc-manual.mempools
Change-Id: I71b26c47d7084de71ef1e03947ef6b1cc6d38301
Reviewed-on: https://go-review.googlesource.com/c/go/+/674077
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
2025-03-22 00:58:55 +00:00
if valgrindenabled {
// p is a pointer on a stack, it is inherently initialized, as
// everything on the stack is, but valgrind for _some unknown reason_
// sometimes thinks it's uninitialized, and flags operations on p below
// as uninitialized. We just initialize it if valgrind thinks it's
// uninitialized.
//
// See go.dev/issues/73801.
valgrindMakeMemDefined ( unsafe . Pointer ( & p ) , unsafe . Sizeof ( & p ) )
}
2015-11-23 11:34:16 -05:00
if adjinfo . old . lo <= p && p < adjinfo . old . hi {
* pp = p + adjinfo . delta
2014-11-11 17:04:34 -05:00
if stackDebug >= 3 {
2015-11-23 11:34:16 -05:00
print ( " adjust ptr " , pp , ":" , hex ( p ) , " -> " , hex ( * pp ) , "\n" )
2014-11-11 17:04:34 -05:00
}
}
}
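The rebasing arithmetic in adjustpointer is just a range check plus a constant offset. The following standalone sketch (toy addresses, not runtime code) shows the same computation on plain uintptr values: a pointer inside the old stack's bounds is shifted by delta, everything else is left alone.

package main

import "fmt"

type stackBounds struct{ lo, hi uintptr }

// adjust rebases p into the new stack if it points into the old one.
func adjust(p uintptr, old stackBounds, delta uintptr) uintptr {
    if old.lo <= p && p < old.hi {
        return p + delta
    }
    return p
}

func main() {
    old := stackBounds{lo: 0x1000, hi: 0x2000}
    newStk := stackBounds{lo: 0x8000, hi: 0x9000}
    delta := newStk.hi - old.hi // one offset rebases every slot

    fmt.Printf("%#x\n", adjust(0x1ff0, old, delta)) // inside the old stack -> 0x8ff0
    fmt.Printf("%#x\n", adjust(0x4000, old, delta)) // heap pointer, unchanged
}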
2015-05-04 10:19:24 -04:00
// Information from the compiler about the layout of stack frames.
2020-06-18 12:51:35 -07:00
// Note: this type must agree with reflect.bitVector.
2015-05-04 10:19:24 -04:00
type bitvector struct {
n int32 // # of bits
bytedata * uint8
}
2018-04-01 11:01:36 -07:00
// ptrbit returns the i'th bit in bv.
// ptrbit is less efficient than iterating directly over bitvector bits,
// and should only be used in non-performance-critical code.
// See adjustpointers for an example of a high-efficiency walk of a bitvector.
func ( bv * bitvector ) ptrbit ( i uintptr ) uint8 {
b := * ( addb ( bv . bytedata , i / 8 ) )
return ( b >> ( i % 8 ) ) & 1
2014-11-11 17:04:34 -05:00
}
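As a standalone illustration of the indexing ptrbit performs, the sketch below reads bit i of a bitmap packed eight bits per byte, least significant bit first: byte i/8, bit i%8. The bitmap contents are made up for the example.

package main

import "fmt"

// bit mirrors bitvector.ptrbit: byte i/8, bit i%8, LSB first.
func bit(bitmap []byte, i uint) byte {
    return (bitmap[i/8] >> (i % 8)) & 1
}

func main() {
    bm := []byte{0x0a} // 0b00001010: slots 1 and 3 hold pointers
    for i := uint(0); i < 8; i++ {
        fmt.Print(bit(bm, i), " ")
    }
    fmt.Println() // prints: 0 1 0 1 0 0 0 0
}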
// bv describes the memory starting at address scanp.
// Adjust any pointers contained therein.
2018-04-01 11:01:36 -07:00
func adjustpointers ( scanp unsafe . Pointer , bv * bitvector , adjinfo * adjustinfo , f funcInfo ) {
2014-11-11 17:04:34 -05:00
minp := adjinfo . old . lo
maxp := adjinfo . old . hi
delta := adjinfo . delta
2018-04-01 11:01:36 -07:00
num := uintptr ( bv . n )
2016-02-15 17:38:06 -05:00
// If this frame might contain channel receive slots, use CAS
// to adjust pointers. If the slot hasn't been received into
// yet, it may contain stack pointers and a concurrent send
// could race with adjusting those pointers. (The sent value
// itself can never contain stack pointers.)
useCAS := uintptr ( scanp ) < adjinfo . sghi
2018-04-01 11:01:36 -07:00
for i := uintptr ( 0 ) ; i < num ; i += 8 {
2014-11-11 17:04:34 -05:00
if stackDebug >= 4 {
2018-04-01 11:01:36 -07:00
for j := uintptr ( 0 ) ; j < 8 ; j ++ {
2021-06-16 23:05:44 +00:00
print ( " " , add ( scanp , ( i + j ) * goarch . PtrSize ) , ":" , ptrnames [ bv . ptrbit ( i + j ) ] , ":" , hex ( * ( * uintptr ) ( add ( scanp , ( i + j ) * goarch . PtrSize ) ) ) , " # " , i , " " , * addb ( bv . bytedata , i / 8 ) , "\n" )
2018-04-01 11:01:36 -07:00
}
2017-08-17 15:51:35 +01:00
}
2018-04-01 11:01:36 -07:00
b := * ( addb ( bv . bytedata , i / 8 ) )
for b != 0 {
2022-10-05 15:29:29 +08:00
j := uintptr ( sys . TrailingZeros8 ( b ) )
2018-04-01 11:01:36 -07:00
b &= b - 1
2021-06-16 23:05:44 +00:00
pp := ( * uintptr ) ( add ( scanp , ( i + j ) * goarch . PtrSize ) )
2018-04-01 11:01:36 -07:00
retry :
p := * pp
if f . valid ( ) && 0 < p && p < minLegalPointer && debug . invalidptr != 0 {
// Looks like a junk value in a pointer slot.
// Live analysis wrong?
getg ( ) . m . traceback = 2
print ( "runtime: bad pointer in frame " , funcname ( f ) , " at " , pp , ": " , hex ( p ) , "\n" )
throw ( "invalid pointer found on stack" )
2014-11-11 17:04:34 -05:00
}
2018-04-01 11:01:36 -07:00
if minp <= p && p < maxp {
if stackDebug >= 3 {
print ( "adjust ptr " , hex ( p ) , " " , funcname ( f ) , "\n" )
}
if useCAS {
ppu := ( * unsafe . Pointer ) ( unsafe . Pointer ( pp ) )
if ! atomic . Casp1 ( ppu , unsafe . Pointer ( p ) , unsafe . Pointer ( p + delta ) ) {
goto retry
}
} else {
* pp = p + delta
2016-02-15 17:38:06 -05:00
}
2014-11-11 17:04:34 -05:00
}
}
}
}
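The inner loop above touches only the one bits of each bitmap byte: TrailingZeros8 finds the lowest set bit and b &= b - 1 clears it. The same pattern can be reproduced with the standard library, as in this small sketch (the byte value is just an example):

package main

import (
    "fmt"
    "math/bits"
)

func main() {
    b := uint8(0b10010110) // pretend these are the pointer slots of one bitmap byte
    for b != 0 {
        j := bits.TrailingZeros8(b) // index of the lowest set bit
        b &= b - 1                  // clear that bit
        fmt.Println("pointer slot", j)
    }
    // Visits slots 1, 2, 4, 7; zero bits are skipped entirely.
}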
// Note: the argument/return area is adjusted by the callee.
2023-02-13 16:20:54 -05:00
func adjustframe ( frame * stkframe , adjinfo * adjustinfo ) {
2018-04-26 21:20:41 -04:00
if frame . continpc == 0 {
2014-11-11 17:04:34 -05:00
// Frame is dead.
2023-02-13 16:20:54 -05:00
return
2014-11-11 17:04:34 -05:00
}
f := frame . fn
if stackDebug >= 2 {
print ( " adjusting " , funcname ( f ) , " frame=[" , hex ( frame . sp ) , "," , hex ( frame . fp ) , "] pc=" , hex ( frame . pc ) , " continpc=" , hex ( frame . continpc ) , "\n" )
}
2020-07-06 16:03:33 -04:00
// Adjust saved frame pointer if there is one.
2025-10-07 07:58:50 -07:00
if ( goarch . ArchFamily == goarch . AMD64 || goarch . ArchFamily == goarch . ARM64 ) && frame . argp - frame . varp == 2 * goarch . PtrSize {
2015-01-14 11:09:50 -05:00
if stackDebug >= 3 {
print ( " saved bp\n" )
}
2016-12-02 15:17:52 -08:00
if debugCheckBP {
// Frame pointers should always point to the next higher frame on
// the Go stack (or be nil, for the top frame on the stack).
bp := * ( * uintptr ) ( unsafe . Pointer ( frame . varp ) )
if bp != 0 && ( bp < adjinfo . old . lo || bp >= adjinfo . old . hi ) {
println ( "runtime: found invalid frame pointer" )
print ( "bp=" , hex ( bp ) , " min=" , hex ( adjinfo . old . lo ) , " max=" , hex ( adjinfo . old . hi ) , "\n" )
throw ( "bad frame pointer" )
}
}
2025-10-07 07:58:50 -07:00
// On AMD64, this is the caller's frame pointer saved in the current
// frame.
// On ARM64, this is the frame pointer of the caller's caller saved
// by the caller in its frame (one word below its SP).
2015-01-14 11:09:50 -05:00
adjustpointer ( adjinfo , unsafe . Pointer ( frame . varp ) )
}
2023-08-01 14:41:42 -04:00
locals , args , objs := frame . getStackMap ( true )
runtime: fix systemstack frame pointer adjustment
Change adjustframe to adjust the frame pointer of systemstack (aka
FuncID_systemstack_switch) before returning early.
Without this fix it is possible for traceEvent() to crash when using
frame pointer unwinding. The issue occurs when a goroutine calls
systemstack in order to call shrinkstack. While returning, systemstack
will restore the unadjusted frame pointer from its frame as part of its
epilogue. If the callee of systemstack then triggers a traceEvent, it
will try to unwind into the old stack. This can lead to a crash if the
memory of the old stack has been reused or freed in the meantime.
The most common situation in which this will manifest is when
gcAssistAlloc() invokes gcAssistAlloc1() on systemstack() and performs a
shrinkstack() followed by a traceGCMarkAssistDone() or Gosched()
triggering traceEvent().
See CL 489115 for a deterministic test case that triggers the issue.
Meanwhile the problem can frequently be observed using the command
below:
$ GODEBUG=tracefpunwindoff=0 ../bin/go test -trace /dev/null -run TestDeferHeapAndStack ./runtime
SIGSEGV: segmentation violation
PC=0x45f977 m=14 sigcode=128
goroutine 0 [idle]:
runtime.fpTracebackPCs(...)
.../go/src/runtime/trace.go:945
runtime.traceStackID(0xcdab904677a?, {0x7f1584346018, 0x0?, 0x80}, 0x0?)
.../go/src/runtime/trace.go:917 +0x217 fp=0x7f1565ffab00 sp=0x7f1565ffaab8 pc=0x45f977
runtime.traceEventLocked(0x0?, 0x0?, 0x0?, 0xc00003dbd0, 0x12, 0x0, 0x1, {0x0, 0x0, 0x0})
.../go/src/runtime/trace.go:760 +0x285 fp=0x7f1565ffab78 sp=0x7f1565ffab00 pc=0x45ef45
runtime.traceEvent(0xf5?, 0x1, {0x0, 0x0, 0x0})
.../go/src/runtime/trace.go:692 +0xa9 fp=0x7f1565ffabe0 sp=0x7f1565ffab78 pc=0x45ec49
runtime.traceGoPreempt(...)
.../go/src/runtime/trace.go:1535
runtime.gopreempt_m(0xc000328340?)
.../go/src/runtime/proc.go:3551 +0x45 fp=0x7f1565ffac20 sp=0x7f1565ffabe0 pc=0x4449a5
runtime.newstack()
.../go/src/runtime/stack.go:1077 +0x3cb fp=0x7f1565ffadd0 sp=0x7f1565ffac20 pc=0x455feb
runtime.morestack()
.../go/src/runtime/asm_amd64.s:593 +0x8f fp=0x7f1565ffadd8 sp=0x7f1565ffadd0 pc=0x47644f
goroutine 19 [running]:
runtime.traceEvent(0x2c?, 0xffffffffffffffff, {0x0, 0x0, 0x0})
.../go/src/runtime/trace.go:669 +0xe8 fp=0xc0006e6c28 sp=0xc0006e6c20 pc=0x45ec88
runtime.traceGCMarkAssistDone(...)
.../go/src/runtime/trace.go:1497
runtime.gcAssistAlloc(0xc0003281a0)
.../go/src/runtime/mgcmark.go:517 +0x27d fp=0xc0006e6c88 sp=0xc0006e6c28 pc=0x421a1d
runtime.deductAssistCredit(0x0?)
.../go/src/runtime/malloc.go:1287 +0x54 fp=0xc0006e6cb0 sp=0xc0006e6c88 pc=0x40fed4
runtime.mallocgc(0x400, 0x7a9420, 0x1)
.../go/src/runtime/malloc.go:1002 +0xc9 fp=0xc0006e6d18 sp=0xc0006e6cb0 pc=0x40f709
runtime.newobject(0xb3?)
.../go/src/runtime/malloc.go:1324 +0x25 fp=0xc0006e6d40 sp=0xc0006e6d18 pc=0x40ffc5
runtime_test.deferHeapAndStack(0xb4)
.../go/src/runtime/stack_test.go:924 +0x165 fp=0xc0006e6e20 sp=0xc0006e6d40 pc=0x75c2a5
Fixes #59692
Co-Authored-By: Cherry Mui <cherryyz@google.com>
Co-Authored-By: Michael Knyszek <mknyszek@google.com>
Co-Authored-By: Nick Ripley <nick.ripley@datadoghq.com>
Change-Id: I1c0c28327fc2fac0b8cfdbaa72e25584331be31e
Reviewed-on: https://go-review.googlesource.com/c/go/+/489015
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
Run-TryBot: Felix Geisendörfer <felix.geisendoerfer@datadoghq.com>
2023-04-22 09:54:46 +03:00
// Adjust local variables if stack frame has been allocated.
if locals . n > 0 {
size := uintptr ( locals . n ) * goarch . PtrSize
adjustpointers ( unsafe . Pointer ( frame . varp - size ) , & locals , adjinfo , f )
}
2014-11-11 17:04:34 -05:00
// Adjust arguments.
2018-04-26 21:20:41 -04:00
if args . n > 0 {
2014-11-11 17:04:34 -05:00
if stackDebug >= 3 {
print ( " args\n" )
}
2018-04-26 21:20:41 -04:00
adjustpointers ( unsafe . Pointer ( frame . argp ) , & args , adjinfo , funcInfo { } )
2014-11-11 17:04:34 -05:00
}
2018-09-01 20:16:39 -07:00
// Adjust pointers in all stack objects (whether they are live or not).
// See comments in mgcmark.go:scanframeworker.
if frame . varp != 0 {
2021-09-27 14:27:20 -07:00
for i := range objs {
obj := & objs [ i ]
2018-09-01 20:16:39 -07:00
off := obj . off
base := frame . varp // locals base pointer
if off >= 0 {
base = frame . argp // arguments and return values base pointer
}
p := base + uintptr ( off )
if p < frame . sp {
// Object hasn't been allocated in the frame yet.
// (Happens when the stack bounds check fails and
// we call into morestack.)
continue
}
2023-11-15 13:38:06 -08:00
ptrBytes , gcData := obj . gcdata ( )
for i := uintptr ( 0 ) ; i < ptrBytes ; i += goarch . PtrSize {
if * addb ( gcData , i / ( 8 * goarch . PtrSize ) ) >> ( i / goarch . PtrSize & 7 ) & 1 != 0 {
2018-09-01 20:16:39 -07:00
adjustpointer ( adjinfo , unsafe . Pointer ( p + i ) )
}
}
}
}
2014-11-11 17:04:34 -05:00
}
func adjustctxt ( gp * g , adjinfo * adjustinfo ) {
2015-10-15 14:33:50 -07:00
adjustpointer ( adjinfo , unsafe . Pointer ( & gp . sched . ctxt ) )
2016-12-02 15:17:52 -08:00
if ! framepointer_enabled {
return
}
if debugCheckBP {
bp := gp . sched . bp
if bp != 0 && ( bp < adjinfo . old . lo || bp >= adjinfo . old . hi ) {
println ( "runtime: found invalid top frame pointer" )
print ( "bp=" , hex ( bp ) , " min=" , hex ( adjinfo . old . lo ) , " max=" , hex ( adjinfo . old . hi ) , "\n" )
throw ( "bad top frame pointer" )
}
}
2020-07-06 16:03:33 -04:00
oldfp := gp . sched . bp
2016-12-02 15:17:52 -08:00
adjustpointer ( adjinfo , unsafe . Pointer ( & gp . sched . bp ) )
2020-07-06 16:03:33 -04:00
if GOARCH == "arm64" {
// On ARM64, the frame pointer is saved one word *below* the SP,
// which is not copied or adjusted in any frame. Do it explicitly
// here.
if oldfp == gp . sched . sp - goarch . PtrSize {
memmove ( unsafe . Pointer ( gp . sched . bp ) , unsafe . Pointer ( oldfp ) , goarch . PtrSize )
adjustpointer ( adjinfo , unsafe . Pointer ( gp . sched . bp ) )
}
}
2014-11-11 17:04:34 -05:00
}
func adjustdefers ( gp * g , adjinfo * adjustinfo ) {
// Adjust pointers in the Defer structs.
2019-06-08 17:20:57 +00:00
// We need to do this first because we need to adjust the
// defer.link fields so we always work on the new stack.
adjustpointer ( adjinfo , unsafe . Pointer ( & gp . _defer ) )
2014-11-11 17:04:34 -05:00
for d := gp . _defer ; d != nil ; d = d . link {
2015-10-15 14:33:50 -07:00
adjustpointer ( adjinfo , unsafe . Pointer ( & d . fn ) )
adjustpointer ( adjinfo , unsafe . Pointer ( & d . sp ) )
2019-06-08 17:20:57 +00:00
adjustpointer ( adjinfo , unsafe . Pointer ( & d . link ) )
2014-11-11 17:04:34 -05:00
}
}
func adjustpanics ( gp * g , adjinfo * adjustinfo ) {
// Panics are on stack and already adjusted.
// Update pointer to head of list in G.
2015-10-15 14:33:50 -07:00
adjustpointer ( adjinfo , unsafe . Pointer ( & gp . _panic ) )
2014-11-11 17:04:34 -05:00
}
func adjustsudogs ( gp * g , adjinfo * adjustinfo ) {
// the data elements pointed to by a SudoG structure
// might be in the stack.
for s := gp . waiting ; s != nil ; s = s . waitlink {
2025-10-02 11:57:58 +00:00
adjustpointer ( adjinfo , unsafe . Pointer ( & s . elem . vu ) )
adjustpointer ( adjinfo , unsafe . Pointer ( & s . elem . vp ) )
2014-11-11 17:04:34 -05:00
}
}
func fillstack ( stk stack , b byte ) {
for p := stk . lo ; p < stk . hi ; p ++ {
* ( * byte ) ( unsafe . Pointer ( p ) ) = b
}
}
2016-02-15 17:38:06 -05:00
func findsghi ( gp * g , stk stack ) uintptr {
var sghi uintptr
for sg := gp . waiting ; sg != nil ; sg = sg . waitlink {
2025-10-02 11:57:58 +00:00
p := sg . elem . uintptr ( ) + uintptr ( sg . c . get ( ) . elemsize )
2016-02-15 17:38:06 -05:00
if stk . lo <= p && p < stk . hi && p > sghi {
sghi = p
}
}
return sghi
}
// syncadjustsudogs adjusts gp's sudogs and copies the part of gp's
// stack they refer to while synchronizing with concurrent channel
// operations. It returns the number of bytes of stack copied.
func syncadjustsudogs ( gp * g , used uintptr , adjinfo * adjustinfo ) uintptr {
if gp . waiting == nil {
return 0
}
// Lock channels to prevent concurrent send/receive.
2016-07-07 17:43:08 -07:00
var lastc * hchan
2016-02-15 17:38:06 -05:00
for sg := gp . waiting ; sg != nil ; sg = sg . waitlink {
2025-10-02 11:57:58 +00:00
if sg . c . get ( ) != lastc {
2020-04-15 12:35:24 -07:00
// There is a ranking cycle here between gscan bit and
// hchan locks. Normally, we only allow acquiring hchan
// locks and then getting a gscan bit. In this case, we
// already have the gscan bit. We allow acquiring hchan
// locks here as a special case, since a deadlock can't
// happen because the G involved must already be
// suspended. So, we get a special hchan lock rank here
// that is lower than gscan, but doesn't allow acquiring
// any other locks other than hchan.
2025-10-02 11:57:58 +00:00
lockWithRank ( & sg . c . get ( ) . lock , lockRankHchanLeaf )
2016-07-07 17:43:08 -07:00
}
2025-10-02 11:57:58 +00:00
lastc = sg . c . get ( )
2016-02-15 17:38:06 -05:00
}
// Adjust sudogs.
adjustsudogs ( gp , adjinfo )
// Copy the part of the stack the sudogs point into
// while holding the lock to prevent races on
// send/receive slots.
var sgsize uintptr
if adjinfo . sghi != 0 {
oldBot := adjinfo . old . hi - used
newBot := oldBot + adjinfo . delta
sgsize = adjinfo . sghi - oldBot
memmove ( unsafe . Pointer ( newBot ) , unsafe . Pointer ( oldBot ) , sgsize )
}
// Unlock channels.
2016-07-07 17:43:08 -07:00
lastc = nil
2016-02-15 17:38:06 -05:00
for sg := gp . waiting ; sg != nil ; sg = sg . waitlink {
2025-10-02 11:57:58 +00:00
if sg . c . get ( ) != lastc {
unlock ( & sg . c . get ( ) . lock )
2016-07-07 17:43:08 -07:00
}
2025-10-02 11:57:58 +00:00
lastc = sg . c . get ( )
2016-02-15 17:38:06 -05:00
}
return sgsize
}
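To see which region the memmove in syncadjustsudogs covers, here is a worked example with made-up bounds (illustrative numbers only): the chunk copied under the channel locks runs from the in-use bottom of the old stack up to the highest sudog element; copystack copies the rest afterwards without the locks.

package main

import "fmt"

func main() {
    const (
        oldHi = uintptr(0x2000) // old stack top
        newHi = uintptr(0x9000) // new stack top
        used  = uintptr(0x300)  // bytes of stack currently in use
        sghi  = uintptr(0x1e80) // highest sudog element on the old stack
    )
    delta := newHi - oldHi

    oldBot := oldHi - used   // 0x1d00: lowest in-use address on the old stack
    newBot := oldBot + delta // where that address lands on the new stack
    sgsize := sghi - oldBot  // 0x180 bytes copied while the channels are locked

    fmt.Printf("copy %#x bytes from %#x to %#x under channel locks\n", sgsize, oldBot, newBot)
    // copystack then copies the remaining used-sgsize bytes lock-free.
}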
2014-11-11 17:04:34 -05:00
// Copies gp's stack to a new stack of a different size.
2014-11-15 08:00:38 -05:00
// Caller must have changed gp status to Gcopystack.
runtime: make copystack/sudog synchronization more explicit
When we copy a stack of a goroutine blocked in a channel operation, we
have to be very careful because other goroutines may be writing to
that goroutine's stack. To handle this, stack copying acquires the
locks for the channels a goroutine is waiting on.
One complication is that stack growth may happen while a goroutine
holds these locks, in which case stack copying must *not* acquire
these locks because that would self-deadlock.
Currently, stack growth never acquires these locks because stack
growth only happens when a goroutine is running, which means it's
either not blocking on a channel or it's holding the channel locks
already. Stack shrinking always acquires these locks because shrinking
happens asynchronously, so the goroutine is never running, so there
are either no locks or they've been released by the goroutine.
However, we're about to change when stack shrinking can happen, which
is going to break the current rules. Rather than find a new way to
derive whether to acquire these locks or not, this CL simply adds a
flag to the g struct that indicates that stack copying should acquire
channel locks. This flag is set while the goroutine is blocked on a
channel op.
For #10958, #24543.
Change-Id: Ia2ac8831b1bfda98d39bb30285e144c4f7eaf9ab
Reviewed-on: https://go-review.googlesource.com/c/go/+/172982
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
2019-04-03 14:00:12 -04:00
func copystack ( gp * g , newsize uintptr ) {
2014-11-11 17:04:34 -05:00
if gp . syscallsp != 0 {
2014-12-27 20:58:00 -08:00
throw ( "stack growth not allowed in system call" )
2014-11-11 17:04:34 -05:00
}
old := gp . stack
if old . lo == 0 {
2014-12-27 20:58:00 -08:00
throw ( "nil stackbase" )
2014-11-11 17:04:34 -05:00
}
used := old . hi - gp . sched . sp
2021-04-12 22:33:54 +00:00
// Add just the difference to gcController.addScannableStack.
// g0 stacks never move, so this will never account for them.
// It's also fine if we have no P, addScannableStack can deal with
// that case.
gcController . addScannableStack ( getg ( ) . m . p . ptr ( ) , int64 ( newsize ) - int64 ( old . hi - old . lo ) )
2014-11-11 17:04:34 -05:00
// allocate new stack
2017-02-09 14:03:49 -05:00
new := stackalloc ( uint32 ( newsize ) )
2014-11-11 17:04:34 -05:00
if stackPoisonCopy != 0 {
fillstack ( new , 0xfd )
}
if stackDebug >= 1 {
2017-02-09 14:11:13 -05:00
print ( "copystack gp=" , gp , " [" , hex ( old . lo ) , " " , hex ( old . hi - used ) , " " , hex ( old . hi ) , "]" , " -> [" , hex ( new . lo ) , " " , hex ( new . hi - used ) , " " , hex ( new . hi ) , "]/" , newsize , "\n" )
2014-11-11 17:04:34 -05:00
}
2016-02-16 12:23:33 -05:00
// Compute adjustment.
2014-11-11 17:04:34 -05:00
var adjinfo adjustinfo
adjinfo . old = old
adjinfo . delta = new . hi - old . hi
2016-02-15 17:38:06 -05:00
// Adjust sudogs, synchronizing with channel ops if necessary.
ncopy := used
2019-04-03 14:00:12 -04:00
if ! gp . activeStackChans {
2022-08-17 14:13:06 +07:00
if newsize < old . hi - old . lo && gp . parkingOnChan . Load ( ) {
runtime: disable stack shrinking in activeStackChans race window
Currently activeStackChans is set before a goroutine blocks on a channel
operation in an unlockf passed to gopark. The trouble is that the
unlockf is called *after* the G's status is changed, and the G's status
is what is used by a concurrent mark worker (calling suspendG) to
determine that a G has successfully been suspended. In this window
between the status change and unlockf, the mark worker could try to
shrink the G's stack, and in particular observe that activeStackChans is
false. This observation will cause the mark worker to *not* synchronize
with concurrent channel operations when it should, and so updating
pointers in the sudog for the blocked goroutine (which may point to the
goroutine's stack) races with channel operations which may also
manipulate the pointer (read it, dereference it, update it, etc.).
Fix the problem by adding a new atomically-updated flag to the g struct
called parkingOnChan, which is non-zero in the race window above. Then,
in isShrinkStackSafe, check if parkingOnChan is zero. The race is
resolved like so:
* Blocking G sets parkingOnChan, then changes status in gopark.
* Mark worker successfully suspends blocking G.
* If the mark worker observes parkingOnChan is non-zero when checking
isShrinkStackSafe, then it's not safe to shrink (we're in the race
window).
* If the mark worker observes parkingOnChan as zero, then because
the mark worker observed the G status change, it can be sure that
gopark's unlockf completed, and gp.activeStackChans will be correct.
The risk of this change is low, since although it reduces the number of
places that stack shrinking is allowed, the window here is incredibly
small. Essentially, every place that it might crash now is replaced with
no shrink.
This change adds a test, but the race window is so small that it's hard
to trigger without a well-placed sleep in park_m. Also, this change
fixes stackGrowRecursive in proc_test.go to actually allocate a 128-byte
stack frame. It turns out the compiler was destructuring the "pad" field
and only allocating one uint64 on the stack.
Fixes #40641.
Change-Id: I7dfbe7d460f6972b8956116b137bc13bc24464e8
Reviewed-on: https://go-review.googlesource.com/c/go/+/247050
Run-TryBot: Michael Knyszek <mknyszek@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Michael Pratt <mpratt@google.com>
Trust: Michael Knyszek <mknyszek@google.com>
2020-08-10 20:02:22 +00:00
// It's not safe for someone to shrink this stack while we're actively
// parking on a channel, but it is safe to grow since we do that
// ourselves and explicitly don't want to synchronize with channels
// since we could self-deadlock.
throw ( "racy sudog adjustment due to parking on channel" )
}
2016-02-15 17:38:06 -05:00
adjustsudogs ( gp , & adjinfo )
} else {
2019-04-03 14:00:12 -04:00
// sudogs may be pointing into the stack and gp has
// released channel locks, so other goroutines could
// be writing to gp's stack. Find the highest such
// pointer so we can handle everything there and below
// carefully. (This shouldn't be far from the bottom
// of the stack, so there's little cost in handling
// everything below it carefully.)
2016-02-15 17:38:06 -05:00
adjinfo . sghi = findsghi ( gp , old )
// Synchronize with channel ops and copy the part of
// the stack they may interact with.
ncopy -= syncadjustsudogs ( gp , used , & adjinfo )
}
// Copy the stack (or the rest of it) to the new location
memmove ( unsafe . Pointer ( new . hi - ncopy ) , unsafe . Pointer ( old . hi - ncopy ) , ncopy )
2016-02-16 12:23:33 -05:00
2016-02-15 17:38:06 -05:00
// Adjust remaining structures that have pointers into stacks.
// We have to do most of these before we traceback the new
// stack because gentraceback uses them.
2014-11-11 17:04:34 -05:00
adjustctxt ( gp , & adjinfo )
adjustdefers ( gp , & adjinfo )
adjustpanics ( gp , & adjinfo )
2016-02-15 17:38:06 -05:00
if adjinfo . sghi != 0 {
adjinfo . sghi += adjinfo . delta
}
2014-11-11 17:04:34 -05:00
// Swap out old stack for new one
gp . stack = new
2023-04-19 14:58:47 -04:00
gp . stackguard0 = new . lo + stackGuard // NOTE: might clobber a preempt request
2014-11-11 17:04:34 -05:00
gp . sched . sp = new . hi - used
2015-08-26 11:39:10 -04:00
gp . stktopsp += adjinfo . delta
2014-11-11 17:04:34 -05:00
2016-02-16 12:23:33 -05:00
// Adjust pointers in the new stack.
2023-02-13 16:20:54 -05:00
var u unwinder
for u . init ( gp , 0 ) ; u . valid ( ) ; u . next ( ) {
adjustframe ( & u . frame , & adjinfo )
}
2016-02-16 12:23:33 -05:00
2025-03-22 00:58:55 +00:00
if valgrindenabled {
if gp . valgrindStackID == 0 {
gp . valgrindStackID = valgrindRegisterStack ( unsafe . Pointer ( new . lo ) , unsafe . Pointer ( new . hi ) )
} else {
valgrindChangeStack ( gp . valgrindStackID , unsafe . Pointer ( new . lo ) , unsafe . Pointer ( new . hi ) )
}
}
2014-11-11 17:04:34 -05:00
// free old stack
2025-09-25 17:26:03 +01:00
if goexperiment . RuntimeSecret && gp . secret > 0 {
// Some portion of the old stack has secret stuff on it.
// We don't really know where we entered secret mode,
// so just clear the whole thing.
// TODO(dmo): traceback until we hit secret.Do? clearing
// is fast and optimized, might not be worth it.
memclrNoHeapPointers ( unsafe . Pointer ( old . lo ) , old . hi - old . lo )
// The memmove call above might put secrets from the stack into registers.
secretEraseRegisters ( )
}
2014-11-11 17:04:34 -05:00
if stackPoisonCopy != 0 {
fillstack ( old , 0xfc )
}
2017-02-09 14:11:13 -05:00
stackfree ( old )
2014-11-11 17:04:34 -05:00
}
// round x up to a power of 2.
func round2 ( x int32 ) int32 {
s := uint ( 0 )
for 1 << s < x {
s ++
}
return 1 << s
}
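round2 loops until the power of two reaches x; an equivalent closed form for x >= 1 is 1 << bits.Len32(uint32(x-1)), shown below only as an aside using the standard math/bits package (the loop is repeated so the sketch runs standalone).

package main

import (
    "fmt"
    "math/bits"
)

// round2 rounds x up to a power of 2, as the loop above does.
func round2(x int32) int32 {
    s := uint(0)
    for 1<<s < x {
        s++
    }
    return 1 << s
}

func main() {
    for _, x := range []int32{1, 3, 4, 5, 2048, 2049} {
        closed := int32(1) << bits.Len32(uint32(x-1)) // closed-form equivalent
        fmt.Println(x, round2(x), closed)             // the two results always match
    }
}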
// Called from runtime·morestack when more stack is needed.
// Allocate larger stack and relocate to new stack.
// Stack growth is multiplicative, for constant amortized cost.
//
// g->atomicstatus will be Grunning or Gscanrunning upon entry.
2019-09-27 12:31:33 -04:00
// If the scheduler is trying to stop this g, then it will set preemptStop.
2016-10-19 18:27:39 -04:00
//
runtime: remove write barriers from newstack, gogo
Currently, newstack and gogo have write barriers for maintaining the
context register saved in g.sched.ctxt. This is troublesome, because
newstack can be called from go:nowritebarrierrec places that can't
allow write barriers. It happens to be benign because g.sched.ctxt
will always be nil on entry to newstack *and* it so happens the
incoming ctxt will also always be nil in these contexts (I
think/hope), but this is playing with fire. It's also desirable to
mark newstack go:nowritebarrierrec to prevent any other, non-benign
write barriers from creeping in, but we can't do that right now
because of this one write barrier.
Fix all of this by observing that g.sched.ctxt is really just a saved
live pointer register. Hence, we can shade it when we scan g's stack
and otherwise move it back and forth between the actual context
register and g.sched.ctxt without write barriers. This means we can
save it in morestack along with all of the other g.sched, eliminate
the save from newstack along with its troublesome write barrier, and
eliminate the shenanigans in gogo to invoke the write barrier when
restoring it.
Once we've done all of this, we can mark newstack
go:nowritebarrierrec.
Fixes #22385.
For #22460.
Change-Id: I43c24958e3f6785b53c1350e1e83c2844e0d1522
Reviewed-on: https://go-review.googlesource.com/72553
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Rick Hudson <rlh@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
2017-10-22 21:37:05 -04:00
// This must be nowritebarrierrec because it can be called as part of
// stack growth from other nowritebarrierrec functions, but the
// compiler doesn't check this.
//
//go:nowritebarrierrec
func newstack ( ) {
2014-11-11 17:04:34 -05:00
thisg := getg ( )
// TODO: double check all gp. shouldn't be getg().
2014-12-22 10:53:51 -05:00
if thisg . m . morebuf . g . ptr ( ) . stackguard0 == stackFork {
2014-12-27 20:58:00 -08:00
throw ( "stack growth after fork" )
2014-11-11 17:04:34 -05:00
}
2014-12-22 10:53:51 -05:00
if thisg . m . morebuf . g . ptr ( ) != thisg . m . curg {
2015-08-24 21:24:23 -04:00
print ( "runtime: newstack called from g=" , hex ( thisg . m . morebuf . g ) , "\n" + "\tm=" , thisg . m , " m->curg=" , thisg . m . curg , " m->g0=" , thisg . m . g0 , " m->gsignal=" , thisg . m . gsignal , "\n" )
2014-11-11 17:04:34 -05:00
morebuf := thisg . m . morebuf
2014-12-22 10:53:51 -05:00
traceback ( morebuf . pc , morebuf . sp , morebuf . lr , morebuf . g . ptr ( ) )
2014-12-27 20:58:00 -08:00
throw ( "runtime: wrong goroutine in newstack" )
2014-11-11 17:04:34 -05:00
}
2016-10-19 18:27:39 -04:00
gp := thisg . m . curg
2025-09-25 17:26:03 +01:00
if goexperiment . RuntimeSecret && gp . secret > 0 {
// If we're entering here from a secret context, clear
// all the registers. This is important because we
// might context switch to a different goroutine which
// is not in secret mode, and it will not be careful
// about clearing its registers.
secretEraseRegisters ( )
}
2016-10-19 18:27:39 -04:00
2014-11-11 17:04:34 -05:00
if thisg . m . curg . throwsplit {
// Update syscallsp, syscallpc in case traceback uses them.
morebuf := thisg . m . morebuf
gp . syscallsp = morebuf . sp
gp . syscallpc = morebuf . pc
2017-12-14 15:32:12 -05:00
pcname , pcoff := "(unknown)" , uintptr ( 0 )
f := findfunc ( gp . sched . pc )
if f . valid ( ) {
pcname = funcname ( f )
2021-09-21 14:05:57 -07:00
pcoff = gp . sched . pc - f . entry ( )
2017-12-14 15:32:12 -05:00
}
print ( "runtime: newstack at " , pcname , "+" , hex ( pcoff ) ,
" sp=" , hex ( gp . sched . sp ) , " stack=[" , hex ( gp . stack . lo ) , ", " , hex ( gp . stack . hi ) , "]\n" ,
2014-11-11 17:04:34 -05:00
"\tmorebuf={pc:" , hex ( morebuf . pc ) , " sp:" , hex ( morebuf . sp ) , " lr:" , hex ( morebuf . lr ) , "}\n" ,
"\tsched={pc:" , hex ( gp . sched . pc ) , " sp:" , hex ( gp . sched . sp ) , " lr:" , hex ( gp . sched . lr ) , " ctxt:" , gp . sched . ctxt , "}\n" )
2014-12-22 10:53:51 -05:00
2017-11-22 15:29:03 -05:00
thisg . m . traceback = 2 // Include runtime frames
2014-12-22 10:53:51 -05:00
traceback ( morebuf . pc , morebuf . sp , morebuf . lr , gp )
2014-12-27 20:58:00 -08:00
throw ( "runtime: stack split at bad time" )
2014-11-11 17:04:34 -05:00
}
morebuf := thisg . m . morebuf
thisg . m . morebuf . pc = 0
thisg . m . morebuf . lr = 0
thisg . m . morebuf . sp = 0
2014-12-22 10:53:51 -05:00
thisg . m . morebuf . g = 0
2015-01-13 15:55:16 -05:00
2015-01-14 16:36:41 -05:00
// NOTE: stackguard0 may change underfoot, if another thread
// is about to try to preempt gp. Read it just once and use that same
// value now and below.
2021-11-05 15:58:34 -04:00
stackguard0 := atomic . Loaduintptr ( & gp . stackguard0 )
2015-01-14 16:36:41 -05:00
2015-01-13 15:55:16 -05:00
// Be conservative about where we preempt.
// We are interested in preempting user Go code, not runtime code.
2015-01-30 15:30:41 -05:00
// If we're holding locks, mallocing, or preemption is disabled, don't
// preempt.
2015-01-13 15:55:16 -05:00
// This check is very early in newstack so that even the status change
// from Grunning to Gwaiting and back doesn't happen in this case.
// That status change by itself can be viewed as a small preemption,
// because the GC might change Gwaiting to Gscanwaiting, and then
// this goroutine has to wait for the GC to finish before continuing.
// If the GC is in some way dependent on this goroutine (for example,
// it needs a lock held by the goroutine), that small preemption turns
// into a real deadlock.
2021-11-05 15:58:34 -04:00
preempt := stackguard0 == stackPreempt
2015-01-14 16:36:41 -05:00
if preempt {
2019-10-04 18:54:00 -04:00
if ! canPreemptM ( thisg . m ) {
2015-01-13 15:55:16 -05:00
// Let the goroutine keep running for now.
// gp->preempt is set, so it will be preempted next time.
2023-04-19 14:58:47 -04:00
gp . stackguard0 = gp . stack . lo + stackGuard
2015-01-13 15:55:16 -05:00
gogo ( & gp . sched ) // never return
}
}
2014-11-11 17:04:34 -05:00
if gp . stack . lo == 0 {
2014-12-27 20:58:00 -08:00
throw ( "missing stack in newstack" )
2014-11-11 17:04:34 -05:00
}
sp := gp . sched . sp
2021-06-16 21:25:19 +00:00
if goarch . ArchFamily == goarch . AMD64 || goarch . ArchFamily == goarch . I386 || goarch . ArchFamily == goarch . WASM {
2014-11-11 17:04:34 -05:00
// The call to morestack cost a word.
2021-06-16 23:05:44 +00:00
sp -= goarch . PtrSize
2014-11-11 17:04:34 -05:00
}
if stackDebug >= 1 || sp < gp . stack . lo {
print ( "runtime: newstack sp=" , hex ( sp ) , " stack=[" , hex ( gp . stack . lo ) , ", " , hex ( gp . stack . hi ) , "]\n" ,
"\tmorebuf={pc:" , hex ( morebuf . pc ) , " sp:" , hex ( morebuf . sp ) , " lr:" , hex ( morebuf . lr ) , "}\n" ,
"\tsched={pc:" , hex ( gp . sched . pc ) , " sp:" , hex ( gp . sched . sp ) , " lr:" , hex ( gp . sched . lr ) , " ctxt:" , gp . sched . ctxt , "}\n" )
}
if sp < gp . stack . lo {
2018-03-13 21:21:25 -07:00
print ( "runtime: gp=" , gp , ", goid=" , gp . goid , ", gp->status=" , hex ( readgstatus ( gp ) ) , "\n " )
2014-11-11 17:04:34 -05:00
print ( "runtime: split stack overflow: " , hex ( sp ) , " < " , hex ( gp . stack . lo ) , "\n" )
2014-12-27 20:58:00 -08:00
throw ( "runtime: split stack overflow" )
2014-11-11 17:04:34 -05:00
}
2015-01-14 16:36:41 -05:00
if preempt {
2014-11-11 17:04:34 -05:00
if gp == thisg . m . g0 {
2014-12-27 20:58:00 -08:00
throw ( "runtime: preempt g0" )
2014-11-11 17:04:34 -05:00
}
2015-04-17 00:21:30 -04:00
if thisg . m . p == 0 && thisg . m . locks == 0 {
2014-12-27 20:58:00 -08:00
throw ( "runtime: g is running but p is not" )
2014-11-11 17:04:34 -05:00
}
2019-09-27 12:27:51 -04:00
2019-09-27 14:34:05 -04:00
if gp . preemptShrink {
// We're at a synchronous safe point now, so
// do the pending stack shrink.
gp . preemptShrink = false
shrinkstack ( gp )
}
2025-06-27 00:59:49 +00:00
// Set a flag indicating that we've been synchronously preempted.
gp . syncSafePoint = true
2019-09-27 12:27:51 -04:00
if gp . preemptStop {
preemptPark ( gp ) // never returns
}
2014-11-11 17:04:34 -05:00
// Act like goroutine called runtime.Gosched.
2014-12-12 18:41:57 +01:00
gopreempt_m ( gp ) // never return
2014-11-11 17:04:34 -05:00
}
// Allocate a bigger segment and move the stack.
2017-02-09 14:11:13 -05:00
oldsize := gp . stack . hi - gp . stack . lo
2014-11-11 17:04:34 -05:00
newsize := oldsize * 2
2020-03-27 11:17:00 -07:00
// Make sure we grow at least as much as needed to fit the new frame.
// (This is just an optimization - the caller of morestack will
// recheck the bounds on return.)
if f := findfunc ( gp . sched . pc ) ; f . valid ( ) {
max := uintptr ( funcMaxSPDelta ( f ) )
2023-04-19 14:58:47 -04:00
needed := max + stackGuard
2021-07-02 08:01:20 +00:00
used := gp . stack . hi - gp . sched . sp
for newsize - used < needed {
2020-03-27 11:17:00 -07:00
newsize *= 2
}
}
2021-11-05 15:58:34 -04:00
if stackguard0 == stackForceMove {
2021-04-01 16:50:53 -04:00
// Forced stack movement used for debugging.
// Don't double the stack (or we may quickly run out
// if this is done repeatedly).
newsize = oldsize
}
2020-09-18 19:15:41 +03:00
if newsize > maxstacksize || newsize > maxstackceiling {
if maxstacksize < maxstackceiling {
print ( "runtime: goroutine stack exceeds " , maxstacksize , "-byte limit\n" )
} else {
print ( "runtime: goroutine stack exceeds " , maxstackceiling , "-byte limit\n" )
}
2019-11-14 21:34:35 -05:00
print ( "runtime: sp=" , hex ( sp ) , " stack=[" , hex ( gp . stack . lo ) , ", " , hex ( gp . stack . hi ) , "]\n" )
2014-12-27 20:58:00 -08:00
throw ( "stack overflow" )
2014-11-11 17:04:34 -05:00
}
2016-02-25 15:37:40 -05:00
// The goroutine must be executing in order to call newstack,
// so it must be Grunning (or Gscanrunning).
casgstatus ( gp , _Grunning , _Gcopystack )
2014-11-15 08:00:38 -05:00
// The concurrent GC will not scan the stack while we are doing the copy since
// the gp is in a Gcopystack status.
2019-04-03 14:00:12 -04:00
	copystack(gp, newsize)
	if stackDebug >= 1 {
		print("stack grow done\n")
	}
	casgstatus(gp, _Gcopystack, _Grunning)
	gogo(&gp.sched)
}

//go:nosplit
func nilfunc() {
	*(*uint8)(nil) = 0
}

// adjust Gobuf as if it executed a call to fn
// and then stopped before the first instruction in fn.
func gostartcallfn(gobuf *gobuf, fv *funcval) {
	var fn unsafe.Pointer
	if fv != nil {
		fn = unsafe.Pointer(fv.fn)
	} else {
		fn = unsafe.Pointer(abi.FuncPCABIInternal(nilfunc))
	}
	gostartcall(gobuf, fn, unsafe.Pointer(fv))
}
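
// A hedged sketch of how gostartcallfn is typically used when a new goroutine
// is set up (loosely modeled on newproc1 in proc.go, details elided; not a
// verbatim excerpt): the gobuf's pc is first pointed at goexit, then "rewound"
// so that fn appears to have been called from goexit and returns there.
//
//	newg.sched.sp = sp
//	newg.sched.pc = abi.FuncPCABI0(goexit) + sys.PCQuantum
//	newg.sched.g = guintptr(unsafe.Pointer(newg))
//	gostartcallfn(&newg.sched, fn)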

// isShrinkStackSafe returns whether it's safe to attempt to shrink
// gp's stack. Shrinking the stack is only safe when we have precise
// pointer maps for all frames on the stack. The caller must hold the
// _Gscan bit for gp or must be running gp itself.
func isShrinkStackSafe(gp *g) bool {
	// We can't copy the stack if we're in a syscall.
	// The syscall might have pointers into the stack and
	// often we don't have precise pointer maps for the innermost
	// frames.
	if gp.syscallsp != 0 {
		return false
	}
	// We also can't copy the stack if we're at an asynchronous
	// safe-point because we don't have precise pointer maps for
	// all frames.
	if gp.asyncSafePoint {
		return false
	}
	// We also can't *shrink* the stack in the window between the
	// goroutine calling gopark to park on a channel and
	// gp.activeStackChans being set.
	if gp.parkingOnChan.Load() {
		return false
	}
	// We also can't copy the stack while a gp is in _Gwaiting solely
	// to make itself available to suspendG.
	//
	// In these cases, the G is actually executing on the system
	// stack, and the execution tracer, mutex profiler, etc. may want
	// to take a stack trace of the G's stack.
	//
	// Note: it's safe to access gp.waitreason here.
	// We're only calling isShrinkStackSafe if we took ownership of the
	// G with the _Gscan bit. This prevents the goroutine from transitioning,
	// which prevents gp.waitreason from changing.
	if readgstatus(gp)&^_Gscan == _Gwaiting && gp.waitreason.isWaitingForSuspendG() {
		return false
	}
	return true
}
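
// A hedged sketch of the intended call pattern (not a verbatim excerpt from
// the runtime): the caller first takes ownership of gp's stack, either by
// acquiring the _Gscan bit (for example via suspendG) or by being gp itself,
// and only then consults isShrinkStackSafe before shrinking.
//
//	// caller owns gp's stack here
//	if isShrinkStackSafe(gp) {
//		shrinkstack(gp)
//	}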

// Maybe shrink the stack being used by gp.
//
// gp must be stopped and we must own its stack. It may be in
// _Grunning, but only if this is our own user G.
func shrinkstack(gp *g) {
	if gp.stack.lo == 0 {
		throw("missing stack in shrinkstack")
	}
	if s := readgstatus(gp); s&_Gscan == 0 {
		// We don't own the stack via _Gscan. We could still
		// own it if this is our own user G and we're on the
		// system stack.
		if !(gp == getg().m.curg && getg() != getg().m.curg && s == _Grunning) {
			// We don't own the stack.
			throw("bad status in shrinkstack")
		}
	}
	if !isShrinkStackSafe(gp) {
		throw("shrinkstack at bad time")
	}
	// Check for self-shrinks while in a libcall. These may have
	// pointers into the stack disguised as uintptrs, but these
	// code paths should all be nosplit.
	if gp == getg().m.curg && gp.m.libcallsp != 0 {
		throw("shrinking stack in libcall")
	}

	if debug.gcshrinkstackoff > 0 {
		return
	}

	oldsize := gp.stack.hi - gp.stack.lo
	newsize := oldsize / 2
	// Don't shrink the allocation below the minimum-sized stack
	// allocation.
	if newsize < fixedStack {
		return
	}
	// Compute how much of the stack is currently in use and only
	// shrink the stack if gp is using less than a quarter of its
	// current stack. The currently used stack includes everything
	// down to the SP plus the stack guard space that ensures
	// there's room for nosplit functions.
	avail := gp.stack.hi - gp.stack.lo
	if used := gp.stack.hi - gp.sched.sp + stackNosplit; used >= avail/4 {
		return
	}
	if stackDebug > 0 {
		print("shrinking stack ", oldsize, "->", newsize, "\n")
	}
	copystack(gp, newsize)
}
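
// To make the shrink condition above concrete, an illustrative sketch with
// made-up numbers (not runtime code): a goroutine on a 32 KiB stack with only
// a couple of KiB of live frames qualifies for halving.
//
//	avail := uintptr(32 << 10)            // hypothetical current stack size
//	used := uintptr(2<<10) + stackNosplit // hypothetical live bytes + guard
//	if used < avail/4 {
//		// used is well below the 8 KiB threshold, so shrinkstack halves
//		// the stack to 16 KiB and copystack moves the frames over.
//	}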

// freeStackSpans frees unused stack spans at the end of GC.
func freeStackSpans() {
	// Scan stack pools for empty stack spans.
	for order := range stackpool {
		lock(&stackpool[order].item.mu)
		list := &stackpool[order].item.span
		for s := list.first; s != nil; {
			next := s.next
			if s.allocCount == 0 {
				list.remove(s)
				s.manualFreeList = 0
				osStackFree(s)
				mheap_.freeManual(s, spanAllocStack)
			}
			s = next
		}
		unlock(&stackpool[order].item.mu)
	}

	// Free large stack spans.
	lock(&stackLarge.lock)
	for i := range stackLarge.free {
		for s := stackLarge.free[i].first; s != nil; {
			next := s.next
			stackLarge.free[i].remove(s)
			osStackFree(s)
			mheap_.freeManual(s, spanAllocStack)
			s = next
		}
	}
	unlock(&stackLarge.lock)
}

// A stackObjectRecord is generated by the compiler for each stack object in a stack frame.
// This record must match the generator code in cmd/compile/internal/liveness/plive.go:emitStackObjects.
type stackObjectRecord struct {
	// offset in frame
	// if negative, offset from varp
	// if non-negative, offset from argp
	off       int32
	size      int32
	ptrBytes  int32
	gcdataoff uint32 // offset to gcdata from moduledata.rodata
}

// gcdata returns the number of bytes that contain pointers, and
// a ptr/nonptr bitmask covering those bytes.
// Note that this bitmask might be larger than internal/abi.MaxPtrmaskBytes.
func (r *stackObjectRecord) gcdata() (uintptr, *byte) {
	ptr := uintptr(unsafe.Pointer(r))
	var mod *moduledata
	for datap := &firstmoduledata; datap != nil; datap = datap.next {
		if datap.gofunc <= ptr && ptr < datap.end {
			mod = datap
			break
		}
	}
	// If you get a panic here due to a nil mod,
	// you may have made a copy of a stackObjectRecord.
	// You must use the original pointer.
	res := mod.rodata + uintptr(r.gcdataoff)
	return uintptr(r.ptrBytes), (*byte)(unsafe.Pointer(res))
}
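
// A hedged sketch of how a consumer might walk the mask returned by gcdata
// (not a verbatim excerpt from the stack scanner): the mask carries one bit
// per pointer-sized word of the object's first ptrBytes bytes, set for words
// that hold pointers.
//
//	n, mask := r.gcdata()
//	for i := uintptr(0); i < n/goarch.PtrSize; i++ {
//		if *addb(mask, i/8)>>(i%8)&1 != 0 {
//			// word i of the stack object holds a pointer
//		}
//	}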

// This is exported as ABI0 via linkname so obj can call it.
//
//go:nosplit
//go:linkname morestackc
func morestackc() {
	throw("attempt to execute system stack code on user stack")
}

// startingStackSize is the amount of stack that new goroutines start with.
// It is a power of 2, and between fixedStack and maxstacksize, inclusive.
// startingStackSize is updated every GC by tracking the average size of
// stacks scanned during the GC.
var startingStackSize uint32 = fixedStack

func gcComputeStartingStackSize() {
	if debug.adaptivestackstart == 0 {
		return
	}
	// For details, see the design doc at
	// https://docs.google.com/document/d/1YDlGIdVTPnmUiTAavlZxBI1d9pwGQgZT7IKFKlIXohQ/edit?usp=sharing
	// The basic algorithm is to track the average size of stacks
	// and start goroutines with stack equal to that average size.
	// Starting at the average size uses at most 2x the space that
	// an ideal algorithm would have used.
	// This is just a heuristic to avoid excessive stack growth work
	// early in a goroutine's lifetime. See issue 18138. Stacks that
	// are allocated too small can still grow, and stacks allocated
	// too large can still shrink.
	var scannedStackSize uint64
	var scannedStacks uint64
	for _, p := range allp {
		scannedStackSize += p.scannedStackSize
		scannedStacks += p.scannedStacks
		// Reset for next time
		p.scannedStackSize = 0
		p.scannedStacks = 0
	}
	if scannedStacks == 0 {
		startingStackSize = fixedStack
		return
	}
	avg := scannedStackSize/scannedStacks + stackGuard
	// Note: we add stackGuard to ensure that a goroutine that
	// uses the average space will not trigger a growth.
	if avg > uint64(maxstacksize) {
		avg = uint64(maxstacksize)
	}
	if avg < fixedStack {
		avg = fixedStack
	}
	// Note: maxstacksize fits in 30 bits, so avg also does.
	startingStackSize = uint32(round2(int32(avg)))
}
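
// A worked example of the heuristic above with made-up numbers (not runtime
// code): if the last GC scanned 256 goroutine stacks totalling 1 MiB, the
// average is 4 KiB; adding the guard and rounding up to a power of 2 picks
// the next starting size.
//
//	scannedStackSize := uint64(1 << 20) // hypothetical total scanned bytes
//	scannedStacks := uint64(256)        // hypothetical number of stacks scanned
//	avg := scannedStackSize/scannedStacks + stackGuard
//	// avg is 4096 plus the guard, so with typical guard sizes round2 rounds
//	// it up to 8192 and new goroutines start with 8 KiB stacks until the
//	// next GC recomputes the average.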