go/src/runtime/stack.go

// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package runtime
import (
"internal/abi"
"internal/cpu"
"internal/goarch"
"internal/goexperiment"
"internal/goos"
"internal/runtime/atomic"
"internal/runtime/gc"
"internal/runtime/sys"
"math/bits"
"unsafe"
)
/*
Stack layout parameters.
Included both by runtime (compiled via 6c) and linkers (compiled via gcc).

The per-goroutine g->stackguard is set to point StackGuard bytes
above the bottom of the stack. Each function compares its stack
pointer against g->stackguard to check for overflow. To cut one
instruction from the check sequence for functions with tiny frames,
the stack is allowed to protrude StackSmall bytes below the stack
guard. Functions with large frames don't bother with the check and
always call morestack. The sequences are (for amd64, others are
similar):

	guard = g->stackguard
	frame = function's stack frame size
	argsize = size of function arguments (call + return)

	stack frame size <= StackSmall:
		CMPQ guard, SP
		JHI 3(PC)
		MOVQ m->morearg, $(argsize << 32)
		CALL morestack(SB)

	stack frame size > StackSmall but < StackBig
		LEAQ (frame-StackSmall)(SP), R0
		CMPQ guard, R0
		JHI 3(PC)
		MOVQ m->morearg, $(argsize << 32)
		CALL morestack(SB)

	stack frame size >= StackBig:
		MOVQ m->morearg, $((argsize << 32) | frame)
		CALL morestack(SB)

The bottom StackGuard - StackSmall bytes are important: there has
to be enough room to execute functions that refuse to check for
stack overflow, either because they need to be adjacent to the
actual caller's frame (deferproc) or because they handle the imminent
stack overflow (morestack).

For example, deferproc might call malloc, which does one of the
above checks (without allocating a full frame), which might trigger
a call to morestack. This sequence needs to fit in the bottom
section of the stack. On amd64, morestack's frame is 40 bytes, and
deferproc's frame is 56 bytes. That fits well within the
StackGuard - StackSmall bytes at the bottom.

The linkers explore all possible call traces involving non-splitting
functions to make sure that this limit cannot be violated.
*/
const (
// stackSystem is a number of additional bytes to add
// to each stack below the usual guard area for OS-specific
// purposes like signal handling. Used on Windows, Plan 9,
// and iOS because they do not use a separate stack.
stackSystem = goos.IsWindows*4096 + goos.IsPlan9*512 + goos.IsIos*goarch.IsArm64*1024
// The minimum size of stack used by Go code
stackMin = 2048
// The minimum stack size to allocate.
// The hackery here rounds fixedStack0 up to a power of 2.
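// For example, on Windows stackSystem is 4096, so fixedStack0 is
// 2048+4096 = 6144 and the or-cascade below rounds it up to 8192; on
// Linux stackSystem is 0 and fixedStack0 is already a power of 2 (2048).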
fixedStack0 = stackMin + stackSystem
fixedStack1 = fixedStack0 - 1
fixedStack2 = fixedStack1 | (fixedStack1 >> 1)
fixedStack3 = fixedStack2 | (fixedStack2 >> 2)
fixedStack4 = fixedStack3 | (fixedStack3 >> 4)
fixedStack5 = fixedStack4 | (fixedStack4 >> 8)
fixedStack6 = fixedStack5 | (fixedStack5 >> 16)
fixedStack = fixedStack6 + 1
// stackNosplit is the maximum number of bytes that a chain of NOSPLIT
// functions can use.
// This arithmetic must match that in cmd/internal/objabi/stack.go:StackNosplit.
stackNosplit = abi.StackNosplitBase * sys.StackGuardMultiplier
// The stack guard is a pointer this many bytes above the
// bottom of the stack.
//
// The guard leaves enough room for a stackNosplit chain of NOSPLIT calls
// plus one stackSmall frame plus stackSystem bytes for the OS.
// This arithmetic must match that in cmd/internal/objabi/stack.go:StackLimit.
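// As a rough point of reference (using the values at the time of writing:
// StackNosplitBase 800, StackSmall 128, and a StackGuardMultiplier of 1),
// this works out to 928 bytes on linux/amd64; instrumented builds and some
// OSes use a larger multiplier, and stackSystem adds more on Windows,
// Plan 9, and iOS.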
stackGuard = stackNosplit + stackSystem + abi.StackSmall
)
const (
// stackDebug == 0: no logging
// == 1: logging of per-stack operations
// == 2: logging of per-frame operations
// == 3: logging of per-word updates
// == 4: logging of per-word reads
stackDebug = 0
stackFromSystem = 0 // allocate stacks from system memory instead of the heap
stackFaultOnFree = 0 // old stacks are mapped noaccess to detect use after free
stackNoCache = 0 // disable per-P small stack caches
// check the BP links during traceback.
debugCheckBP = false
)
var (
stackPoisonCopy = 0 // fill stack that should not be accessed with garbage, to detect bad dereferences during copy
)
const (
uintptrMask = 1<<(8*goarch.PtrSize) - 1
// The values below can be stored to g.stackguard0 to force
// the next stack check to fail.
// These are all larger than any real SP.
// Goroutine preemption request.
// 0xfffffade in hex.
stackPreempt = uintptrMask & -1314
// Thread is forking. Causes a split stack check failure.
// 0xfffffb2e in hex.
stackFork = uintptrMask & -1234
// Force a stack movement. Used for debugging.
// 0xfffffeed in hex.
stackForceMove = uintptrMask & -275
// stackPoisonMin is the lowest allowed stack poison value.
stackPoisonMin = uintptrMask & -4096
)
// Global pool of spans that have free stacks.
// Stacks are assigned an order according to size.
//
// order = log_2(size/FixedStack)
//
// There is a free list for each order.
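//
// For example, with fixedStack = 2048 (the value on most platforms), a
// 2 KiB stack is order 0, a 4 KiB stack is order 1, and so on for
// _NumStackOrders sizes; anything larger is served by stackLarge or
// allocated directly from the heap.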
var stackpool [_NumStackOrders]struct {
item stackpoolItem
_ [(cpu.CacheLinePadSize - unsafe.Sizeof(stackpoolItem{})%cpu.CacheLinePadSize) % cpu.CacheLinePadSize]byte
}
type stackpoolItem struct {
_ sys.NotInHeap
mu mutex
span mSpanList
}
// Global pool of large stack spans.
var stackLarge struct {
lock mutex
free [heapAddrBits - gc.PageShift]mSpanList // free lists by log_2(s.npages)
}
func stackinit() {
if _StackCacheSize&pageMask != 0 {
throw("cache size must be a multiple of page size")
}
for i := range stackpool {
stackpool[i].item.span.init()
lockInit(&stackpool[i].item.mu, lockRankStackpool)
}
for i := range stackLarge.free {
stackLarge.free[i].init()
lockInit(&stackLarge.lock, lockRankStackLarge)
}
}
// stacklog2 returns ⌊log_2(n)⌋.
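// For example, stacklog2(1) == 0, stacklog2(4) == 2, and stacklog2(6) == 2.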
func stacklog2(n uintptr) int {
if n == 0 {
return 0
}
return bits.Len64(uint64(n)) - 1
}
// Allocates a stack from the free pool. Must be called with
// stackpool[order].item.mu held.
func stackpoolalloc(order uint8) gclinkptr {
list := &stackpool[order].item.span
s := list.first
lockWithRankMayAcquire(&mheap_.lock, lockRankMheap)
if s == nil {
// no free stacks. Allocate another span worth.
s = mheap_.allocManual(_StackCacheSize>>gc.PageShift, spanAllocStack)
if s == nil {
throw("out of memory")
}
if s.allocCount != 0 {
throw("bad allocCount")
}
if s.manualFreeList.ptr() != nil {
throw("bad manualFreeList")
}
osStackAlloc(s)
s.elemsize = fixedStack << order
for i := uintptr(0); i < _StackCacheSize; i += s.elemsize {
x := gclinkptr(s.base() + i)
if valgrindenabled {
// The address of x.ptr() becomes the base of stacks. We need to
// mark it allocated here and in stackfree and stackpoolfree, and free'd in
// stackalloc in order to avoid overlapping allocations and
// uninitialized memory errors in valgrind.
valgrindMalloc(unsafe.Pointer(x.ptr()), unsafe.Sizeof(x.ptr()))
}
x.ptr().next = s.manualFreeList
s.manualFreeList = x
}
list.insert(s)
}
x := s.manualFreeList
if x.ptr() == nil {
throw("span has no free stacks")
}
s.manualFreeList = x.ptr().next
s.allocCount++
if s.manualFreeList.ptr() == nil {
// all stacks in s are allocated.
list.remove(s)
}
return x
}
// Adds stack x to the free pool. Must be called with stackpool[order].item.mu held.
func stackpoolfree(x gclinkptr, order uint8) {
s := spanOfUnchecked(uintptr(x))
if s.state.get() != mSpanManual {
throw("freeing stack not in a stack span")
}
if s.manualFreeList.ptr() == nil {
// s will now have a free stack
stackpool[order].item.span.insert(s)
}
x.ptr().next = s.manualFreeList
s.manualFreeList = x
s.allocCount--
if gcphase == _GCoff && s.allocCount == 0 {
// Span is completely free. Return it to the heap
// immediately if we're sweeping.
//
// If GC is active, we delay the free until the end of
// GC to avoid the following type of situation:
//
// 1) GC starts, scans a SudoG but does not yet mark the SudoG.elem pointer
// 2) The stack that pointer points to is copied
// 3) The old stack is freed
// 4) The containing span is marked free
// 5) GC attempts to mark the SudoG.elem pointer. The
// marking fails because the pointer looks like a
// pointer into a free span.
//
// By not freeing, we prevent step #4 until GC is done.
stackpool[order].item.span.remove(s)
s.manualFreeList = 0
osStackFree(s)
mheap_.freeManual(s, spanAllocStack)
}
}
// stackcacherefill/stackcacherelease implement a global pool of stack segments.
// The pool is required to prevent unlimited growth of per-thread caches.
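// Refill and release both target _StackCacheSize/2 rather than empty/full,
// so a P hovering around the boundary doesn't take the global pool lock on
// every stack allocation and free (simple hysteresis).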
//
//go:systemstack
func stackcacherefill(c *mcache, order uint8) {
if stackDebug >= 1 {
print("stackcacherefill order=", order, "\n")
}
// Grab some stacks from the global cache.
// Grab half of the allowed capacity (to prevent thrashing).
var list gclinkptr
var size uintptr
lock(&stackpool[order].item.mu)
for size < _StackCacheSize/2 {
x := stackpoolalloc(order)
x.ptr().next = list
list = x
size += fixedStack << order
}
unlock(&stackpool[order].item.mu)
c.stackcache[order].list = list
c.stackcache[order].size = size
}
//go:systemstack
func stackcacherelease(c *mcache, order uint8) {
if stackDebug >= 1 {
print("stackcacherelease order=", order, "\n")
}
x := c.stackcache[order].list
size := c.stackcache[order].size
lock(&stackpool[order].item.mu)
for size > _StackCacheSize/2 {
y := x.ptr().next
stackpoolfree(x, order)
x = y
size -= fixedStack << order
}
unlock(&stackpool[order].item.mu)
c.stackcache[order].list = x
c.stackcache[order].size = size
}
//go:systemstack
func stackcache_clear(c *mcache) {
if stackDebug >= 1 {
print("stackcache clear\n")
}
for order := uint8(0); order < _NumStackOrders; order++ {
lock(&stackpool[order].item.mu)
x := c.stackcache[order].list
for x.ptr() != nil {
y := x.ptr().next
stackpoolfree(x, order)
x = y
}
c.stackcache[order].list = 0
c.stackcache[order].size = 0
unlock(&stackpool[order].item.mu)
}
}
// stackalloc allocates an n byte stack.
//
// stackalloc must run on the system stack because it uses per-P
// resources and must not split the stack.
//
//go:systemstack
func stackalloc(n uint32) stack {
// Stackalloc must be called on scheduler stack, so that we
// never try to grow the stack during the code that stackalloc runs.
// Doing so would cause a deadlock (issue 1547).
thisg := getg()
if thisg != thisg.m.g0 {
throw("stackalloc not on scheduler stack")
}
if n&(n-1) != 0 {
throw("stack size not a power of 2")
}
if stackDebug >= 1 {
print("stackalloc ", n, "\n")
}
if debug.efence != 0 || stackFromSystem != 0 {
n = uint32(alignUp(uintptr(n), physPageSize))
v := sysAlloc(uintptr(n), &memstats.stacks_sys, "goroutine stack (system)")
if v == nil {
throw("out of memory (stackalloc)")
}
return stack{uintptr(v), uintptr(v) + uintptr(n)}
}
// Small stacks are allocated with a fixed-size free-list allocator.
// If we need a stack of a bigger size, we fall back on allocating
// a dedicated span.
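//
// For example, with the common values fixedStack = 2048, _NumStackOrders = 4,
// and _StackCacheSize = 32 KiB, power-of-2 requests up to 16 KiB take this
// path; n = 4096 computes order 1 below.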
var v unsafe.Pointer
if n < fixedStack<<_NumStackOrders && n < _StackCacheSize {
order := uint8(0)
n2 := n
for n2 > fixedStack {
order++
n2 >>= 1
}
var x gclinkptr
if stackNoCache != 0 || thisg.m.p == 0 || thisg.m.preemptoff != "" {
// thisg.m.p == 0 can happen in the guts of exitsyscall
// or procresize. Just get a stack from the global pool.
// Also don't touch stackcache during gc
// as it's flushed concurrently.
lock(&stackpool[order].item.mu)
x = stackpoolalloc(order)
unlock(&stackpool[order].item.mu)
} else {
c := thisg.m.p.ptr().mcache
x = c.stackcache[order].list
if x.ptr() == nil {
stackcacherefill(c, order)
x = c.stackcache[order].list
}
c.stackcache[order].list = x.ptr().next
c.stackcache[order].size -= uintptr(n)
}
if valgrindenabled {
// We're about to allocate the stack region starting at x.ptr().
// To prevent valgrind from complaining about overlapping allocations,
// we need to mark the (previously allocated) memory as free'd.
valgrindFree(unsafe.Pointer(x.ptr()))
}
v = unsafe.Pointer(x)
} else {
var s *mspan
npage := uintptr(n) >> gc.PageShift
log2npage := stacklog2(npage)
// Try to get a stack from the large stack cache.
lock(&stackLarge.lock)
if !stackLarge.free[log2npage].isEmpty() {
s = stackLarge.free[log2npage].first
stackLarge.free[log2npage].remove(s)
}
unlock(&stackLarge.lock)
lockWithRankMayAcquire(&mheap_.lock, lockRankMheap)
if s == nil {
// Allocate a new stack from the heap.
s = mheap_.allocManual(npage, spanAllocStack)
if s == nil {
throw("out of memory")
}
osStackAlloc(s)
s.elemsize = uintptr(n)
}
v = unsafe.Pointer(s.base())
}
if traceAllocFreeEnabled() {
trace := traceAcquire()
if trace.ok() {
trace.GoroutineStackAlloc(uintptr(v), uintptr(n))
traceRelease(trace)
}
}
if raceenabled {
racemalloc(v, uintptr(n))
}
if msanenabled {
msanmalloc(v, uintptr(n))
}
if asanenabled {
asanunpoison(v, uintptr(n))
}
if valgrindenabled {
valgrindMalloc(v, uintptr(n))
}
if stackDebug >= 1 {
print(" allocated ", v, "\n")
}
return stack{uintptr(v), uintptr(v) + uintptr(n)}
}
// stackfree frees an n byte stack allocation at stk.
//
// stackfree must run on the system stack because it uses per-P
// resources and must not split the stack.
//
//go:systemstack
func stackfree(stk stack) {
gp := getg()
v := unsafe.Pointer(stk.lo)
n := stk.hi - stk.lo
if n&(n-1) != 0 {
throw("stack not a power of 2")
}
if stk.lo+n < stk.hi {
throw("bad stack size")
}
if stackDebug >= 1 {
println("stackfree", v, n)
memclrNoHeapPointers(v, n) // for testing, clobber stack data
}
if debug.efence != 0 || stackFromSystem != 0 {
if debug.efence != 0 || stackFaultOnFree != 0 {
sysFault(v, n)
} else {
sysFree(v, n, &memstats.stacks_sys)
}
return
}
if traceAllocFreeEnabled() {
trace := traceAcquire()
if trace.ok() {
trace.GoroutineStackFree(uintptr(v))
traceRelease(trace)
}
}
if msanenabled {
msanfree(v, n)
}
if asanenabled {
asanpoison(v, n)
}
if valgrindenabled {
valgrindFree(v)
}
if n < fixedStack<<_NumStackOrders && n < _StackCacheSize {
order := uint8(0)
n2 := n
for n2 > fixedStack {
order++
n2 >>= 1
}
x := gclinkptr(v)
if stackNoCache != 0 || gp.m.p == 0 || gp.m.preemptoff != "" {
lock(&stackpool[order].item.mu)
if valgrindenabled {
// x.ptr() is the head of the list of free stacks, and will be used
// when allocating a new stack, so it has to be marked allocated.
valgrindMalloc(unsafe.Pointer(x.ptr()), unsafe.Sizeof(x.ptr()))
}
stackpoolfree(x, order)
unlock(&stackpool[order].item.mu)
} else {
c := gp.m.p.ptr().mcache
if c.stackcache[order].size >= _StackCacheSize {
stackcacherelease(c, order)
}
if valgrindenabled {
// x.ptr() is the head of the list of free stacks, and will
// be used when allocating a new stack, so it has to be
// marked allocated.
valgrindMalloc(unsafe.Pointer(x.ptr()), unsafe.Sizeof(x.ptr()))
}
x.ptr().next = c.stackcache[order].list
c.stackcache[order].list = x
c.stackcache[order].size += n
}
} else {
s := spanOfUnchecked(uintptr(v))
if s.state.get() != mSpanManual {
println(hex(s.base()), v)
throw("bad span state")
}
if gcphase == _GCoff {
// Free the stack immediately if we're
// sweeping.
osStackFree(s)
mheap_.freeManual(s, spanAllocStack)
} else {
// If the GC is running, we can't return a
// stack span to the heap because it could be
// reused as a heap span, and this state
// change would race with GC. Add it to the
// large stack cache instead.
log2npage := stacklog2(s.npages)
lock(&stackLarge.lock)
stackLarge.free[log2npage].insert(s)
unlock(&stackLarge.lock)
}
}
}
var maxstacksize uintptr = 1 << 20 // enough until runtime.main sets it for real
var maxstackceiling = maxstacksize
var ptrnames = []string{
0: "scalar",
1: "ptr",
}
// Stack frame layout
//
// (x86)
// +------------------+
// | args from caller |
// +------------------+ <- frame->argp
// | return address |
// +------------------+
// | caller's BP (*) | (*) if framepointer_enabled && varp > sp
// +------------------+ <- frame->varp
// | locals |
// +------------------+
// | args to callee |
// +------------------+ <- frame->sp
//
// (arm)
// +------------------+
// | args from caller |
// +------------------+ <- frame->argp
// | caller's retaddr |
// +------------------+
// | caller's FP (*) | (*) on ARM64, if framepointer_enabled && varp > sp
// +------------------+ <- frame->varp
// | locals |
// +------------------+
// | args to callee |
// +------------------+
// | return address |
// +------------------+ <- frame->sp
//
// varp > sp means that the function has a frame;
// varp == sp means frameless function.
type adjustinfo struct {
old stack
delta uintptr // ptr distance from old to new stack (newbase - oldbase)
// sghi is the highest sudog.elem on the stack.
sghi uintptr
}
// adjustpointer checks whether *vpp is in the old stack described by adjinfo.
// If so, it rewrites *vpp to point into the new stack.
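// For illustration, with hypothetical bounds: if the old stack is
// [0x1000, 0x2000) and the new stack is [0x5000, 0x6000), then
// adjinfo.delta = 0x6000 - 0x2000 = 0x4000, so a slot holding 0x1a00 is
// rewritten to 0x5a00, while values outside [old.lo, old.hi) are left
// untouched:
//
//	adjinfo := adjustinfo{old: stack{lo: 0x1000, hi: 0x2000}, delta: 0x4000}
//	adjustpointer(&adjinfo, vpp) // *vpp: 0x1a00 -> 0x5a00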
func adjustpointer(adjinfo *adjustinfo, vpp unsafe.Pointer) {
pp := (*uintptr)(vpp)
p := *pp
if stackDebug >= 4 {
print(" ", pp, ":", hex(p), "\n")
}
if valgrindenabled {
// p is a pointer on a stack. It is inherently initialized, as
// everything on the stack is, but valgrind, for _some unknown reason_,
// sometimes thinks it's uninitialized, and flags operations on p below
// as uninitialized. We just initialize it if valgrind thinks it's
// uninitialized.
//
// See go.dev/issues/73801.
valgrindMakeMemDefined(unsafe.Pointer(&p), unsafe.Sizeof(&p))
}
if adjinfo.old.lo <= p && p < adjinfo.old.hi {
*pp = p + adjinfo.delta
if stackDebug >= 3 {
print(" adjust ptr ", pp, ":", hex(p), " -> ", hex(*pp), "\n")
}
}
}
// Information from the compiler about the layout of stack frames.
// Note: this type must agree with reflect.bitVector.
type bitvector struct {
n int32 // # of bits
bytedata *uint8
}
// ptrbit returns the i'th bit in bv.
// ptrbit is less efficient than iterating directly over bitvector bits,
// and should only be used in non-performance-critical code.
// See adjustpointers for an example of a high-efficiency walk of a bitvector.
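// For example, ptrbit(10) loads bv.bytedata[1] (byte 10/8) and returns
// bit 10%8 = 2 of that byte; a result of 1 means the word described by
// bit 10 of the bitmap is a live pointer.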
func (bv *bitvector) ptrbit(i uintptr) uint8 {
b := *(addb(bv.bytedata, i/8))
return (b >> (i % 8)) & 1
}
// bv describes the memory starting at address scanp.
// Adjust any pointers contained therein.
func adjustpointers(scanp unsafe.Pointer, bv *bitvector, adjinfo *adjustinfo, f funcInfo) {
minp := adjinfo.old.lo
maxp := adjinfo.old.hi
delta := adjinfo.delta
num := uintptr(bv.n)
// If this frame might contain channel receive slots, use CAS
// to adjust pointers. If the slot hasn't been received into
// yet, it may contain stack pointers and a concurrent send
// could race with adjusting those pointers. (The sent value
// itself can never contain stack pointers.)
useCAS := uintptr(scanp) < adjinfo.sghi
for i := uintptr(0); i < num; i += 8 {
if stackDebug >= 4 {
for j := uintptr(0); j < 8; j++ {
print(" ", add(scanp, (i+j)*goarch.PtrSize), ":", ptrnames[bv.ptrbit(i+j)], ":", hex(*(*uintptr)(add(scanp, (i+j)*goarch.PtrSize))), " # ", i, " ", *addb(bv.bytedata, i/8), "\n")
}
}
b := *(addb(bv.bytedata, i/8))
for b != 0 {
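// sys.TrailingZeros8 gives the index of the lowest set bit, i.e. the
// next pointer slot within this byte of the bitmap; b &= b-1 below
// clears that bit so the loop visits each set bit exactly once.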
j := uintptr(sys.TrailingZeros8(b))
b &= b - 1
pp := (*uintptr)(add(scanp, (i+j)*goarch.PtrSize))
retry:
p := *pp
if f.valid() && 0 < p && p < minLegalPointer && debug.invalidptr != 0 {
// Looks like a junk value in a pointer slot.
// Live analysis wrong?
getg().m.traceback = 2
print("runtime: bad pointer in frame ", funcname(f), " at ", pp, ": ", hex(p), "\n")
throw("invalid pointer found on stack")
}
if minp <= p && p < maxp {
if stackDebug >= 3 {
print("adjust ptr ", hex(p), " ", funcname(f), "\n")
}
if useCAS {
ppu := (*unsafe.Pointer)(unsafe.Pointer(pp))
if !atomic.Casp1(ppu, unsafe.Pointer(p), unsafe.Pointer(p+delta)) {
goto retry
}
} else {
*pp = p + delta
}
}
}
}
}
// Note: the argument/return area is adjusted by the callee.
func adjustframe(frame *stkframe, adjinfo *adjustinfo) {
if frame.continpc == 0 {
// Frame is dead.
return
}
f := frame.fn
if stackDebug >= 2 {
print(" adjusting ", funcname(f), " frame=[", hex(frame.sp), ",", hex(frame.fp), "] pc=", hex(frame.pc), " continpc=", hex(frame.continpc), "\n")
}
// Adjust saved frame pointer if there is one.
if (goarch.ArchFamily == goarch.AMD64 || goarch.ArchFamily == goarch.ARM64) && frame.argp-frame.varp == 2*goarch.PtrSize {
if stackDebug >= 3 {
print(" saved bp\n")
}
if debugCheckBP {
// Frame pointers should always point to the next higher frame on
// the Go stack (or be nil, for the top frame on the stack).
bp := *(*uintptr)(unsafe.Pointer(frame.varp))
if bp != 0 && (bp < adjinfo.old.lo || bp >= adjinfo.old.hi) {
println("runtime: found invalid frame pointer")
print("bp=", hex(bp), " min=", hex(adjinfo.old.lo), " max=", hex(adjinfo.old.hi), "\n")
throw("bad frame pointer")
}
}
// On AMD64, this is the caller's frame pointer saved in the current
// frame.
// On ARM64, this is the frame pointer of the caller's caller saved
// by the caller in its frame (one word below its SP).
adjustpointer(adjinfo, unsafe.Pointer(frame.varp))
}
locals, args, objs := frame.getStackMap(true)
// Adjust local variables if stack frame has been allocated.
if locals.n > 0 {
size := uintptr(locals.n) * goarch.PtrSize
adjustpointers(unsafe.Pointer(frame.varp-size), &locals, adjinfo, f)
}
// Adjust arguments.
if args.n > 0 {
if stackDebug >= 3 {
print(" args\n")
}
adjustpointers(unsafe.Pointer(frame.argp), &args, adjinfo, funcInfo{})
}
// Adjust pointers in all stack objects (whether they are live or not).
// See comments in mgcmark.go:scanframeworker.
if frame.varp != 0 {
for i := range objs {
obj := &objs[i]
off := obj.off
base := frame.varp // locals base pointer
if off >= 0 {
base = frame.argp // arguments and return values base pointer
}
p := base + uintptr(off)
if p < frame.sp {
// Object hasn't been allocated in the frame yet.
// (Happens when the stack bounds check fails and
// we call into morestack.)
continue
}
ptrBytes, gcData := obj.gcdata()
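// gcData is a pointer bitmap with one bit per word of the object:
// bit i/PtrSize is set when the word at offset i is a pointer slot
// that may need adjusting.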
for i := uintptr(0); i < ptrBytes; i += goarch.PtrSize {
if *addb(gcData, i/(8*goarch.PtrSize))>>(i/goarch.PtrSize&7)&1 != 0 {
adjustpointer(adjinfo, unsafe.Pointer(p+i))
}
}
}
}
}
func adjustctxt(gp *g, adjinfo *adjustinfo) {
adjustpointer(adjinfo, unsafe.Pointer(&gp.sched.ctxt))
if !framepointer_enabled {
return
}
if debugCheckBP {
bp := gp.sched.bp
if bp != 0 && (bp < adjinfo.old.lo || bp >= adjinfo.old.hi) {
println("runtime: found invalid top frame pointer")
print("bp=", hex(bp), " min=", hex(adjinfo.old.lo), " max=", hex(adjinfo.old.hi), "\n")
throw("bad top frame pointer")
}
}
oldfp := gp.sched.bp
adjustpointer(adjinfo, unsafe.Pointer(&gp.sched.bp))
if GOARCH == "arm64" {
// On ARM64, the frame pointer is saved one word *below* the SP,
// which is not copied or adjusted in any frame. Do it explicitly
// here.
if oldfp == gp.sched.sp-goarch.PtrSize {
memmove(unsafe.Pointer(gp.sched.bp), unsafe.Pointer(oldfp), goarch.PtrSize)
adjustpointer(adjinfo, unsafe.Pointer(gp.sched.bp))
}
}
}
func adjustdefers(gp *g, adjinfo *adjustinfo) {
// Adjust pointers in the Defer structs.
// We need to do this first because we need to adjust the
// defer.link fields so we always work on the new stack.
adjustpointer(adjinfo, unsafe.Pointer(&gp._defer))
for d := gp._defer; d != nil; d = d.link {
adjustpointer(adjinfo, unsafe.Pointer(&d.fn))
adjustpointer(adjinfo, unsafe.Pointer(&d.sp))
adjustpointer(adjinfo, unsafe.Pointer(&d.link))
}
}
func adjustpanics(gp *g, adjinfo *adjustinfo) {
// Panics are on stack and already adjusted.
// Update pointer to head of list in G.
adjustpointer(adjinfo, unsafe.Pointer(&gp._panic))
}
func adjustsudogs(gp *g, adjinfo *adjustinfo) {
// The data elements pointed to by a SudoG structure
// might be on the stack.
for s := gp.waiting; s != nil; s = s.waitlink {
adjustpointer(adjinfo, unsafe.Pointer(&s.elem.vu))
adjustpointer(adjinfo, unsafe.Pointer(&s.elem.vp))
}
}
func fillstack(stk stack, b byte) {
for p := stk.lo; p < stk.hi; p++ {
*(*byte)(unsafe.Pointer(p)) = b
}
}
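// findsghi returns the maximum value of elem+elemsize over gp's waiting
// sudogs that falls within stk, or 0 if no sudog element points into stk.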
func findsghi(gp *g, stk stack) uintptr {
var sghi uintptr
for sg := gp.waiting; sg != nil; sg = sg.waitlink {
p := sg.elem.uintptr() + uintptr(sg.c.get().elemsize)
if stk.lo <= p && p < stk.hi && p > sghi {
sghi = p
}
}
return sghi
}
// syncadjustsudogs adjusts gp's sudogs and copies the part of gp's
// stack they refer to while synchronizing with concurrent channel
// operations. It returns the number of bytes of stack copied.
func syncadjustsudogs(gp *g, used uintptr, adjinfo *adjustinfo) uintptr {
if gp.waiting == nil {
return 0
}
// Lock channels to prevent concurrent send/receive.
var lastc *hchan
for sg := gp.waiting; sg != nil; sg = sg.waitlink {
if sg.c.get() != lastc {
// There is a ranking cycle here between gscan bit and
// hchan locks. Normally, we only allow acquiring hchan
// locks and then getting a gscan bit. In this case, we
// already have the gscan bit. We allow acquiring hchan
// locks here as a special case, since a deadlock can't
// happen because the G involved must already be
// suspended. So, we get a special hchan lock rank here
// that is lower than gscan, but doesn't allow acquiring
// any other locks other than hchan.
lockWithRank(&sg.c.get().lock, lockRankHchanLeaf)
}
lastc = sg.c.get()
}
// Adjust sudogs.
adjustsudogs(gp, adjinfo)
// Copy the part of the stack the sudogs point into
// while holding the lock to prevent races on
// send/receive slots.
var sgsize uintptr
if adjinfo.sghi != 0 {
oldBot := adjinfo.old.hi - used
newBot := oldBot + adjinfo.delta
sgsize = adjinfo.sghi - oldBot
memmove(unsafe.Pointer(newBot), unsafe.Pointer(oldBot), sgsize)
}
// Unlock channels.
lastc = nil
for sg := gp.waiting; sg != nil; sg = sg.waitlink {
if sg.c.get() != lastc {
unlock(&sg.c.get().lock)
}
lastc = sg.c.get()
}
return sgsize
}
// Copies gp's stack to a new stack of a different size.
// Caller must have changed gp status to Gcopystack.
func copystack(gp *g, newsize uintptr) {
if gp.syscallsp != 0 {
throw("stack growth not allowed in system call")
}
old := gp.stack
if old.lo == 0 {
throw("nil stackbase")
}
used := old.hi - gp.sched.sp
// Add just the difference to gcController.addScannableStack.
// g0 stacks never move, so this will never account for them.
// It's also fine if we have no P, addScannableStack can deal with
// that case.
gcController.addScannableStack(getg().m.p.ptr(), int64(newsize)-int64(old.hi-old.lo))
// allocate new stack
new := stackalloc(uint32(newsize))
if stackPoisonCopy != 0 {
fillstack(new, 0xfd)
}
if stackDebug >= 1 {
print("copystack gp=", gp, " [", hex(old.lo), " ", hex(old.hi-used), " ", hex(old.hi), "]", " -> [", hex(new.lo), " ", hex(new.hi-used), " ", hex(new.hi), "]/", newsize, "\n")
}
// Compute adjustment.
var adjinfo adjustinfo
adjinfo.old = old
adjinfo.delta = new.hi - old.hi
// Adjust sudogs, synchronizing with channel ops if necessary.
ncopy := used
if !gp.activeStackChans {
if newsize < old.hi-old.lo && gp.parkingOnChan.Load() {
// It's not safe for someone to shrink this stack while we're actively
// parking on a channel, but it is safe to grow since we do that
// ourselves and explicitly don't want to synchronize with channels
// since we could self-deadlock.
throw("racy sudog adjustment due to parking on channel")
}
adjustsudogs(gp, &adjinfo)
} else {
// sudogs may be pointing in to the stack and gp has
// released channel locks, so other goroutines could
// be writing to gp's stack. Find the highest such
// pointer so we can handle everything there and below
// carefully. (This shouldn't be far from the bottom
// of the stack, so there's little cost in handling
// everything below it carefully.)
adjinfo.sghi = findsghi(gp, old)
// Synchronize with channel ops and copy the part of
// the stack they may interact with.
ncopy -= syncadjustsudogs(gp, used, &adjinfo)
}
// Copy the stack (or the rest of it) to the new location
memmove(unsafe.Pointer(new.hi-ncopy), unsafe.Pointer(old.hi-ncopy), ncopy)
// Adjust remaining structures that have pointers into stacks.
// We have to do most of these before we traceback the new
// stack because gentraceback uses them.
adjustctxt(gp, &adjinfo)
adjustdefers(gp, &adjinfo)
adjustpanics(gp, &adjinfo)
if adjinfo.sghi != 0 {
adjinfo.sghi += adjinfo.delta
}
// Swap out old stack for new one
gp.stack = new
gp.stackguard0 = new.lo + stackGuard // NOTE: might clobber a preempt request
gp.sched.sp = new.hi - used
gp.stktopsp += adjinfo.delta
// Adjust pointers in the new stack.
var u unwinder
for u.init(gp, 0); u.valid(); u.next() {
adjustframe(&u.frame, &adjinfo)
}
if valgrindenabled {
if gp.valgrindStackID == 0 {
gp.valgrindStackID = valgrindRegisterStack(unsafe.Pointer(new.lo), unsafe.Pointer(new.hi))
} else {
valgrindChangeStack(gp.valgrindStackID, unsafe.Pointer(new.lo), unsafe.Pointer(new.hi))
}
}
// free old stack
if goexperiment.RuntimeSecret && gp.secret > 0 {
// Some portion of the old stack has secret stuff on it.
// We don't really know where we entered secret mode,
// so just clear the whole thing.
// TODO(dmo): traceback until we hit secret.Do? clearing
// is fast and optimized, might not be worth it.
memclrNoHeapPointers(unsafe.Pointer(old.lo), old.hi-old.lo)
// The memmove call above might put secrets from the stack into registers.
secretEraseRegisters()
}
if stackPoisonCopy != 0 {
fillstack(old, 0xfc)
}
stackfree(old)
}
// round x up to a power of 2.
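// For example, round2(1) == 1, round2(3) == 4, and round2(8) == 8.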
func round2(x int32) int32 {
s := uint(0)
for 1<<s < x {
s++
}
return 1 << s
}
// Called from runtime·morestack when more stack is needed.
// Allocate larger stack and relocate to new stack.
// Stack growth is multiplicative, for constant amortized cost.
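// (Doubling means that growing a stack to n bytes copies at most
// n/2 + n/4 + ... < n bytes in total, so the amortized copy cost per
// byte of final stack is constant.)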
//
// g->atomicstatus will be Grunning or Gscanrunning upon entry.
// If the scheduler is trying to stop this g, then it will set preemptStop.
//
// This must be nowritebarrierrec because it can be called as part of
// stack growth from other nowritebarrierrec functions, but the
// compiler doesn't check this.
//
//go:nowritebarrierrec
func newstack() {
thisg := getg()
// TODO: double check all gp. shouldn't be getg().
if thisg.m.morebuf.g.ptr().stackguard0 == stackFork {
throw("stack growth after fork")
}
if thisg.m.morebuf.g.ptr() != thisg.m.curg {
print("runtime: newstack called from g=", hex(thisg.m.morebuf.g), "\n"+"\tm=", thisg.m, " m->curg=", thisg.m.curg, " m->g0=", thisg.m.g0, " m->gsignal=", thisg.m.gsignal, "\n")
morebuf := thisg.m.morebuf
traceback(morebuf.pc, morebuf.sp, morebuf.lr, morebuf.g.ptr())
throw("runtime: wrong goroutine in newstack")
}
gp := thisg.m.curg
if goexperiment.RuntimeSecret && gp.secret > 0 {
// If we're entering here from a secret context, clear
// all the registers. This is important because we
// might context switch to a different goroutine which
// is not in secret mode, and it will not be careful
// about clearing its registers.
secretEraseRegisters()
}
if thisg.m.curg.throwsplit {
// Update syscallsp, syscallpc in case traceback uses them.
morebuf := thisg.m.morebuf
gp.syscallsp = morebuf.sp
gp.syscallpc = morebuf.pc
pcname, pcoff := "(unknown)", uintptr(0)
f := findfunc(gp.sched.pc)
if f.valid() {
pcname = funcname(f)
pcoff = gp.sched.pc - f.entry()
}
print("runtime: newstack at ", pcname, "+", hex(pcoff),
" sp=", hex(gp.sched.sp), " stack=[", hex(gp.stack.lo), ", ", hex(gp.stack.hi), "]\n",
"\tmorebuf={pc:", hex(morebuf.pc), " sp:", hex(morebuf.sp), " lr:", hex(morebuf.lr), "}\n",
"\tsched={pc:", hex(gp.sched.pc), " sp:", hex(gp.sched.sp), " lr:", hex(gp.sched.lr), " ctxt:", gp.sched.ctxt, "}\n")
thisg.m.traceback = 2 // Include runtime frames
traceback(morebuf.pc, morebuf.sp, morebuf.lr, gp)
throw("runtime: stack split at bad time")
}
morebuf := thisg.m.morebuf
thisg.m.morebuf.pc = 0
thisg.m.morebuf.lr = 0
thisg.m.morebuf.sp = 0
thisg.m.morebuf.g = 0
// NOTE: stackguard0 may change underfoot, if another thread
// is about to try to preempt gp. Read it just once and use that same
// value now and below.
stackguard0 := atomic.Loaduintptr(&gp.stackguard0)
// Be conservative about where we preempt.
// We are interested in preempting user Go code, not runtime code.
// If we're holding locks, mallocing, or preemption is disabled, don't
// preempt.
// This check is very early in newstack so that even the status change
// from Grunning to Gwaiting and back doesn't happen in this case.
// That status change by itself can be viewed as a small preemption,
// because the GC might change Gwaiting to Gscanwaiting, and then
// this goroutine has to wait for the GC to finish before continuing.
// If the GC is in some way dependent on this goroutine (for example,
// it needs a lock held by the goroutine), that small preemption turns
// into a real deadlock.
preempt := stackguard0 == stackPreempt
if preempt {
if !canPreemptM(thisg.m) {
// Let the goroutine keep running for now.
// gp->preempt is set, so it will be preempted next time.
gp.stackguard0 = gp.stack.lo + stackGuard
gogo(&gp.sched) // never return
}
}
if gp.stack.lo == 0 {
throw("missing stack in newstack")
}
sp := gp.sched.sp
if goarch.ArchFamily == goarch.AMD64 || goarch.ArchFamily == goarch.I386 || goarch.ArchFamily == goarch.WASM {
// The call to morestack costs a word.
sp -= goarch.PtrSize
}
if stackDebug >= 1 || sp < gp.stack.lo {
print("runtime: newstack sp=", hex(sp), " stack=[", hex(gp.stack.lo), ", ", hex(gp.stack.hi), "]\n",
"\tmorebuf={pc:", hex(morebuf.pc), " sp:", hex(morebuf.sp), " lr:", hex(morebuf.lr), "}\n",
"\tsched={pc:", hex(gp.sched.pc), " sp:", hex(gp.sched.sp), " lr:", hex(gp.sched.lr), " ctxt:", gp.sched.ctxt, "}\n")
}
if sp < gp.stack.lo {
print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->status=", hex(readgstatus(gp)), "\n ")
print("runtime: split stack overflow: ", hex(sp), " < ", hex(gp.stack.lo), "\n")
throw("runtime: split stack overflow")
}
if preempt {
if gp == thisg.m.g0 {
throw("runtime: preempt g0")
}
if thisg.m.p == 0 && thisg.m.locks == 0 {
throw("runtime: g is running but p is not")
}
if gp.preemptShrink {
// We're at a synchronous safe point now, so
// do the pending stack shrink.
gp.preemptShrink = false
shrinkstack(gp)
}
// Set a flag indicating that we've been synchronously preempted.
gp.syncSafePoint = true
if gp.preemptStop {
preemptPark(gp) // never returns
}
// Act like goroutine called runtime.Gosched.
gopreempt_m(gp) // never return
}
// Allocate a bigger segment and move the stack.
oldsize := gp.stack.hi - gp.stack.lo
newsize := oldsize * 2
// Make sure we grow at least as much as needed to fit the new frame.
// (This is just an optimization - the caller of morestack will
// recheck the bounds on return.)
if f := findfunc(gp.sched.pc); f.valid() {
max := uintptr(funcMaxSPDelta(f))
needed := max + stackGuard
used := gp.stack.hi - gp.sched.sp
for newsize-used < needed {
newsize *= 2
}
}
if stackguard0 == stackForceMove {
// Forced stack movement used for debugging.
// Don't double the stack (or we may quickly run out
// if this is done repeatedly).
newsize = oldsize
}
if newsize > maxstacksize || newsize > maxstackceiling {
if maxstacksize < maxstackceiling {
print("runtime: goroutine stack exceeds ", maxstacksize, "-byte limit\n")
} else {
print("runtime: goroutine stack exceeds ", maxstackceiling, "-byte limit\n")
}
print("runtime: sp=", hex(sp), " stack=[", hex(gp.stack.lo), ", ", hex(gp.stack.hi), "]\n")
throw("stack overflow")
}
// The goroutine must be executing in order to call newstack,
// so it must be Grunning (or Gscanrunning).
casgstatus(gp, _Grunning, _Gcopystack)
// The concurrent GC will not scan the stack while we are doing the copy since
// the gp is in a Gcopystack status.
copystack(gp, newsize)
if stackDebug >= 1 {
print("stack grow done\n")
}
casgstatus(gp, _Gcopystack, _Grunning)
gogo(&gp.sched)
}
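// nilfunc is the function used by gostartcallfn when no funcval is supplied;
// if it is ever actually called, the nil store below faults immediately.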
//go:nosplit
func nilfunc() {
*(*uint8)(nil) = 0
}
// adjust Gobuf as if it executed a call to fn
// and then stopped before the first instruction in fn.
func gostartcallfn(gobuf *gobuf, fv *funcval) {
var fn unsafe.Pointer
if fv != nil {
fn = unsafe.Pointer(fv.fn)
} else {
fn = unsafe.Pointer(abi.FuncPCABIInternal(nilfunc))
}
gostartcall(gobuf, fn, unsafe.Pointer(fv))
}
// isShrinkStackSafe returns whether it's safe to attempt to shrink
// gp's stack. Shrinking the stack is only safe when we have precise
// pointer maps for all frames on the stack. The caller must hold the
// _Gscan bit for gp or must be running gp itself.
func isShrinkStackSafe(gp *g) bool {
// We can't copy the stack if we're in a syscall.
// The syscall might have pointers into the stack and
// often we don't have precise pointer maps for the innermost
// frames.
if gp.syscallsp != 0 {
return false
}
// We also can't copy the stack if we're at an asynchronous
// safe-point because we don't have precise pointer maps for
// all frames.
if gp.asyncSafePoint {
return false
}
// We also can't *shrink* the stack in the window between the
// goroutine calling gopark to park on a channel and
// gp.activeStackChans being set.
if gp.parkingOnChan.Load() {
return false
}
// We also can't copy the stack while a gp is in _Gwaiting solely
// to make itself available to suspendG.
//
// In these cases, the G is actually executing on the system
// stack, and the execution tracer, mutex profiler, etc. may want
// to take a stack trace of the G's stack.
//
// Note: it's safe to access gp.waitreason here.
// We're only calling isShrinkStackSafe if we took ownership of the
// G with the _Gscan bit. This prevents the goroutine from transitioning,
// which prevents gp.waitreason from changing.
if readgstatus(gp)&^_Gscan == _Gwaiting && gp.waitreason.isWaitingForSuspendG() {
return false
}
return true
}
// Maybe shrink the stack being used by gp.
//
// gp must be stopped and we must own its stack. It may be in
// _Grunning, but only if this is our own user G.
func shrinkstack(gp *g) {
if gp.stack.lo == 0 {
throw("missing stack in shrinkstack")
}
if s := readgstatus(gp); s&_Gscan == 0 {
// We don't own the stack via _Gscan. We could still
// own it if this is our own user G and we're on the
// system stack.
if !(gp == getg().m.curg && getg() != getg().m.curg && s == _Grunning) {
// We don't own the stack.
throw("bad status in shrinkstack")
}
}
if !isShrinkStackSafe(gp) {
throw("shrinkstack at bad time")
}
// Check for self-shrinks while in a libcall. These may have
// pointers into the stack disguised as uintptrs, but these
// code paths should all be nosplit.
if gp == getg().m.curg && gp.m.libcallsp != 0 {
throw("shrinking stack in libcall")
}
if debug.gcshrinkstackoff > 0 {
return
}
oldsize := gp.stack.hi - gp.stack.lo
newsize := oldsize / 2
// Don't shrink the allocation below the minimum-sized stack
// allocation.
if newsize < fixedStack {
return
}
// Compute how much of the stack is currently in use and only
// shrink the stack if gp is using less than a quarter of its
// current stack. The currently used stack includes everything
// down to the SP plus the stack guard space that ensures
// there's room for nosplit functions.
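	// As a rough example: with avail = 8 KiB, the stack is halved only
	// if the bytes from stack.hi down to SP, plus the stackNosplit
	// allowance, total less than 2 KiB.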
avail := gp.stack.hi - gp.stack.lo
if used := gp.stack.hi - gp.sched.sp + stackNosplit; used >= avail/4 {
return
}
if stackDebug > 0 {
print("shrinking stack ", oldsize, "->", newsize, "\n")
}
copystack(gp, newsize)
}
// freeStackSpans frees unused stack spans at the end of GC.
func freeStackSpans() {
// Scan stack pools for empty stack spans.
for order := range stackpool {
lock(&stackpool[order].item.mu)
list := &stackpool[order].item.span
for s := list.first; s != nil; {
next := s.next
if s.allocCount == 0 {
list.remove(s)
s.manualFreeList = 0
osStackFree(s)
mheap_.freeManual(s, spanAllocStack)
}
s = next
}
unlock(&stackpool[order].item.mu)
}
// Free large stack spans.
lock(&stackLarge.lock)
for i := range stackLarge.free {
for s := stackLarge.free[i].first; s != nil; {
next := s.next
stackLarge.free[i].remove(s)
osStackFree(s)
mheap_.freeManual(s, spanAllocStack)
s = next
}
}
unlock(&stackLarge.lock)
}
// A stackObjectRecord is generated by the compiler for each stack object in a stack frame.
// This record must match the generator code in cmd/compile/internal/liveness/plive.go:emitStackObjects.
type stackObjectRecord struct {
// offset in frame
// if negative, offset from varp
// if non-negative, offset from argp
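	// For example, off = -16 describes an object starting 16 bytes below
	// varp (a local), while off = 8 describes one starting 8 bytes above
	// argp (an argument).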
off int32
size int32
ptrBytes int32
gcdataoff uint32 // offset to gcdata from moduledata.rodata
}
// gcdata returns the number of bytes that contain pointers, and
// a ptr/nonptr bitmask covering those bytes.
// Note that this bitmask might be larger than internal/abi.MaxPtrmaskBytes.
func (r *stackObjectRecord) gcdata() (uintptr, *byte) {
ptr := uintptr(unsafe.Pointer(r))
var mod *moduledata
for datap := &firstmoduledata; datap != nil; datap = datap.next {
if datap.gofunc <= ptr && ptr < datap.end {
mod = datap
break
}
}
// If you get a panic here due to a nil mod,
// you may have made a copy of a stackObjectRecord.
// You must use the original pointer.
res := mod.rodata + uintptr(r.gcdataoff)
return uintptr(r.ptrBytes), (*byte)(unsafe.Pointer(res))
}
// This is exported as ABI0 via linkname so obj can call it.
//
//go:nosplit
//go:linkname morestackc
func morestackc() {
throw("attempt to execute system stack code on user stack")
}
// startingStackSize is the amount of stack that new goroutines start with.
// It is a power of 2, and between fixedStack and maxstacksize, inclusive.
// startingStackSize is updated every GC by tracking the average size of
// stacks scanned during the GC.
var startingStackSize uint32 = fixedStack
func gcComputeStartingStackSize() {
if debug.adaptivestackstart == 0 {
return
}
// For details, see the design doc at
// https://docs.google.com/document/d/1YDlGIdVTPnmUiTAavlZxBI1d9pwGQgZT7IKFKlIXohQ/edit?usp=sharing
// The basic algorithm is to track the average size of stacks
// and start goroutines with stack equal to that average size.
// Starting at the average size uses at most 2x the space that
// an ideal algorithm would have used.
// This is just a heuristic to avoid excessive stack growth work
// early in a goroutine's lifetime. See issue 18138. Stacks that
// are allocated too small can still grow, and stacks allocated
// too large can still shrink.
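	// As a rough example: if the last cycle scanned 1000 stacks totaling
	// 4 MiB, the average is ~4 KiB; after adding stackGuard and rounding
	// up to a power of two below, new goroutines would start with 8 KiB
	// stacks.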
var scannedStackSize uint64
var scannedStacks uint64
for _, p := range allp {
scannedStackSize += p.scannedStackSize
scannedStacks += p.scannedStacks
// Reset for next time
p.scannedStackSize = 0
p.scannedStacks = 0
}
if scannedStacks == 0 {
startingStackSize = fixedStack
return
}
avg := scannedStackSize/scannedStacks + stackGuard
// Note: we add stackGuard to ensure that a goroutine that
// uses the average space will not trigger a growth.
if avg > uint64(maxstacksize) {
avg = uint64(maxstacksize)
}
if avg < fixedStack {
avg = fixedStack
}
// Note: maxstacksize fits in 30 bits, so avg also does.
startingStackSize = uint32(round2(int32(avg)))
}