go/src/runtime/signal_unix.go

1459 lines
45 KiB
Go
Raw Normal View History

// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build unix
package runtime
import (
"internal/abi"
"internal/runtime/atomic"
"internal/runtime/sys"
"unsafe"
)
// sigTabT is the type of an entry in the global sigtable array.
// sigtable is inherently system dependent, and appears in OS-specific files,
// but sigTabT is the same for all Unixy systems.
// The sigtable array is indexed by a system signal number to get the flags
// and printable name of each signal.
type sigTabT struct {
// flags holds the _Sig* flag bits (for example _SigNotify, _SigPanic,
// _SigSetStack, _SigDefault) that the runtime tests when deciding how
// to treat the signal.
flags int32
// name is the printable name of the signal, as returned by signame.
name string
}
//go:linkname os_sigpipe os.sigpipe
func os_sigpipe() {
[dev.cc] runtime: delete scalararg, ptrarg; rename onM to systemstack Scalararg and ptrarg are not "signal safe". Go code filling them out can be interrupted by a signal, and then the signal handler runs, and if it also ends up in Go code that uses scalararg or ptrarg, now the old values have been smashed. For the pieces of code that do need to run in a signal handler, we introduced onM_signalok, which is really just onM except that the _signalok is meant to convey that the caller asserts that scalarg and ptrarg will be restored to their old values after the call (instead of the usual behavior, zeroing them). Scalararg and ptrarg are also untyped and therefore error-prone. Go code can always pass a closure instead of using scalararg and ptrarg; they were only really necessary for C code. And there's no more C code. For all these reasons, delete scalararg and ptrarg, converting the few remaining references to use closures. Once those are gone, there is no need for a distinction between onM and onM_signalok, so replace both with a single function equivalent to the current onM_signalok (that is, it can be called on any of the curg, g0, and gsignal stacks). The name onM and the phrase 'm stack' are misnomers, because on most system an M has two system stacks: the main thread stack and the signal handling stack. Correct the misnomer by naming the replacement function systemstack. Fix a few references to "M stack" in code. The main motivation for this change is to eliminate scalararg/ptrarg. Rick and I have already seen them cause problems because the calling sequence m.ptrarg[0] = p is a heap pointer assignment, so it gets a write barrier. The write barrier also uses onM, so it has all the same problems as if it were being invoked by a signal handler. We worked around this by saving and restoring the old values and by calling onM_signalok, but there's no point in keeping this nice home for bugs around any longer. 
This CL also changes funcline to return the file name as a result instead of filling in a passed-in *string. (The *string signature is left over from when the code was written in and called from C.) That's arguably an unrelated change, except that once I had done the ptrarg/scalararg/onM cleanup I started getting false positives about the *string argument escaping (not allowed in package runtime). The compiler is wrong, but the easiest fix is to write the code like Go code instead of like C code. I am a bit worried that the compiler is wrong because of some use of uninitialized memory in the escape analysis. If that's the reason, it will go away when we convert the compiler to Go. (And if not, we'll debug it the next time.) LGTM=khr R=r, khr CC=austin, golang-codereviews, iant, rlh https://golang.org/cl/174950043
2014-11-12 14:54:31 -05:00
systemstack(sigpipe)
}
// signame returns the printable name recorded in sigtable for sig.
// It returns the empty string when sig is outside the table.
func signame(sig uint32) string {
	if sig < uint32(len(sigtable)) {
		return sigtable[sig].name
	}
	return ""
}
// Special handler values that are passed to setsig, and compared against
// the result of getsig, in place of a handler address.
const (
// _SIG_DFL selects the default action for a signal.
_SIG_DFL uintptr = 0
// _SIG_IGN causes a signal to be ignored.
_SIG_IGN uintptr = 1
)
// sigPreempt is the signal used for non-cooperative preemption.
//
// There's no good way to choose this signal, but there are some
// heuristics:
//
// 1. It should be a signal that's passed through by debuggers by
// default. On Linux, this is SIGALRM, SIGURG, SIGCHLD, SIGIO,
// SIGVTALRM, SIGPROF, and SIGWINCH, plus some glibc-internal signals.
//
// 2. It shouldn't be used internally by libc in mixed Go/C binaries
// because libc may assume it's the only thing that can handle these
// signals. For example SIGCANCEL or SIGSETXID.
//
// 3. It should be a signal that can happen spuriously without
// consequences. For example, SIGALRM is a bad choice because the
// signal handler can't tell if it was caused by the real process
// alarm or not (arguably this means the signal is broken, but I
// digress). SIGUSR1 and SIGUSR2 are also bad because those are often
// used in meaningful ways by applications.
//
// 4. We need to deal with platforms without real-time signals (like
// macOS), so those are out.
//
// We use SIGURG because it meets all of these criteria, is extremely
// unlikely to be used by an application for its "real" meaning (both
// because out-of-band data is basically unused and because SIGURG
// doesn't report which socket has the condition, making it pretty
// useless), and even if it is, the application has to be ready for
// spurious SIGURG. SIGIO wouldn't be a bad choice either, but is more
// likely to be used for real.
const sigPreempt = _SIGURG
// fwdSig stores the signal handlers registered before Go installed its own.
// These signal handlers will be invoked in cases where Go doesn't want to
// handle a particular signal (e.g., signal occurred on a non-Go thread).
// See sigfwdgo for more information on when the signals are forwarded.
//
// This is read by the signal handler; accesses should use
// atomic.Loaduintptr and atomic.Storeuintptr.
var fwdSig [_NSIG]uintptr
// handlingSig is indexed by signal number and is non-zero if we are
// currently handling the signal. Or, to put it another way, whether
// the signal handler is currently set to the Go signal handler or not.
// This is uint32 rather than bool so that we can use atomic instructions.
var handlingSig [_NSIG]uint32
// Channels for synchronizing signal mask updates with the signal mask
// thread.
var (
// disableSigChan carries the signal numbers sent by sigdisable.
disableSigChan chan uint32
// enableSigChan carries the signal numbers sent by sigenable.
enableSigChan chan uint32
// maskUpdatedChan is received from by sigenable and sigdisable right
// after they send a request, before they touch the handler state.
maskUpdatedChan chan struct{}
)
// init verifies that sigtable has exactly one entry per signal number.
func init() {
	// _NSIG is the number of signals on this operating system, and
	// sigtable should describe what to do for every one of them.
	if len(sigtable) == _NSIG {
		return
	}
	print("runtime: len(sigtable)=", len(sigtable), " _NSIG=", _NSIG, "\n")
	throw("bad sigtable len")
}
// signalsOK is set to true by initsig when it is called with
// preinit == false, at which point it is OK for signal handlers to run.
var signalsOK bool
// initsig initializes signal handling.
// Called by libpreinit so runtime may not be initialized.
//
// For every sigtable entry that has flags set and is not marked
// _SigDefault, it records the currently installed handler in fwdSig and
// then either installs the Go handler (sighandler) or, when
// sigInstallGoHandler declines, leaves the existing disposition in place.
//
//go:nosplit
//go:nowritebarrierrec
func initsig(preinit bool) {
if !preinit {
// It's now OK for signal handlers to run.
signalsOK = true
}
// For c-archive/c-shared this is called by libpreinit with
// preinit == true.
// The later call with preinit == false therefore has nothing left
// to install and returns here.
if (isarchive || islibrary) && !preinit {
return
}
for i := uint32(0); i < _NSIG; i++ {
t := &sigtable[i]
// Skip signals with no flags and signals marked _SigDefault.
if t.flags == 0 || t.flags&_SigDefault != 0 {
continue
}
// We don't need to use atomic operations here because
// there shouldn't be any other goroutines running yet.
fwdSig[i] = getsig(i)
if !sigInstallGoHandler(i) {
// Even if we are not installing a signal handler,
// set SA_ONSTACK if necessary.
if fwdSig[i] != _SIG_DFL && fwdSig[i] != _SIG_IGN {
setsigstack(i)
} else if fwdSig[i] == _SIG_IGN {
// The signal was already ignored when we started.
sigInitIgnored(i)
}
continue
}
// Mark the signal as handled by Go before installing the handler.
handlingSig[i] = 1
setsig(i, abi.FuncPCABIInternal(sighandler))
}
}
//go:nosplit
//go:nowritebarrierrec
func sigInstallGoHandler(sig uint32) bool {
// For some signals, we respect an inherited SIG_IGN handler
// rather than insist on installing our own default handler.
// Even these signals can be fetched using the os/signal package.
switch sig {
case _SIGHUP, _SIGINT:
if atomic.Loaduintptr(&fwdSig[sig]) == _SIG_IGN {
return false
}
}
if (GOOS == "linux" || GOOS == "android") && !iscgo && sig == sigPerThreadSyscall {
runtime, syscall: reimplement AllThreadsSyscall using only signals. In issue 50113, we see that a thread blocked in a system call can result in a hang of AllThreadsSyscall. To resolve this, we must send a signal to these threads to knock them out of the system call long enough to run the per-thread syscall. Stepping back, if we need to send signals anyway, it should be possible to implement this entire mechanism on top of signals. This CL does so, vastly simplifying the mechanism, both as a direct result of newly-unnecessary code as well as some ancillary simplifications to make things simpler to follow. Major changes: * The rest of the mechanism is moved to os_linux.go, with fields in mOS instead of m itself. * 'Fixup' fields and functions are renamed to 'perThreadSyscall' so they are more precise about their purpose. * Rather than getting passed a closure, doAllThreadsSyscall takes the syscall number and arguments. This avoids a lot of hairy behavior: * The closure may potentially only be live in fields in the M, hidden from the GC. Not necessary with no closure. * The need to loan out the race context. A direct RawSyscall6 call does not require any race context. * The closure previously conditionally panicked in strange locations, like a signal handler. Now we simply throw. * All manual fixup synchronization with mPark, sysmon, templateThread, sigqueue, etc is gone. The core approach is much simpler: doAllThreadsSyscall sends a signal to every thread in allm, which executes the system call from the signal handler. We use (SIGRTMIN + 1), aka SIGSETXID, the same signal used by glibc for this purpose. As such, we are careful to only handle this signal on non-cgo binaries. Synchronization with thread creation is a key part of this CL. The comment near the top of doAllThreadsSyscall describes the required synchronization semantics and how they are achieved. Note that current use of allocmLock protects the state mutations of allm that are also protected by sched.lock. 
allocmLock is used instead of sched.lock simply to avoid holding sched.lock for so long. Fixes #50113 Change-Id: Ic7ea856dc66cf711731540a54996e08fc986ce84 Reviewed-on: https://go-review.googlesource.com/c/go/+/383434 Reviewed-by: Austin Clements <austin@google.com> Trust: Michael Pratt <mpratt@google.com> Run-TryBot: Michael Pratt <mpratt@google.com> TryBot-Result: Gopher Robot <gobot@golang.org>
2022-02-04 17:15:28 -05:00
// sigPerThreadSyscall is the same signal used by glibc for
// per-thread syscalls on Linux. We use it for the same purpose
// in non-cgo binaries.
return true
}
t := &sigtable[sig]
if t.flags&_SigSetStack != 0 {
return false
}
// When built using c-archive or c-shared, only install signal
// handlers for synchronous signals and SIGPIPE and sigPreempt.
if (isarchive || islibrary) && t.flags&_SigPanic == 0 && sig != _SIGPIPE && sig != sigPreempt {
return false
}
return true
}
// sigenable enables the Go signal handler to catch the signal sig.
// It is only called while holding the os/signal.handlers lock,
// via os/signal.enableSignal and signal_enable.
//
// Signals outside sigtable, SIGPROF, and signals without the _SigNotify
// flag are left untouched.
func sigenable(sig uint32) {
	// SIGPROF is handled specially for profiling, so it is rejected
	// here together with out-of-range signal numbers.
	if sig >= uint32(len(sigtable)) || sig == _SIGPROF {
		return
	}
	if sigtable[sig].flags&_SigNotify == 0 {
		return
	}

	// Hand the signal to the signal mask thread and wait for it to
	// report that the mask has been updated.
	ensureSigM()
	enableSigChan <- sig
	<-maskUpdatedChan

	// Only the caller that flips handlingSig from 0 to 1 saves the old
	// handler and installs the Go one.
	if !atomic.Cas(&handlingSig[sig], 0, 1) {
		return
	}
	atomic.Storeuintptr(&fwdSig[sig], getsig(sig))
	setsig(sig, abi.FuncPCABIInternal(sighandler))
}
// sigdisable disables the Go signal handler for the signal sig.
// It is only called while holding the os/signal.handlers lock,
// via os/signal.disableSignal and signal_disable.
//
// Signals outside sigtable, SIGPROF, and signals without the _SigNotify
// flag are left untouched.
func sigdisable(sig uint32) {
	// SIGPROF is handled specially for profiling, so it is rejected
	// here together with out-of-range signal numbers.
	if sig >= uint32(len(sigtable)) || sig == _SIGPROF {
		return
	}
	if sigtable[sig].flags&_SigNotify == 0 {
		return
	}

	// Hand the signal to the signal mask thread and wait for it to
	// report that the mask has been updated.
	ensureSigM()
	disableSigChan <- sig
	<-maskUpdatedChan

	// If initsig does not install a signal handler for a
	// signal, then to go back to the state before Notify
	// we should remove the one we installed.
	if sigInstallGoHandler(sig) {
		return
	}
	atomic.Store(&handlingSig[sig], 0)
	setsig(sig, atomic.Loaduintptr(&fwdSig[sig]))
}
// sigignore ignores the signal sig.
// It is only called while holding the os/signal.handlers lock,
// via os/signal.ignoreSignal and signal_ignore.
//
// Signals outside sigtable, SIGPROF, and signals without the _SigNotify
// flag are left untouched.
func sigignore(sig uint32) {
	// SIGPROF is handled specially for profiling, so it is rejected
	// here together with out-of-range signal numbers.
	if sig >= uint32(len(sigtable)) || sig == _SIGPROF {
		return
	}
	if sigtable[sig].flags&_SigNotify == 0 {
		return
	}

	// Record that Go no longer handles the signal, then ignore it.
	atomic.Store(&handlingSig[sig], 0)
	setsig(sig, _SIG_IGN)
}
// clearSignalHandlers resets to the default action every signal that is
// currently marked in handlingSig as handled by Go. This is called by
// the child after a fork, so that we can enable the signal mask for the
// exec without worrying about running a signal handler in the child.
//
//go:nosplit
//go:nowritebarrierrec
func clearSignalHandlers() {
	for sig := uint32(0); sig < _NSIG; sig++ {
		if atomic.Load(&handlingSig[sig]) == 0 {
			// Go is not handling this signal; leave it alone.
			continue
		}
		setsig(sig, _SIG_DFL)
	}
}
// setProcessCPUProfilerTimer is called when the profiling timer changes.
// It is called with prof.signalLock held. hz is the new timer, and is 0 if
// profiling is being disabled. Enable or disable the signal as
// required for -buildmode=c-archive.
func setProcessCPUProfilerTimer(hz int32) {
if hz != 0 {
// Enable the Go signal handler if not enabled.
if atomic.Cas(&handlingSig[_SIGPROF], 0, 1) {
h := getsig(_SIGPROF)
// If no signal handler was installed before, then we record
// _SIG_IGN here. When we turn off profiling (below) we'll start
// ignoring SIGPROF signals. We do this, rather than change
// to SIG_DFL, because there may be a pending SIGPROF
// signal that has not yet been delivered to some other thread.
// If we change to SIG_DFL when turning off profiling, the
// program will crash when that SIGPROF is delivered. We assume
// that programs that use profiling don't want to crash on a
// stray SIGPROF. See issue 19320.
// We do the change here instead of when turning off profiling,
// because there we may race with a signal handler running
// concurrently, in particular, sigfwdgo may observe _SIG_DFL and
// die. See issue 43828.
if h == _SIG_DFL {
h = _SIG_IGN
}
atomic.Storeuintptr(&fwdSig[_SIGPROF], h)
setsig(_SIGPROF, abi.FuncPCABIInternal(sighandler))
}
var it itimerval
it.it_interval.tv_sec = 0
it.it_interval.set_usec(1000000 / hz)
it.it_value = it.it_interval
setitimer(_ITIMER_PROF, &it, nil)
} else {
setitimer(_ITIMER_PROF, &itimerval{}, nil)
// If the Go signal handler should be disabled by default,
// switch back to the signal handler that was installed
// when we enabled profiling. We don't try to handle the case
// of a program that changes the SIGPROF handler while Go
// profiling is enabled.
if !sigInstallGoHandler(_SIGPROF) {
if atomic.Cas(&handlingSig[_SIGPROF], 1, 0) {
h := atomic.Loaduintptr(&fwdSig[_SIGPROF])
setsig(_SIGPROF, h)
}
}
}
}
// setThreadCPUProfilerHz makes any thread-specific changes required to
// implement profiling at a rate of hz.
// No changes required on Unix systems when using setitimer, so this
// only records hz on the current M.
func setThreadCPUProfilerHz(hz int32) {
	mp := getg().m
	mp.profilehz = hz
}
// sigpipe handles a SIGPIPE on behalf of os_sigpipe. It does nothing
// when signal_ignored reports that SIGPIPE is ignored or when sigsend
// reports success for it; otherwise it calls dieFromSignal.
func sigpipe() {
	if !signal_ignored(_SIGPIPE) && !sigsend(_SIGPIPE) {
		dieFromSignal(_SIGPIPE)
	}
}
// doSigPreempt handles a preemption signal on gp.
// ctxt is the signal context of the interrupted code.
func doSigPreempt(gp *g, ctxt *sigctxt) {
	// Only inject the preemption when this G wants to be preempted
	// and is stopped at a point where that is safe.
	if wantAsyncPreempt(gp) {
		ok, resumePC := isAsyncSafePoint(gp, ctxt.sigpc(), ctxt.sigsp(), ctxt.siglr())
		if ok {
			// Adjust the PC and inject a call to asyncPreempt.
			ctxt.pushCall(abi.FuncPCABI0(asyncPreempt), resumePC)
		}
	}

	// Acknowledge the preemption, whether or not it was injected.
	gp.m.preemptGen.Add(1)
	gp.m.signalPending.Store(0)

	// Balances the increment made by preemptM on these systems.
	switch GOOS {
	case "darwin", "ios":
		pendingPreemptSignals.Add(-1)
	}
}
// preemptMSupported is true on Unix systems, where preemptM requests
// preemption by sending sigPreempt through signalM.
const preemptMSupported = true
// preemptM sends a preemption request to mp. This request may be
// handled asynchronously and may be coalesced with other requests to
// the M. When the request is received, if the running G or P are
// marked for preemption and the goroutine is at an asynchronous
// safe-point, it will preempt the goroutine. It always atomically
// increments mp.preemptGen after handling a preemption request.
func preemptM(mp *m) {
	const darwinLike = GOOS == "darwin" || GOOS == "ios"

	// On Darwin, don't try to preempt threads during exec.
	// Issue #41702.
	if darwinLike {
		execLock.rlock()
	}

	// If multiple threads are preempting the same M, they may send so
	// many signals to it that it hardly makes progress, causing a
	// live-lock. Apparently this could happen on darwin. See
	// issue #37741.
	// Only send a signal if there isn't already one pending.
	if mp.signalPending.CompareAndSwap(0, 1) {
		if darwinLike {
			pendingPreemptSignals.Add(1)
		}
		signalM(mp, sigPreempt)
	}

	if darwinLike {
		execLock.runlock()
	}
}
// sigFetchG fetches the value of G safely when running in a signal handler.
// On some architectures, the g value may be clobbered when running in a VDSO.
// See issue #32912.
//
//go:nosplit
func sigFetchG(c *sigctxt) *g {
switch GOARCH {
case "arm", "arm64", "loong64", "ppc64", "ppc64le", "riscv64", "s390x":
if !iscgo && inVDSOPage(c.sigpc()) {
// When using cgo, we save the g on TLS and load it from there
// in sigtramp. Just use that.
// Otherwise, before making a VDSO call we save the g to the
// bottom of the signal stack. Fetch from there.
// TODO: in efence mode, stack is sysAlloc'd, so this wouldn't
// work.
sp := sys.GetCallerSP()
s := spanOf(sp)
runtime: atomically set span state and use as publication barrier When everything is working correctly, any pointer the garbage collector encounters can only point into a fully initialized heap span, since the span must have been initialized before that pointer could escape the heap allocator and become visible to the GC. However, in various cases, we try to be defensive against bad pointers. In findObject, this is just a sanity check: we never expect to find a bad pointer, but programming errors can lead to them. In spanOfHeap, we don't necessarily trust the pointer and we're trying to check if it really does point to the heap, though it should always point to something. Conservative scanning takes this to a new level, since it can only guess that a word may be a pointer and verify this. In all of these cases, we have a problem that the span lookup and check can race with span initialization, since the span becomes visible to lookups before it's fully initialized. Furthermore, we're about to start initializing the span without the heap lock held, which is going to introduce races where accesses were previously protected by the heap lock. To address this, this CL makes accesses to mspan.state atomic, and ensures that the span is fully initialized before setting the state to mSpanInUse. All loads are now atomic, and in any case where we don't trust the pointer, it first atomically loads the span state and checks that it's mSpanInUse, after which it will have synchronized with span initialization and can safely check the other span fields. For #10958, #24543, but a good fix in general. Change-Id: I518b7c63555b02064b98aa5f802c92b758fef853 Reviewed-on: https://go-review.googlesource.com/c/go/+/203286 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Michael Knyszek <mknyszek@google.com>
2019-10-23 11:25:38 -04:00
if s != nil && s.state.get() == mSpanManual && s.base() < sp && sp < s.limit {
gp := *(**g)(unsafe.Pointer(s.base()))
return gp
}
return nil
}
}
return getg()
}
// sigtrampgo is called from the signal handler function, sigtramp,
// written in assembly code.
// This is called by the signal handler, and the world may be stopped.
//
// It must be nosplit because getg() is still the G that was running
// (if any) when the signal was delivered, but it's (usually) called
// on the gsignal stack. Until this switches the G to gsignal, the
// stack bounds check won't work.
//
//go:nosplit
//go:nowritebarrierrec
func sigtrampgo(sig uint32, info *siginfo, ctx unsafe.Pointer) {
if sigfwdgo(sig, info, ctx) {
return
}
c := &sigctxt{info, ctx}
gp := sigFetchG(c)
setg(gp)
runtime/cgo: store M for C-created thread in pthread key This reapplies CL 485500, with a fix drafted in CL 492987 incorporated. CL 485500 is reverted due to #60004 and #60007. #60004 is fixed in CL 492743. #60007 is fixed in CL 492987 (incorporated in this CL). [Original CL 485500 description] This reapplies CL 481061, with the followup fixes in CL 482975, CL 485315, and CL 485316 incorporated. CL 481061, by doujiang24 <doujiang24@gmail.com>, speed up C to Go calls by binding the M to the C thread. See below for its description. CL 482975 is a followup fix to a C declaration in testprogcgo. CL 485315 is a followup fix for x_cgo_getstackbound on Illumos. CL 485316 is a followup cleanup for ppc64 assembly. CL 479915 passed the G to _cgo_getstackbound for direct updates to gp.stack.lo. A G can be reused on a new thread after the previous thread exited. This could trigger the C TSAN race detector because it couldn't see the synchronization in Go (lockextra) preventing the same G from being used on multiple threads at the same time. We work around this by passing the address of a stack variable to _cgo_getstackbound rather than the G. The stack is generally unique per thread, so TSAN won't see the same address from multiple threads. Even if stacks are reused across threads by pthread, C TSAN should see the synchonization in the stack allocator. A regression test is added to misc/cgo/testsanitizer. [Original CL 481061 description] This reapplies CL 392854, with the followup fixes in CL 479255, CL 479915, and CL 481057 incorporated. CL 392854, by doujiang24 <doujiang24@gmail.com>, speed up C to Go calls by binding the M to the C thread. See below for its description. CL 479255 is a followup fix for a small bug in ARM assembly code. CL 479915 is another followup fix to address C to Go calls after the C code uses some stack, but that CL is also buggy. CL 481057, by Michael Knyszek, is a followup fix for a memory leak bug of CL 479915. 
[Original CL 392854 description] In a C thread, it's necessary to acquire an extra M by using needm while invoking a Go function from C. But, needm and dropm are heavy costs due to the signal-related syscalls. So, we change to not dropm while returning back to C, which means binding the extra M to the C thread until it exits, to avoid needm and dropm on each C to Go call. Instead, we only dropm while the C thread exits, so the extra M won't leak. When invoking a Go function from C: Allocate a pthread variable using pthread_key_create, only once per shared object, and register a thread-exit-time destructor. And store the g0 of the current m into the thread-specified value of the pthread key, only once per C thread, so that the destructor will put the extra M back onto the extra M list while the C thread exits. When returning back to C: Skip dropm in cgocallback, when the pthread variable has been created, so that the extra M will be reused the next time invoke a Go function from C. This is purely a performance optimization. The old version, in which needm & dropm happen on each cgo call, is still correct too, and we have to keep the old version on systems with cgo but without pthreads, like Windows. This optimization is significant, and the specific value depends on the OS system and CPU, but in general, it can be considered as 10x faster, for a simple Go function call from a C thread. For the newly added BenchmarkCGoInCThread, some benchmark results: 1. it's 28x faster, from 3395 ns/op to 121 ns/op, in darwin OS & Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz 2. it's 6.5x faster, from 1495 ns/op to 230 ns/op, in Linux OS & Intel(R) Xeon(R) CPU E5-2630 0 @ 2.30GHz [CL 479915 description] Currently, when C calls into Go the first time, we grab an M using needm, which sets m.g0's stack bounds using the SP. We don't know how big the stack is, so we simply assume 32K. 
Previously, when the Go function returns to C, we drop the M, and the next time C calls into Go, we put a new stack bound on the g0 based on the current SP. After CL 392854, we don't drop the M, and the next time C calls into Go, we reuse the same g0, without recomputing the stack bounds. If the C code uses quite a bit of stack space before calling into Go, the SP may be well below the 32K stack bound we assumed, so the runtime thinks the g0 stack overflows. This CL makes needm get a more accurate stack bound from pthread. (In some platforms this may still be a guess as we don't know exactly where we are in the C stack), but it is probably better than simply assuming 32K. [CL 492987 description] On the first call into Go from a C thread, currently we set the g0 stack's high bound imprecisely based on the SP. With CL 485500, we keep the M and don't recompute the stack bounds when it calls into Go again. If the first call is made when the C thread uses some deep stack, but a subsequent call is made with a shallower stack, the SP may be above g0.stack.hi. This is usually okay as we don't check usually stack.hi. One place where we do check for stack.hi is in the signal handler, in adjustSignalStack. In particular, C TSAN delivers signals on the g0 stack (instead of the usual signal stack). If the SP is above g0.stack.hi, we don't see it is on the g0 stack, and throws. This CL makes it get an accurate stack upper bound with the pthread API (on the platforms where it is available). Also add some debug print for the "handler not on signal stack" throw. Fixes #51676. Fixes #59294. Fixes #59678. Fixes #60007. Change-Id: Ie51c8e81ade34ec81d69fd7bce1fe0039a470776 Reviewed-on: https://go-review.googlesource.com/c/go/+/495855 Run-TryBot: Cherry Mui <cherryyz@google.com> TryBot-Result: Gopher Robot <gobot@golang.org> Reviewed-by: Michael Pratt <mpratt@google.com>
2023-05-17 12:01:15 -04:00
if gp == nil || (gp.m != nil && gp.m.isExtraInC) {
if sig == _SIGPROF {
// Some platforms (Linux) have per-thread timers, which we use in
// combination with the process-wide timer. Avoid double-counting.
if validSIGPROF(nil, c) {
sigprofNonGoPC(c.sigpc())
}
return
}
if sig == sigPreempt && preemptMSupported && debug.asyncpreemptoff == 0 {
// This is probably a signal from preemptM sent
// while executing Go code but received while
// executing non-Go code.
// We got past sigfwdgo, so we know that there is
// no non-Go signal handler for sigPreempt.
// The default behavior for sigPreempt is to ignore
// the signal, so badsignal will be a no-op anyway.
if GOOS == "darwin" || GOOS == "ios" {
pendingPreemptSignals.Add(-1)
}
return
}
c.fixsigcode(sig)
runtime/cgo: store M for C-created thread in pthread key This reapplies CL 485500, with a fix drafted in CL 492987 incorporated. CL 485500 is reverted due to #60004 and #60007. #60004 is fixed in CL 492743. #60007 is fixed in CL 492987 (incorporated in this CL). [Original CL 485500 description] This reapplies CL 481061, with the followup fixes in CL 482975, CL 485315, and CL 485316 incorporated. CL 481061, by doujiang24 <doujiang24@gmail.com>, speed up C to Go calls by binding the M to the C thread. See below for its description. CL 482975 is a followup fix to a C declaration in testprogcgo. CL 485315 is a followup fix for x_cgo_getstackbound on Illumos. CL 485316 is a followup cleanup for ppc64 assembly. CL 479915 passed the G to _cgo_getstackbound for direct updates to gp.stack.lo. A G can be reused on a new thread after the previous thread exited. This could trigger the C TSAN race detector because it couldn't see the synchronization in Go (lockextra) preventing the same G from being used on multiple threads at the same time. We work around this by passing the address of a stack variable to _cgo_getstackbound rather than the G. The stack is generally unique per thread, so TSAN won't see the same address from multiple threads. Even if stacks are reused across threads by pthread, C TSAN should see the synchonization in the stack allocator. A regression test is added to misc/cgo/testsanitizer. [Original CL 481061 description] This reapplies CL 392854, with the followup fixes in CL 479255, CL 479915, and CL 481057 incorporated. CL 392854, by doujiang24 <doujiang24@gmail.com>, speed up C to Go calls by binding the M to the C thread. See below for its description. CL 479255 is a followup fix for a small bug in ARM assembly code. CL 479915 is another followup fix to address C to Go calls after the C code uses some stack, but that CL is also buggy. CL 481057, by Michael Knyszek, is a followup fix for a memory leak bug of CL 479915. 
[Original CL 392854 description] In a C thread, it's necessary to acquire an extra M by using needm while invoking a Go function from C. But, needm and dropm are heavy costs due to the signal-related syscalls. So, we change to not dropm while returning back to C, which means binding the extra M to the C thread until it exits, to avoid needm and dropm on each C to Go call. Instead, we only dropm while the C thread exits, so the extra M won't leak. When invoking a Go function from C: Allocate a pthread variable using pthread_key_create, only once per shared object, and register a thread-exit-time destructor. And store the g0 of the current m into the thread-specified value of the pthread key, only once per C thread, so that the destructor will put the extra M back onto the extra M list while the C thread exits. When returning back to C: Skip dropm in cgocallback, when the pthread variable has been created, so that the extra M will be reused the next time invoke a Go function from C. This is purely a performance optimization. The old version, in which needm & dropm happen on each cgo call, is still correct too, and we have to keep the old version on systems with cgo but without pthreads, like Windows. This optimization is significant, and the specific value depends on the OS system and CPU, but in general, it can be considered as 10x faster, for a simple Go function call from a C thread. For the newly added BenchmarkCGoInCThread, some benchmark results: 1. it's 28x faster, from 3395 ns/op to 121 ns/op, in darwin OS & Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz 2. it's 6.5x faster, from 1495 ns/op to 230 ns/op, in Linux OS & Intel(R) Xeon(R) CPU E5-2630 0 @ 2.30GHz [CL 479915 description] Currently, when C calls into Go the first time, we grab an M using needm, which sets m.g0's stack bounds using the SP. We don't know how big the stack is, so we simply assume 32K. 
Previously, when the Go function returns to C, we drop the M, and the next time C calls into Go, we put a new stack bound on the g0 based on the current SP. After CL 392854, we don't drop the M, and the next time C calls into Go, we reuse the same g0, without recomputing the stack bounds. If the C code uses quite a bit of stack space before calling into Go, the SP may be well below the 32K stack bound we assumed, so the runtime thinks the g0 stack overflows. This CL makes needm get a more accurate stack bound from pthread. (In some platforms this may still be a guess as we don't know exactly where we are in the C stack), but it is probably better than simply assuming 32K. [CL 492987 description] On the first call into Go from a C thread, currently we set the g0 stack's high bound imprecisely based on the SP. With CL 485500, we keep the M and don't recompute the stack bounds when it calls into Go again. If the first call is made when the C thread uses some deep stack, but a subsequent call is made with a shallower stack, the SP may be above g0.stack.hi. This is usually okay as we don't check usually stack.hi. One place where we do check for stack.hi is in the signal handler, in adjustSignalStack. In particular, C TSAN delivers signals on the g0 stack (instead of the usual signal stack). If the SP is above g0.stack.hi, we don't see it is on the g0 stack, and throws. This CL makes it get an accurate stack upper bound with the pthread API (on the platforms where it is available). Also add some debug print for the "handler not on signal stack" throw. Fixes #51676. Fixes #59294. Fixes #59678. Fixes #60007. Change-Id: Ie51c8e81ade34ec81d69fd7bce1fe0039a470776 Reviewed-on: https://go-review.googlesource.com/c/go/+/495855 Run-TryBot: Cherry Mui <cherryyz@google.com> TryBot-Result: Gopher Robot <gobot@golang.org> Reviewed-by: Michael Pratt <mpratt@google.com>
2023-05-17 12:01:15 -04:00
// Set g to nil here and badsignal will use g0 by needm.
// TODO: reuse the current m here by using the gsignal and adjustSignalStack,
// since the current g maybe a normal goroutine and actually running on the signal stack,
// it may hit stack split that is not expected here.
if gp != nil {
setg(nil)
}
badsignal(uintptr(sig), c)
runtime/cgo: store M for C-created thread in pthread key This reapplies CL 485500, with a fix drafted in CL 492987 incorporated. CL 485500 is reverted due to #60004 and #60007. #60004 is fixed in CL 492743. #60007 is fixed in CL 492987 (incorporated in this CL). [Original CL 485500 description] This reapplies CL 481061, with the followup fixes in CL 482975, CL 485315, and CL 485316 incorporated. CL 481061, by doujiang24 <doujiang24@gmail.com>, speed up C to Go calls by binding the M to the C thread. See below for its description. CL 482975 is a followup fix to a C declaration in testprogcgo. CL 485315 is a followup fix for x_cgo_getstackbound on Illumos. CL 485316 is a followup cleanup for ppc64 assembly. CL 479915 passed the G to _cgo_getstackbound for direct updates to gp.stack.lo. A G can be reused on a new thread after the previous thread exited. This could trigger the C TSAN race detector because it couldn't see the synchronization in Go (lockextra) preventing the same G from being used on multiple threads at the same time. We work around this by passing the address of a stack variable to _cgo_getstackbound rather than the G. The stack is generally unique per thread, so TSAN won't see the same address from multiple threads. Even if stacks are reused across threads by pthread, C TSAN should see the synchonization in the stack allocator. A regression test is added to misc/cgo/testsanitizer. [Original CL 481061 description] This reapplies CL 392854, with the followup fixes in CL 479255, CL 479915, and CL 481057 incorporated. CL 392854, by doujiang24 <doujiang24@gmail.com>, speed up C to Go calls by binding the M to the C thread. See below for its description. CL 479255 is a followup fix for a small bug in ARM assembly code. CL 479915 is another followup fix to address C to Go calls after the C code uses some stack, but that CL is also buggy. CL 481057, by Michael Knyszek, is a followup fix for a memory leak bug of CL 479915. 
[Original CL 392854 description] In a C thread, it's necessary to acquire an extra M by using needm while invoking a Go function from C. But, needm and dropm are heavy costs due to the signal-related syscalls. So, we change to not dropm while returning back to C, which means binding the extra M to the C thread until it exits, to avoid needm and dropm on each C to Go call. Instead, we only dropm while the C thread exits, so the extra M won't leak. When invoking a Go function from C: Allocate a pthread variable using pthread_key_create, only once per shared object, and register a thread-exit-time destructor. And store the g0 of the current m into the thread-specified value of the pthread key, only once per C thread, so that the destructor will put the extra M back onto the extra M list while the C thread exits. When returning back to C: Skip dropm in cgocallback, when the pthread variable has been created, so that the extra M will be reused the next time invoke a Go function from C. This is purely a performance optimization. The old version, in which needm & dropm happen on each cgo call, is still correct too, and we have to keep the old version on systems with cgo but without pthreads, like Windows. This optimization is significant, and the specific value depends on the OS system and CPU, but in general, it can be considered as 10x faster, for a simple Go function call from a C thread. For the newly added BenchmarkCGoInCThread, some benchmark results: 1. it's 28x faster, from 3395 ns/op to 121 ns/op, in darwin OS & Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz 2. it's 6.5x faster, from 1495 ns/op to 230 ns/op, in Linux OS & Intel(R) Xeon(R) CPU E5-2630 0 @ 2.30GHz [CL 479915 description] Currently, when C calls into Go the first time, we grab an M using needm, which sets m.g0's stack bounds using the SP. We don't know how big the stack is, so we simply assume 32K. 
Previously, when the Go function returns to C, we drop the M, and the next time C calls into Go, we put a new stack bound on the g0 based on the current SP. After CL 392854, we don't drop the M, and the next time C calls into Go, we reuse the same g0, without recomputing the stack bounds. If the C code uses quite a bit of stack space before calling into Go, the SP may be well below the 32K stack bound we assumed, so the runtime thinks the g0 stack overflows. This CL makes needm get a more accurate stack bound from pthread. (In some platforms this may still be a guess as we don't know exactly where we are in the C stack), but it is probably better than simply assuming 32K. [CL 492987 description] On the first call into Go from a C thread, currently we set the g0 stack's high bound imprecisely based on the SP. With CL 485500, we keep the M and don't recompute the stack bounds when it calls into Go again. If the first call is made when the C thread uses some deep stack, but a subsequent call is made with a shallower stack, the SP may be above g0.stack.hi. This is usually okay as we don't check usually stack.hi. One place where we do check for stack.hi is in the signal handler, in adjustSignalStack. In particular, C TSAN delivers signals on the g0 stack (instead of the usual signal stack). If the SP is above g0.stack.hi, we don't see it is on the g0 stack, and throws. This CL makes it get an accurate stack upper bound with the pthread API (on the platforms where it is available). Also add some debug print for the "handler not on signal stack" throw. Fixes #51676. Fixes #59294. Fixes #59678. Fixes #60007. Change-Id: Ie51c8e81ade34ec81d69fd7bce1fe0039a470776 Reviewed-on: https://go-review.googlesource.com/c/go/+/495855 Run-TryBot: Cherry Mui <cherryyz@google.com> TryBot-Result: Gopher Robot <gobot@golang.org> Reviewed-by: Michael Pratt <mpratt@google.com>
2023-05-17 12:01:15 -04:00
// Restore g
if gp != nil {
setg(gp)
}
return
}
setg(gp.m.gsignal)
// If some non-Go code called sigaltstack, adjust.
var gsignalStack gsignalStack
setStack := adjustSignalStack(sig, gp.m, &gsignalStack)
if setStack {
gp.m.gsignal.stktopsp = sys.GetCallerSP()
}
if gp.stackguard0 == stackFork {
signalDuringFork(sig)
}
c.fixsigcode(sig)
sighandler(sig, info, ctx, gp)
setg(gp)
if setStack {
restoreGsignalStack(&gsignalStack)
}
}
// State used when the signal handler receives a SIGPROF signal on a
// non-Go thread.
var (
	// sigprofCallers is where the signal handler tries to collect a
	// traceback when SIGPROF arrives on a non-Go thread.
	sigprofCallers cgoCallers

	// sigprofCallersUse is set to non-zero while sigprofCallers holds
	// a traceback.
	sigprofCallersUse uint32
)
// sigprofNonGo handles a SIGPROF signal that arrived on a non-Go thread
// after the signal handler collected a stack trace in sigprofCallers.
// On entry sigprofCallersUse is non-zero; it is stored back to zero
// before returning. g is nil, and what we can do is very limited.
//
// The callers are the signal handling functions written in assembly code
// that are active for cgo programs, cgoSigtramp and sigprofNonGoWrapper.
// They have not verified that the SIGPROF delivery corresponds to the
// best available profiling source for this thread.
//
//go:nosplit
//go:nowritebarrierrec
func sigprofNonGo(sig uint32, info *siginfo, ctx unsafe.Pointer) {
	// Some platforms (Linux) have per-thread timers, which we use in
	// combination with the process-wide timer. The validSIGPROF check
	// avoids double-counting.
	if prof.hz.Load() != 0 && validSIGPROF(nil, &sigctxt{info, ctx}) {
		// The recorded trace ends at the first zero entry, or fills
		// the whole buffer if there is none.
		depth := 0
		for ; depth < len(sigprofCallers); depth++ {
			if sigprofCallers[depth] == 0 {
				break
			}
		}
		cpuprof.addNonGo(sigprofCallers[:depth])
	}
	// sigprofCallers no longer holds a traceback we need.
	atomic.Store(&sigprofCallersUse, 0)
}
// sigprofNonGoPC handles a profiling signal that arrived on a non-Go
// thread when only a single PC value is available, not a stack trace.
// g is nil, and what we can do is very limited.
//
//go:nosplit
//go:nowritebarrierrec
func sigprofNonGoPC(pc uintptr) {
	if prof.hz.Load() == 0 {
		// prof.hz is zero: nothing to record.
		return
	}
	// Record a two-entry stack: the sampled pc followed by a PC
	// inside _ExternalCode.
	externalPC := abi.FuncPCABIInternal(_ExternalCode) + sys.PCQuantum
	frames := []uintptr{pc, externalPC}
	cpuprof.addNonGo(frames)
}
// adjustSignalStack adjusts the current stack guard based on the
// stack pointer that is actually in use while handling a signal.
// We do this in case some non-Go code called sigaltstack.
// This reports whether the stack was adjusted, and if so stores the old
// signal stack in *gsigstack.
//
//go:nosplit
func adjustSignalStack(sig uint32, mp *m, gsigStack *gsignalStack) bool {
sp := uintptr(unsafe.Pointer(&sig))
if sp >= mp.gsignal.stack.lo && sp < mp.gsignal.stack.hi {
return false
}
var st stackt
sigaltstack(nil, &st)
stsp := uintptr(unsafe.Pointer(st.ss_sp))
if st.ss_flags&_SS_DISABLE == 0 && sp >= stsp && sp < stsp+st.ss_size {
setGsignalStack(&st, gsigStack)
return true
}
if sp >= mp.g0.stack.lo && sp < mp.g0.stack.hi {
// The signal was delivered on the g0 stack.
// This can happen when linked with C code
// using the thread sanitizer, which collects
// signals then delivers them itself by calling
// the signal handler directly when C code,
// including C code called via cgo, calls a
// TSAN-intercepted function such as malloc.
//
// We check this condition last as g0.stack.lo
// may be not very accurate (see mstart).
st := stackt{ss_size: mp.g0.stack.hi - mp.g0.stack.lo}
setSignalstackSP(&st, mp.g0.stack.lo)
setGsignalStack(&st, gsigStack)
return true
}
// sp is not within gsignal stack, g0 stack, or sigaltstack. Bad.
setg(nil)
runtime/cgo: store M for C-created thread in pthread key This reapplies CL 485500, with a fix drafted in CL 492987 incorporated. CL 485500 is reverted due to #60004 and #60007. #60004 is fixed in CL 492743. #60007 is fixed in CL 492987 (incorporated in this CL). [Original CL 485500 description] This reapplies CL 481061, with the followup fixes in CL 482975, CL 485315, and CL 485316 incorporated. CL 481061, by doujiang24 <doujiang24@gmail.com>, speed up C to Go calls by binding the M to the C thread. See below for its description. CL 482975 is a followup fix to a C declaration in testprogcgo. CL 485315 is a followup fix for x_cgo_getstackbound on Illumos. CL 485316 is a followup cleanup for ppc64 assembly. CL 479915 passed the G to _cgo_getstackbound for direct updates to gp.stack.lo. A G can be reused on a new thread after the previous thread exited. This could trigger the C TSAN race detector because it couldn't see the synchronization in Go (lockextra) preventing the same G from being used on multiple threads at the same time. We work around this by passing the address of a stack variable to _cgo_getstackbound rather than the G. The stack is generally unique per thread, so TSAN won't see the same address from multiple threads. Even if stacks are reused across threads by pthread, C TSAN should see the synchonization in the stack allocator. A regression test is added to misc/cgo/testsanitizer. [Original CL 481061 description] This reapplies CL 392854, with the followup fixes in CL 479255, CL 479915, and CL 481057 incorporated. CL 392854, by doujiang24 <doujiang24@gmail.com>, speed up C to Go calls by binding the M to the C thread. See below for its description. CL 479255 is a followup fix for a small bug in ARM assembly code. CL 479915 is another followup fix to address C to Go calls after the C code uses some stack, but that CL is also buggy. CL 481057, by Michael Knyszek, is a followup fix for a memory leak bug of CL 479915. 
[Original CL 392854 description] In a C thread, it's necessary to acquire an extra M by using needm while invoking a Go function from C. But, needm and dropm are heavy costs due to the signal-related syscalls. So, we change to not dropm while returning back to C, which means binding the extra M to the C thread until it exits, to avoid needm and dropm on each C to Go call. Instead, we only dropm while the C thread exits, so the extra M won't leak. When invoking a Go function from C: Allocate a pthread variable using pthread_key_create, only once per shared object, and register a thread-exit-time destructor. And store the g0 of the current m into the thread-specified value of the pthread key, only once per C thread, so that the destructor will put the extra M back onto the extra M list while the C thread exits. When returning back to C: Skip dropm in cgocallback, when the pthread variable has been created, so that the extra M will be reused the next time invoke a Go function from C. This is purely a performance optimization. The old version, in which needm & dropm happen on each cgo call, is still correct too, and we have to keep the old version on systems with cgo but without pthreads, like Windows. This optimization is significant, and the specific value depends on the OS system and CPU, but in general, it can be considered as 10x faster, for a simple Go function call from a C thread. For the newly added BenchmarkCGoInCThread, some benchmark results: 1. it's 28x faster, from 3395 ns/op to 121 ns/op, in darwin OS & Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz 2. it's 6.5x faster, from 1495 ns/op to 230 ns/op, in Linux OS & Intel(R) Xeon(R) CPU E5-2630 0 @ 2.30GHz [CL 479915 description] Currently, when C calls into Go the first time, we grab an M using needm, which sets m.g0's stack bounds using the SP. We don't know how big the stack is, so we simply assume 32K. 
Previously, when the Go function returns to C, we drop the M, and the next time C calls into Go, we put a new stack bound on the g0 based on the current SP. After CL 392854, we don't drop the M, and the next time C calls into Go, we reuse the same g0, without recomputing the stack bounds. If the C code uses quite a bit of stack space before calling into Go, the SP may be well below the 32K stack bound we assumed, so the runtime thinks the g0 stack overflows. This CL makes needm get a more accurate stack bound from pthread. (In some platforms this may still be a guess as we don't know exactly where we are in the C stack), but it is probably better than simply assuming 32K. [CL 492987 description] On the first call into Go from a C thread, currently we set the g0 stack's high bound imprecisely based on the SP. With CL 485500, we keep the M and don't recompute the stack bounds when it calls into Go again. If the first call is made when the C thread uses some deep stack, but a subsequent call is made with a shallower stack, the SP may be above g0.stack.hi. This is usually okay as we don't check usually stack.hi. One place where we do check for stack.hi is in the signal handler, in adjustSignalStack. In particular, C TSAN delivers signals on the g0 stack (instead of the usual signal stack). If the SP is above g0.stack.hi, we don't see it is on the g0 stack, and throws. This CL makes it get an accurate stack upper bound with the pthread API (on the platforms where it is available). Also add some debug print for the "handler not on signal stack" throw. Fixes #51676. Fixes #59294. Fixes #59678. Fixes #60007. Change-Id: Ie51c8e81ade34ec81d69fd7bce1fe0039a470776 Reviewed-on: https://go-review.googlesource.com/c/go/+/495855 Run-TryBot: Cherry Mui <cherryyz@google.com> TryBot-Result: Gopher Robot <gobot@golang.org> Reviewed-by: Michael Pratt <mpratt@google.com>
2023-05-17 12:01:15 -04:00
needm(true)
if st.ss_flags&_SS_DISABLE != 0 {
noSignalStack(sig)
} else {
runtime/cgo: store M for C-created thread in pthread key This reapplies CL 485500, with a fix drafted in CL 492987 incorporated. CL 485500 is reverted due to #60004 and #60007. #60004 is fixed in CL 492743. #60007 is fixed in CL 492987 (incorporated in this CL). [Original CL 485500 description] This reapplies CL 481061, with the followup fixes in CL 482975, CL 485315, and CL 485316 incorporated. CL 481061, by doujiang24 <doujiang24@gmail.com>, speed up C to Go calls by binding the M to the C thread. See below for its description. CL 482975 is a followup fix to a C declaration in testprogcgo. CL 485315 is a followup fix for x_cgo_getstackbound on Illumos. CL 485316 is a followup cleanup for ppc64 assembly. CL 479915 passed the G to _cgo_getstackbound for direct updates to gp.stack.lo. A G can be reused on a new thread after the previous thread exited. This could trigger the C TSAN race detector because it couldn't see the synchronization in Go (lockextra) preventing the same G from being used on multiple threads at the same time. We work around this by passing the address of a stack variable to _cgo_getstackbound rather than the G. The stack is generally unique per thread, so TSAN won't see the same address from multiple threads. Even if stacks are reused across threads by pthread, C TSAN should see the synchonization in the stack allocator. A regression test is added to misc/cgo/testsanitizer. [Original CL 481061 description] This reapplies CL 392854, with the followup fixes in CL 479255, CL 479915, and CL 481057 incorporated. CL 392854, by doujiang24 <doujiang24@gmail.com>, speed up C to Go calls by binding the M to the C thread. See below for its description. CL 479255 is a followup fix for a small bug in ARM assembly code. CL 479915 is another followup fix to address C to Go calls after the C code uses some stack, but that CL is also buggy. CL 481057, by Michael Knyszek, is a followup fix for a memory leak bug of CL 479915. 
[Original CL 392854 description] In a C thread, it's necessary to acquire an extra M by using needm while invoking a Go function from C. But, needm and dropm are heavy costs due to the signal-related syscalls. So, we change to not dropm while returning back to C, which means binding the extra M to the C thread until it exits, to avoid needm and dropm on each C to Go call. Instead, we only dropm while the C thread exits, so the extra M won't leak. When invoking a Go function from C: Allocate a pthread variable using pthread_key_create, only once per shared object, and register a thread-exit-time destructor. And store the g0 of the current m into the thread-specified value of the pthread key, only once per C thread, so that the destructor will put the extra M back onto the extra M list while the C thread exits. When returning back to C: Skip dropm in cgocallback, when the pthread variable has been created, so that the extra M will be reused the next time invoke a Go function from C. This is purely a performance optimization. The old version, in which needm & dropm happen on each cgo call, is still correct too, and we have to keep the old version on systems with cgo but without pthreads, like Windows. This optimization is significant, and the specific value depends on the OS system and CPU, but in general, it can be considered as 10x faster, for a simple Go function call from a C thread. For the newly added BenchmarkCGoInCThread, some benchmark results: 1. it's 28x faster, from 3395 ns/op to 121 ns/op, in darwin OS & Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz 2. it's 6.5x faster, from 1495 ns/op to 230 ns/op, in Linux OS & Intel(R) Xeon(R) CPU E5-2630 0 @ 2.30GHz [CL 479915 description] Currently, when C calls into Go the first time, we grab an M using needm, which sets m.g0's stack bounds using the SP. We don't know how big the stack is, so we simply assume 32K. 
Previously, when the Go function returns to C, we drop the M, and the next time C calls into Go, we put a new stack bound on the g0 based on the current SP. After CL 392854, we don't drop the M, and the next time C calls into Go, we reuse the same g0, without recomputing the stack bounds. If the C code uses quite a bit of stack space before calling into Go, the SP may be well below the 32K stack bound we assumed, so the runtime thinks the g0 stack overflows. This CL makes needm get a more accurate stack bound from pthread. (In some platforms this may still be a guess as we don't know exactly where we are in the C stack), but it is probably better than simply assuming 32K. [CL 492987 description] On the first call into Go from a C thread, currently we set the g0 stack's high bound imprecisely based on the SP. With CL 485500, we keep the M and don't recompute the stack bounds when it calls into Go again. If the first call is made when the C thread uses some deep stack, but a subsequent call is made with a shallower stack, the SP may be above g0.stack.hi. This is usually okay as we don't check usually stack.hi. One place where we do check for stack.hi is in the signal handler, in adjustSignalStack. In particular, C TSAN delivers signals on the g0 stack (instead of the usual signal stack). If the SP is above g0.stack.hi, we don't see it is on the g0 stack, and throws. This CL makes it get an accurate stack upper bound with the pthread API (on the platforms where it is available). Also add some debug print for the "handler not on signal stack" throw. Fixes #51676. Fixes #59294. Fixes #59678. Fixes #60007. Change-Id: Ie51c8e81ade34ec81d69fd7bce1fe0039a470776 Reviewed-on: https://go-review.googlesource.com/c/go/+/495855 Run-TryBot: Cherry Mui <cherryyz@google.com> TryBot-Result: Gopher Robot <gobot@golang.org> Reviewed-by: Michael Pratt <mpratt@google.com>
2023-05-17 12:01:15 -04:00
sigNotOnStack(sig, sp, mp)
}
dropm()
return false
}
// crashing is the number of m's we have waited for when implementing
// GOTRACEBACK=crash when a signal is received.
// In sighandler's docrash path, the first thread moves it from 0 to 1
// with a compare-and-swap and every later thread increments it.
var crashing atomic.Int32
// Hooks used by the runtime tests. When a hook is non-nil, it is called
// on the corresponding signal; if it returns true, the normal behavior
// on that signal is suppressed.
var (
	// testSigtrap is called on SIGTRAP.
	testSigtrap func(info *siginfo, ctxt *sigctxt, gp *g) bool

	// testSigusr1 is called on SIGUSR1.
	testSigusr1 func(gp *g) bool
)
// sigsysIgnored is non-zero if we are currently ignoring SIGSYS. See issue #69065.
// It is accessed atomically: ignoreSIGSYS stores 1, restoreSIGSYS stores 0,
// and sighandler loads it when a SIGSYS from seccomp arrives.
var sigsysIgnored uint32
// ignoreSIGSYS atomically sets sigsysIgnored to 1. While it is set,
// sighandler returns without further action for a SIGSYS whose context
// reports that it came from seccomp.
// It is exposed to package os as os.ignoreSIGSYS via linkname.
//
//go:linkname ignoreSIGSYS os.ignoreSIGSYS
func ignoreSIGSYS() {
atomic.Store(&sigsysIgnored, 1)
}
// restoreSIGSYS atomically resets sigsysIgnored to 0, undoing the
// effect of ignoreSIGSYS.
// It is exposed to package os as os.restoreSIGSYS via linkname.
//
//go:linkname restoreSIGSYS os.restoreSIGSYS
func restoreSIGSYS() {
atomic.Store(&sigsysIgnored, 0)
}
// sighandler is invoked when a signal occurs. The global g will be
// set to a gsignal goroutine and we will be running on the alternate
// signal stack. The parameter gp will be the value of the global g
// when the signal occurred. The sig, info, and ctxt parameters are
// from the system signal handler: they are the parameters passed when
// the SA is passed to the sigaction system call.
//
// The garbage collector may have stopped the world, so write barriers
// are not allowed.
//
//go:nowritebarrierrec
func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) {
// The g executing the signal handler. This is almost always
// mp.gsignal. See delayedSignal for an exception.
gsignal := getg()
mp := gsignal.m
c := &sigctxt{info, ctxt}
// Cgo TSAN (not the Go race detector) intercepts signals and calls the
// signal handler at a later time. When the signal handler is called, the
// memory may have changed, but the signal context remains old. The
// unmatched signal context and memory makes it unsafe to unwind or inspect
// the stack. So we ignore delayed non-fatal signals that will cause a stack
// inspection (profiling signal and preemption signal).
// cgo_yield is only non-nil for TSAN, and is specifically used to trigger
// signal delivery. We use that as an indicator of delayed signals.
// For delayed signals, the handler is called on the g0 stack (see
// adjustSignalStack).
delayedSignal := *cgo_yield != nil && mp != nil && gsignal.stack == mp.g0.stack
if sig == _SIGPROF {
// Some platforms (Linux) have per-thread timers, which we use in
// combination with the process-wide timer. Avoid double-counting.
if !delayedSignal && validSIGPROF(mp, c) {
sigprof(c.sigpc(), c.sigsp(), c.siglr(), gp, mp)
}
return
}
if sig == _SIGTRAP && testSigtrap != nil && testSigtrap(info, (*sigctxt)(noescape(unsafe.Pointer(c))), gp) {
return
}
if sig == _SIGUSR1 && testSigusr1 != nil && testSigusr1(gp) {
return
}
if (GOOS == "linux" || GOOS == "android") && sig == sigPerThreadSyscall {
runtime, syscall: reimplement AllThreadsSyscall using only signals. In issue 50113, we see that a thread blocked in a system call can result in a hang of AllThreadsSyscall. To resolve this, we must send a signal to these threads to knock them out of the system call long enough to run the per-thread syscall. Stepping back, if we need to send signals anyway, it should be possible to implement this entire mechanism on top of signals. This CL does so, vastly simplifying the mechanism, both as a direct result of newly-unnecessary code as well as some ancillary simplifications to make things simpler to follow. Major changes: * The rest of the mechanism is moved to os_linux.go, with fields in mOS instead of m itself. * 'Fixup' fields and functions are renamed to 'perThreadSyscall' so they are more precise about their purpose. * Rather than getting passed a closure, doAllThreadsSyscall takes the syscall number and arguments. This avoids a lot of hairy behavior: * The closure may potentially only be live in fields in the M, hidden from the GC. Not necessary with no closure. * The need to loan out the race context. A direct RawSyscall6 call does not require any race context. * The closure previously conditionally panicked in strange locations, like a signal handler. Now we simply throw. * All manual fixup synchronization with mPark, sysmon, templateThread, sigqueue, etc is gone. The core approach is much simpler: doAllThreadsSyscall sends a signal to every thread in allm, which executes the system call from the signal handler. We use (SIGRTMIN + 1), aka SIGSETXID, the same signal used by glibc for this purpose. As such, we are careful to only handle this signal on non-cgo binaries. Synchronization with thread creation is a key part of this CL. The comment near the top of doAllThreadsSyscall describes the required synchronization semantics and how they are achieved. Note that current use of allocmLock protects the state mutations of allm that are also protected by sched.lock. 
allocmLock is used instead of sched.lock simply to avoid holding sched.lock for so long. Fixes #50113 Change-Id: Ic7ea856dc66cf711731540a54996e08fc986ce84 Reviewed-on: https://go-review.googlesource.com/c/go/+/383434 Reviewed-by: Austin Clements <austin@google.com> Trust: Michael Pratt <mpratt@google.com> Run-TryBot: Michael Pratt <mpratt@google.com> TryBot-Result: Gopher Robot <gobot@golang.org>
2022-02-04 17:15:28 -05:00
// sigPerThreadSyscall is the same signal used by glibc for
// per-thread syscalls on Linux. We use it for the same purpose
// in non-cgo binaries. Since this signal is not _SigNotify,
// there is nothing more to do once we run the syscall.
runPerThreadSyscall()
return
}
if sig == sigPreempt && debug.asyncpreemptoff == 0 && !delayedSignal {
// Might be a preemption signal.
doSigPreempt(gp, c)
// Even if this was definitely a preemption signal, it
// may have been coalesced with another signal, so we
// still let it through to the application.
}
flags := int32(_SigThrow)
if sig < uint32(len(sigtable)) {
flags = sigtable[sig].flags
}
runtime: don't inject a sigpanic if not on user G stack If a panicking signal (e.g. SIGSEGV) happens on a g0 stack, we're either in the runtime or running C code. Either way we cannot recover and sigpanic will immediately throw. Further, injecting a sigpanic could make the C stack unwinder and the debugger fail to unwind the stack. So don't inject a sigpanic. If we have cgo traceback and symbolizer attached, if it panics in a C function ("CF" for the example below), previously it shows something like fatal error: unexpected signal during runtime execution [signal SIGSEGV: segmentation violation code=0x1 addr=0x0 pc=0x45f1ef] runtime stack: runtime.throw({0x485460?, 0x0?}) .../runtime/panic.go:1076 +0x5c fp=0x7ffd77f60f58 sp=0x7ffd77f60f28 pc=0x42e39c runtime.sigpanic() .../runtime/signal_unix.go:821 +0x3e9 fp=0x7ffd77f60fb8 sp=0x7ffd77f60f58 pc=0x442229 goroutine 1 [syscall]: CF /tmp/pp/c.c:6 pc=0x45f1ef runtime.asmcgocall .../runtime/asm_amd64.s:869 pc=0x458007 runtime.cgocall(0x45f1d0, 0xc000053f70) .../runtime/cgocall.go:158 +0x51 fp=0xc000053f48 sp=0xc000053f10 pc=0x404551 main._Cfunc_CF() _cgo_gotypes.go:39 +0x3f fp=0xc000053f70 sp=0xc000053f48 pc=0x45f0bf Now it shows SIGSEGV: segmentation violation PC=0x45f1ef m=0 sigcode=1 signal arrived during cgo execution goroutine 1 [syscall]: CF /tmp/pp/c.c:6 pc=0x45f1ef runtime.asmcgocall .../runtime/asm_amd64.s:869 pc=0x458007 runtime.cgocall(0x45f1d0, 0xc00004ef70) .../runtime/cgocall.go:158 +0x51 fp=0xc00004ef48 sp=0xc00004ef10 pc=0x404551 main._Cfunc_CF() _cgo_gotypes.go:39 +0x3f fp=0xc00004ef70 sp=0xc00004ef48 pc=0x45f0bf I think the new one is reasonable. For #57698. Change-Id: I4f7af91761374e9b569dce4c7587499d4799137e Reviewed-on: https://go-review.googlesource.com/c/go/+/462437 Reviewed-by: Michael Pratt <mpratt@google.com> TryBot-Result: Gopher Robot <gobot@golang.org> Run-TryBot: Cherry Mui <cherryyz@google.com>
2023-01-17 19:15:39 -05:00
if !c.sigFromUser() && flags&_SigPanic != 0 && (gp.throwsplit || gp != mp.curg) {
// We can't safely sigpanic because it may grow the
// stack. Abort in the signal handler instead.
runtime: don't inject a sigpanic if not on user G stack If a panicking signal (e.g. SIGSEGV) happens on a g0 stack, we're either in the runtime or running C code. Either way we cannot recover and sigpanic will immediately throw. Further, injecting a sigpanic could make the C stack unwinder and the debugger fail to unwind the stack. So don't inject a sigpanic. If we have cgo traceback and symbolizer attached, if it panics in a C function ("CF" for the example below), previously it shows something like fatal error: unexpected signal during runtime execution [signal SIGSEGV: segmentation violation code=0x1 addr=0x0 pc=0x45f1ef] runtime stack: runtime.throw({0x485460?, 0x0?}) .../runtime/panic.go:1076 +0x5c fp=0x7ffd77f60f58 sp=0x7ffd77f60f28 pc=0x42e39c runtime.sigpanic() .../runtime/signal_unix.go:821 +0x3e9 fp=0x7ffd77f60fb8 sp=0x7ffd77f60f58 pc=0x442229 goroutine 1 [syscall]: CF /tmp/pp/c.c:6 pc=0x45f1ef runtime.asmcgocall .../runtime/asm_amd64.s:869 pc=0x458007 runtime.cgocall(0x45f1d0, 0xc000053f70) .../runtime/cgocall.go:158 +0x51 fp=0xc000053f48 sp=0xc000053f10 pc=0x404551 main._Cfunc_CF() _cgo_gotypes.go:39 +0x3f fp=0xc000053f70 sp=0xc000053f48 pc=0x45f0bf Now it shows SIGSEGV: segmentation violation PC=0x45f1ef m=0 sigcode=1 signal arrived during cgo execution goroutine 1 [syscall]: CF /tmp/pp/c.c:6 pc=0x45f1ef runtime.asmcgocall .../runtime/asm_amd64.s:869 pc=0x458007 runtime.cgocall(0x45f1d0, 0xc00004ef70) .../runtime/cgocall.go:158 +0x51 fp=0xc00004ef48 sp=0xc00004ef10 pc=0x404551 main._Cfunc_CF() _cgo_gotypes.go:39 +0x3f fp=0xc00004ef70 sp=0xc00004ef48 pc=0x45f0bf I think the new one is reasonable. For #57698. Change-Id: I4f7af91761374e9b569dce4c7587499d4799137e Reviewed-on: https://go-review.googlesource.com/c/go/+/462437 Reviewed-by: Michael Pratt <mpratt@google.com> TryBot-Result: Gopher Robot <gobot@golang.org> Run-TryBot: Cherry Mui <cherryyz@google.com>
2023-01-17 19:15:39 -05:00
//
// Also don't inject a sigpanic if we are not on a
// user G stack. Either we're in the runtime, or we're
// running C code. Either way we cannot recover.
flags = _SigThrow
}
if isAbortPC(c.sigpc()) {
// On many architectures, the abort function just
// causes a memory fault. Don't turn that into a panic.
flags = _SigThrow
}
if !c.sigFromUser() && flags&_SigPanic != 0 {
// The signal is going to cause a panic.
// Arrange the stack so that it looks like the point
// where the signal occurred made a call to the
// function sigpanic. Then set the PC to sigpanic.
// Have to pass arguments out of band since
// augmenting the stack frame would break
// the unwinding code.
gp.sig = sig
gp.sigcode0 = uintptr(c.sigcode())
gp.sigcode1 = c.fault()
gp.sigpc = c.sigpc()
c.preparePanic(sig, gp)
return
}
if c.sigFromUser() || flags&_SigNotify != 0 {
if sigsend(sig) {
return
}
}
if c.sigFromUser() && signal_ignored(sig) {
return
}
if sig == _SIGSYS && c.sigFromSeccomp() && atomic.Load(&sigsysIgnored) != 0 {
return
}
if flags&_SigKill != 0 {
dieFromSignal(sig)
}
// _SigThrow means that we should exit now.
// If we get here with _SigPanic, it means that the signal
// was sent to us by a program (c.sigFromUser() is true);
// in that case, if we didn't handle it in sigsend, we exit now.
if flags&(_SigThrow|_SigPanic) == 0 {
return
}
mp.throwing = throwTypeRuntime
mp.caughtsig.set(gp)
if crashing.Load() == 0 {
startpanic_m()
}
gp = fatalsignal(sig, c, gp, mp)
level, _, docrash := gotraceback()
if level > 0 {
goroutineheader(gp)
tracebacktrap(c.sigpc(), c.sigsp(), c.siglr(), gp)
if crashing.Load() > 0 && gp != mp.curg && mp.curg != nil && readgstatus(mp.curg)&^_Gscan == _Grunning {
// tracebackothers on original m skipped this one; trace it now.
goroutineheader(mp.curg)
traceback(^uintptr(0), ^uintptr(0), 0, mp.curg)
} else if crashing.Load() == 0 {
tracebackothers(gp)
print("\n")
}
dumpregs(c)
}
if docrash {
var crashSleepMicros uint32 = 5000
var watchdogTimeoutMicros uint32 = 2000 * crashSleepMicros
isCrashThread := false
if crashing.CompareAndSwap(0, 1) {
isCrashThread = true
} else {
crashing.Add(1)
}
if crashing.Load() < mcount()-int32(extraMLength.Load()) {
// There are other m's that need to dump their stacks.
// Relay SIGQUIT to the next m by sending it to the current process.
// All m's that have already received SIGQUIT have signal masks blocking
// receipt of any signals, so the SIGQUIT will go to an m that hasn't seen it yet.
// The first m will wait until all ms received the SIGQUIT, then crash/exit.
// Just in case the relaying gets botched, each m involved in
// the relay sleeps for 5 seconds and then does the crash/exit itself.
// The faulting m is crashing first so it is the faulting thread in the core dump (see issue #63277):
// in expected operation, the first m will wait until the last m has received the SIGQUIT,
// and then run crash/exit and the process is gone.
// However, if it spends more than 10 seconds to send SIGQUIT to all ms,
// any of ms may crash/exit the process after waiting for 10 seconds.
print("\n-----\n\n")
raiseproc(_SIGQUIT)
}
if isCrashThread {
// Sleep for short intervals so that we can crash quickly after all ms have received SIGQUIT.
// Reset the timer whenever we see more ms received SIGQUIT
// to make it have enough time to crash (see issue #64752).
timeout := watchdogTimeoutMicros
maxCrashing := crashing.Load()
for timeout > 0 && (crashing.Load() < mcount()-int32(extraMLength.Load())) {
usleep(crashSleepMicros)
timeout -= crashSleepMicros
if c := crashing.Load(); c > maxCrashing {
// We make progress, so reset the watchdog timeout
maxCrashing = c
timeout = watchdogTimeoutMicros
}
}
} else {
maxCrashing := int32(0)
c := crashing.Load()
for c > maxCrashing {
maxCrashing = c
usleep(watchdogTimeoutMicros)
c = crashing.Load()
}
}
printDebugLog()
crash()
}
printDebugLog()
exit(2)
}
func fatalsignal(sig uint32, c *sigctxt, gp *g, mp *m) *g {
if sig < uint32(len(sigtable)) {
print(sigtable[sig].name, "\n")
} else {
print("Signal ", sig, "\n")
}
runtime: implement SUID/SGID protections On Unix platforms, the runtime previously did nothing special when a program was run with either the SUID or SGID bits set. This can be dangerous in certain cases, such as when dumping memory state, or assuming the status of standard i/o file descriptors. Taking cues from glibc, this change implements a set of protections when a binary is run with SUID or SGID bits set (or is SUID/SGID-like). On Linux, whether to enable these protections is determined by whether the AT_SECURE flag is passed in the auxiliary vector. On platforms which have the issetugid syscall (the BSDs, darwin, and Solaris/Illumos), that is used. On the remaining platforms (currently only AIX) we check !(getuid() == geteuid() && getgid == getegid()). Currently when we determine a binary is "tainted" (using the glibc terminology), we implement two specific protections: 1. we check if the file descriptors 0, 1, and 2 are open, and if they are not, we open them, pointing at /dev/null (or fail). 2. we force GOTRACKBACK=none, and generally prevent dumping of trackbacks and registers when a program panics/aborts. In the future we may add additional protections. This change requires implementing issetugid on the platforms which support it, and implementing getuid, geteuid, getgid, and getegid on AIX. Thanks to Vincent Dehors from Synacktiv for reporting this issue. Fixes #60272 Fixes CVE-2023-29403 Change-Id: I73fc93f2b7a8933c192ce3eabbf1db359db7d5fa Reviewed-on: https://team-review.git.corp.google.com/c/golang/go-private/+/1878434 Reviewed-by: Damien Neil <dneil@google.com> Reviewed-by: Ian Lance Taylor <iant@google.com> Run-TryBot: Roland Shoemaker <bracewell@google.com> Reviewed-by: Russ Cox <rsc@google.com> Reviewed-on: https://go-review.googlesource.com/c/go/+/501223 Run-TryBot: David Chase <drchase@google.com> Reviewed-by: Michael Knyszek <mknyszek@google.com> TryBot-Result: Gopher Robot <gobot@golang.org>
2023-05-09 11:47:57 -07:00
if isSecureMode() {
exit(2)
}
print("PC=", hex(c.sigpc()), " m=", mp.id, " sigcode=", c.sigcode())
if sig == _SIGSEGV || sig == _SIGBUS {
print(" addr=", hex(c.fault()))
}
print("\n")
if mp.incgo && gp == mp.g0 && mp.curg != nil {
print("signal arrived during cgo execution\n")
// Switch to curg so that we get a traceback of the Go code
// leading up to the cgocall, which switched from curg to g0.
gp = mp.curg
}
if sig == _SIGILL || sig == _SIGFPE {
// It would be nice to know how long the instruction is.
// Unfortunately, that's complicated to do in general (mostly for x86
// and s930x, but other archs have non-standard instruction lengths also).
// Opt to print 16 bytes, which covers most instructions.
const maxN = 16
n := uintptr(maxN)
// We have to be careful, though. If we're near the end of
// a page and the following page isn't mapped, we could
// segfault. So make sure we don't straddle a page (even though
// that could lead to printing an incomplete instruction).
// We're assuming here we can read at least the page containing the PC.
// I suppose it is possible that the page is mapped executable but not readable?
pc := c.sigpc()
if n > physPageSize-pc%physPageSize {
n = physPageSize - pc%physPageSize
}
print("instruction bytes:")
b := (*[maxN]byte)(unsafe.Pointer(pc))
for i := uintptr(0); i < n; i++ {
print(" ", hex(b[i]))
}
println()
}
print("\n")
return gp
}
// sigpanic turns a synchronous signal into a run-time panic.
// If the signal handler sees a synchronous panic, it arranges the
// stack to look like the function where the signal occurred called
// sigpanic, sets the signal's PC value to sigpanic, and returns from
// the signal handler. The effect is that the program will act as
// though the function that got the signal simply called sigpanic
// instead.
//
// This must NOT be nosplit because the linker doesn't know where
// sigpanic calls can be injected.
//
// The signal handler must not inject a call to sigpanic if
// getg().throwsplit, since sigpanic may need to grow the stack.
//
// This is exported via linkname to assembly in runtime/cgo.
//
//go:linkname sigpanic
func sigpanic() {
gp := getg()
if !canpanic() {
throw("unexpected signal during runtime execution")
}
switch gp.sig {
case _SIGBUS:
if gp.sigcode0 == _BUS_ADRERR && gp.sigcode1 < 0x1000 {
panicmem()
}
// Support runtime/debug.SetPanicOnFault.
if gp.paniconfault {
panicmemAddr(gp.sigcode1)
}
print("unexpected fault address ", hex(gp.sigcode1), "\n")
throw("fault")
case _SIGSEGV:
if (gp.sigcode0 == 0 || gp.sigcode0 == _SEGV_MAPERR || gp.sigcode0 == _SEGV_ACCERR) && gp.sigcode1 < 0x1000 {
panicmem()
}
// Support runtime/debug.SetPanicOnFault.
if gp.paniconfault {
panicmemAddr(gp.sigcode1)
}
runtime: add safe arena support to the runtime This change adds an API to the runtime for arenas. A later CL can potentially export it as an experimental API, but for now, just the runtime implementation will suffice. The purpose of arenas is to improve efficiency, primarily by allowing for an application to manually free memory, thereby delaying garbage collection. It comes with other potential performance benefits, such as better locality, a better allocation strategy, and better handling of interior pointers by the GC. This implementation is based on one by danscales@google.com with a few significant differences: * The implementation lives entirely in the runtime (all layers). * Arena chunks are the minimum of 8 MiB or the heap arena size. This choice is made because in practice 64 MiB appears to be way too large of an area for most real-world use-cases. * Arena chunks are not unmapped, instead they're placed on an evacuation list and when there are no pointers left pointing into them, they're allowed to be reused. * Reusing partially-used arena chunks no longer tries to find one used by the same P first; it just takes the first one available. * In order to ensure worst-case fragmentation is never worse than 25%, only types and slice backing stores whose sizes are 1/4th the size of a chunk or less may be used. Previously larger sizes, up to the size of the chunk, were allowed. * ASAN, MSAN, and the race detector are fully supported. * Sets arena chunks to fault that were deferred at the end of mark termination (a non-public patch once did this; I don't see a reason not to continue that). For #51317. Change-Id: I83b1693a17302554cb36b6daa4e9249a81b1644f Reviewed-on: https://go-review.googlesource.com/c/go/+/423359 Reviewed-by: Cherry Mui <cherryyz@google.com> TryBot-Result: Gopher Robot <gobot@golang.org> Run-TryBot: Michael Knyszek <mknyszek@google.com>
2022-08-12 21:40:46 +00:00
if inUserArenaChunk(gp.sigcode1) {
// We could check that the arena chunk is explicitly set to fault,
// but the fact that we faulted on accessing it is enough to prove
// that it is.
print("accessed data from freed user arena ", hex(gp.sigcode1), "\n")
} else {
print("unexpected fault address ", hex(gp.sigcode1), "\n")
}
throw("fault")
case _SIGFPE:
switch gp.sigcode0 {
case _FPE_INTDIV:
panicdivide()
case _FPE_INTOVF:
panicoverflow()
}
panicfloat()
}
if gp.sig >= uint32(len(sigtable)) {
// can't happen: we looked up gp.sig in sigtable to decide to call sigpanic
throw("unexpected signal value")
}
panic(errorString(sigtable[gp.sig].name))
}
// dieFromSignal kills the program with a signal.
// This provides the expected exit status for the shell.
// This is only called with fatal signals expected to kill the process.
//
// It makes two attempts: first it unblocks sig, clears handlingSig[sig]
// and raises sig; if the process is still running it installs _SIG_DFL
// for sig and raises it again. If both fail it calls exit(2).
// sig is used to index handlingSig without a range check here.
//
//go:nosplit
//go:nowritebarrierrec
func dieFromSignal(sig uint32) {
	unblocksig(sig)
	// Mark the signal as unhandled to ensure it is forwarded.
	atomic.Store(&handlingSig[sig], 0)
	raise(sig)

	// That should have killed us. On some systems, though, raise
	// sends the signal to the whole process rather than to just
	// the current thread, which means that the signal may not yet
	// have been delivered. Give other threads a chance to run and
	// pick up the signal.
	osyield()
	osyield()
	osyield()

	// If that didn't work, try _SIG_DFL.
	setsig(sig, _SIG_DFL)
	raise(sig)

	// Same grace period as after the first raise.
	osyield()
	osyield()
	osyield()

	// If we are still somehow running, just exit with the wrong status.
	exit(2)
}
// raisebadsignal deals with a signal that was received on a non-Go
// thread when the Go program does not want to handle it (that is, the
// program has not called os/signal.Notify for the signal).
//
// It temporarily installs the forwarded handler for sig, raises the
// signal again, and, if the process survives, puts the Go signal
// handler back.
func raisebadsignal(sig uint32, c *sigctxt) {
	// Profiling signals that arrive on non-Go threads are ignored.
	if sig == _SIGPROF {
		return
	}

	// Signal numbers at or beyond _NSIG get the default disposition and
	// no sigtable flags; all others use the handler recorded in fwdSig.
	var handler uintptr = _SIG_DFL
	var flags int32
	if sig < _NSIG {
		handler = atomic.Loaduintptr(&fwdSig[sig])
		flags = sigtable[sig].flags
	}

	// Raising a signal that is ignored would be a no-op, so stop here.
	switch {
	case handler == _SIG_IGN:
		return
	case handler == _SIG_DFL && flags&_SigIgn != 0:
		return
	}

	// Swap in the other handler before raising.
	// We are inside a signal handler right now, which means the signal
	// is blocked; unless it is unblocked first, the signal we raise is
	// held back until this handler returns. The signal must have been
	// unblocked when the handler was entered (otherwise it would not
	// have been delivered), so there is no need to block it again
	// afterwards.
	unblocksig(sig)
	setsig(sig, handler)

	// When linked into a non-Go program, try not to disturb the context
	// in which the signal was originally raised. With the default
	// handler the signal is known to be non-recoverable, so sighandler
	// never needs to be re-installed: simply returning lets the signal
	// be re-raised and caught by the default handler with the correct
	// context.
	//
	// On FreeBSD the libthr sigaction code prevents this from working,
	// so there we always continue on to raise.
	if GOOS != "freebsd" && handler == _SIG_DFL && (isarchive || islibrary) {
		if !c.sigFromUser() {
			return
		}
	}

	raise(sig)

	// Allow some time for delivery. In almost all real cases the
	// program is about to crash, so this sleep is not wasted.
	usleep(1000)

	// The signal did not end the program: put the Go signal handler
	// back and carry on.
	//
	// Another instance of the signal may arrive before the Go handler
	// is restored, but that is not so bad: we know that the Go program
	// has been ignoring the signal.
	setsig(sig, abi.FuncPCABIInternal(sighandler))
}
runtime: perform crashes outside systemstack CL 93658 moved stack trace printing inside a systemstack call to sidestep complexity in case the runtime is in a inconsistent state. Unfortunately, debuggers generating backtraces for a Go panic will be confused and come up with a technical correct but useless stack. This CL moves just the crash performing - typically a SIGABRT signal - outside the systemstack call to improve backtraces. Unfortunately, the crash function now needs to be marked nosplit and that triggers the no split stackoverflow check. To work around that, split fatalpanic in two: fatalthrow for runtime.throw and fatalpanic for runtime.gopanic. Only Go panics really needs crashes on the right stack and there is enough stack for gopanic. Example program: package main import "runtime/debug" func main() { debug.SetTraceback("crash") crash() } func crash() { panic("panic!") } Before: (lldb) bt * thread #1, name = 'simple', stop reason = signal SIGABRT * frame #0: 0x000000000044ffe4 simple`runtime.raise at <autogenerated>:1 frame #1: 0x0000000000438cfb simple`runtime.dieFromSignal(sig=<unavailable>) at signal_unix.go:424 frame #2: 0x0000000000438ec9 simple`runtime.crash at signal_unix.go:525 frame #3: 0x00000000004268f5 simple`runtime.dopanic_m(gp=<unavailable>, pc=<unavailable>, sp=<unavailable>) at panic.go:758 frame #4: 0x000000000044bead simple`runtime.fatalpanic.func1 at panic.go:657 frame #5: 0x000000000044d066 simple`runtime.systemstack at <autogenerated>:1 frame #6: 0x000000000042a980 simple at proc.go:1094 frame #7: 0x0000000000438ec9 simple`runtime.crash at signal_unix.go:525 frame #8: 0x00000000004268f5 simple`runtime.dopanic_m(gp=<unavailable>, pc=<unavailable>, sp=<unavailable>) at panic.go:758 frame #9: 0x000000000044bead simple`runtime.fatalpanic.func1 at panic.go:657 frame #10: 0x000000000044d066 simple`runtime.systemstack at <autogenerated>:1 frame #11: 0x000000000042a980 simple at proc.go:1094 frame #12: 0x00000000004268f5 
simple`runtime.dopanic_m(gp=<unavailable>, pc=<unavailable>, sp=<unavailable>) at panic.go:758 frame #13: 0x000000000044bead simple`runtime.fatalpanic.func1 at panic.go:657 frame #14: 0x000000000044d066 simple`runtime.systemstack at <autogenerated>:1 frame #15: 0x000000000042a980 simple at proc.go:1094 frame #16: 0x000000000044bead simple`runtime.fatalpanic.func1 at panic.go:657 frame #17: 0x000000000044d066 simple`runtime.systemstack at <autogenerated>:1 After: (lldb) bt * thread #7, stop reason = signal SIGABRT * frame #0: 0x0000000000450024 simple`runtime.raise at <autogenerated>:1 frame #1: 0x0000000000438d1b simple`runtime.dieFromSignal(sig=<unavailable>) at signal_unix.go:424 frame #2: 0x0000000000438ee9 simple`runtime.crash at signal_unix.go:525 frame #3: 0x00000000004264e3 simple`runtime.fatalpanic(msgs=<unavailable>) at panic.go:664 frame #4: 0x0000000000425f1b simple`runtime.gopanic(e=<unavailable>) at panic.go:537 frame #5: 0x0000000000470c62 simple`main.crash at simple.go:11 frame #6: 0x0000000000470c00 simple`main.main at simple.go:6 frame #7: 0x0000000000427be7 simple`runtime.main at proc.go:198 frame #8: 0x000000000044ef91 simple`runtime.goexit at <autogenerated>:1 Updates #22716 Change-Id: Ib5fa35c13662c1dac2f1eac8b59c4a5824b98d92 Reviewed-on: https://go-review.googlesource.com/110065 Run-TryBot: Elias Naur <elias.naur@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Austin Clements <austin@google.com>
2018-04-29 16:29:43 +02:00
// crash ends the program by calling dieFromSignal with _SIGABRT.
//
//go:nosplit
func crash() {
	dieFromSignal(_SIGABRT)
}
// ensureSigM starts one global goroutine, locked to its own OS thread
// and asleep most of the time, so that at least one thread is available
// to catch the signals enabled for os/signal. Once maskUpdatedChan has
// been created, further calls return immediately.
func ensureSigM() {
	if maskUpdatedChan != nil {
		return
	}
	maskUpdatedChan = make(chan struct{})
	disableSigChan = make(chan uint32)
	enableSigChan = make(chan uint32)

	go func() {
		// A signal mask belongs to a thread, so this goroutine has to
		// stay on the same thread for its whole life.
		LockOSThread()
		defer UnlockOSThread()

		// blocked is the set of signals not active for os/signal. It
		// starts as every signal except the essential ones. When
		// signal.Notify()/Stop is called, sigenable/sigdisable notify
		// this thread so that it updates its mask to match.
		blocked := sigset_all
		for signo := 0; signo < len(sigtable); signo++ {
			if blockableSig(uint32(signo)) {
				continue
			}
			sigdelset(&blocked, signo)
		}
		sigprocmask(_SIG_SETMASK, &blocked, nil)

		for {
			select {
			case enabled := <-enableSigChan:
				if enabled > 0 {
					sigdelset(&blocked, int(enabled))
				}
			case disabled := <-disableSigChan:
				if disabled > 0 && blockableSig(disabled) {
					sigaddset(&blocked, int(disabled))
				}
			}
			// Apply the new mask, then tell the requester it is in place.
			sigprocmask(_SIG_SETMASK, &blocked, nil)
			maskUpdatedChan <- struct{}{}
		}
	}()
}
// noSignalStack is called when we receive a signal when there is no
// signal stack. This can only happen if non-Go code calls sigaltstack
// to disable the signal stack.
//
// It prints the signal number and then throws.
func noSignalStack(sig uint32) {
	println("signal", sig, "received on thread with no signal stack")
	throw("non-Go code disabled sigaltstack")
}
// This is called if we receive a signal when there is a signal stack
// but we are not on it. This can only happen if non-Go code called
// sigaction without setting the SS_ONSTACK flag.
runtime/cgo: store M for C-created thread in pthread key This reapplies CL 485500, with a fix drafted in CL 492987 incorporated. CL 485500 is reverted due to #60004 and #60007. #60004 is fixed in CL 492743. #60007 is fixed in CL 492987 (incorporated in this CL). [Original CL 485500 description] This reapplies CL 481061, with the followup fixes in CL 482975, CL 485315, and CL 485316 incorporated. CL 481061, by doujiang24 <doujiang24@gmail.com>, speed up C to Go calls by binding the M to the C thread. See below for its description. CL 482975 is a followup fix to a C declaration in testprogcgo. CL 485315 is a followup fix for x_cgo_getstackbound on Illumos. CL 485316 is a followup cleanup for ppc64 assembly. CL 479915 passed the G to _cgo_getstackbound for direct updates to gp.stack.lo. A G can be reused on a new thread after the previous thread exited. This could trigger the C TSAN race detector because it couldn't see the synchronization in Go (lockextra) preventing the same G from being used on multiple threads at the same time. We work around this by passing the address of a stack variable to _cgo_getstackbound rather than the G. The stack is generally unique per thread, so TSAN won't see the same address from multiple threads. Even if stacks are reused across threads by pthread, C TSAN should see the synchonization in the stack allocator. A regression test is added to misc/cgo/testsanitizer. [Original CL 481061 description] This reapplies CL 392854, with the followup fixes in CL 479255, CL 479915, and CL 481057 incorporated. CL 392854, by doujiang24 <doujiang24@gmail.com>, speed up C to Go calls by binding the M to the C thread. See below for its description. CL 479255 is a followup fix for a small bug in ARM assembly code. CL 479915 is another followup fix to address C to Go calls after the C code uses some stack, but that CL is also buggy. CL 481057, by Michael Knyszek, is a followup fix for a memory leak bug of CL 479915. 
[Original CL 392854 description] In a C thread, it's necessary to acquire an extra M by using needm while invoking a Go function from C. But, needm and dropm are heavy costs due to the signal-related syscalls. So, we change to not dropm while returning back to C, which means binding the extra M to the C thread until it exits, to avoid needm and dropm on each C to Go call. Instead, we only dropm while the C thread exits, so the extra M won't leak. When invoking a Go function from C: Allocate a pthread variable using pthread_key_create, only once per shared object, and register a thread-exit-time destructor. And store the g0 of the current m into the thread-specified value of the pthread key, only once per C thread, so that the destructor will put the extra M back onto the extra M list while the C thread exits. When returning back to C: Skip dropm in cgocallback, when the pthread variable has been created, so that the extra M will be reused the next time invoke a Go function from C. This is purely a performance optimization. The old version, in which needm & dropm happen on each cgo call, is still correct too, and we have to keep the old version on systems with cgo but without pthreads, like Windows. This optimization is significant, and the specific value depends on the OS system and CPU, but in general, it can be considered as 10x faster, for a simple Go function call from a C thread. For the newly added BenchmarkCGoInCThread, some benchmark results: 1. it's 28x faster, from 3395 ns/op to 121 ns/op, in darwin OS & Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz 2. it's 6.5x faster, from 1495 ns/op to 230 ns/op, in Linux OS & Intel(R) Xeon(R) CPU E5-2630 0 @ 2.30GHz [CL 479915 description] Currently, when C calls into Go the first time, we grab an M using needm, which sets m.g0's stack bounds using the SP. We don't know how big the stack is, so we simply assume 32K. 
Previously, when the Go function returns to C, we drop the M, and the next time C calls into Go, we put a new stack bound on the g0 based on the current SP. After CL 392854, we don't drop the M, and the next time C calls into Go, we reuse the same g0, without recomputing the stack bounds. If the C code uses quite a bit of stack space before calling into Go, the SP may be well below the 32K stack bound we assumed, so the runtime thinks the g0 stack overflows. This CL makes needm get a more accurate stack bound from pthread. (In some platforms this may still be a guess as we don't know exactly where we are in the C stack), but it is probably better than simply assuming 32K. [CL 492987 description] On the first call into Go from a C thread, currently we set the g0 stack's high bound imprecisely based on the SP. With CL 485500, we keep the M and don't recompute the stack bounds when it calls into Go again. If the first call is made when the C thread uses some deep stack, but a subsequent call is made with a shallower stack, the SP may be above g0.stack.hi. This is usually okay as we don't check usually stack.hi. One place where we do check for stack.hi is in the signal handler, in adjustSignalStack. In particular, C TSAN delivers signals on the g0 stack (instead of the usual signal stack). If the SP is above g0.stack.hi, we don't see it is on the g0 stack, and throws. This CL makes it get an accurate stack upper bound with the pthread API (on the platforms where it is available). Also add some debug print for the "handler not on signal stack" throw. Fixes #51676. Fixes #59294. Fixes #59678. Fixes #60007. Change-Id: Ie51c8e81ade34ec81d69fd7bce1fe0039a470776 Reviewed-on: https://go-review.googlesource.com/c/go/+/495855 Run-TryBot: Cherry Mui <cherryyz@google.com> TryBot-Result: Gopher Robot <gobot@golang.org> Reviewed-by: Michael Pratt <mpratt@google.com>
2023-05-17 12:01:15 -04:00
func sigNotOnStack(sig uint32, sp uintptr, mp *m) {
println("signal", sig, "received but handler not on signal stack")
runtime/cgo: store M for C-created thread in pthread key This reapplies CL 485500, with a fix drafted in CL 492987 incorporated. CL 485500 is reverted due to #60004 and #60007. #60004 is fixed in CL 492743. #60007 is fixed in CL 492987 (incorporated in this CL). [Original CL 485500 description] This reapplies CL 481061, with the followup fixes in CL 482975, CL 485315, and CL 485316 incorporated. CL 481061, by doujiang24 <doujiang24@gmail.com>, speed up C to Go calls by binding the M to the C thread. See below for its description. CL 482975 is a followup fix to a C declaration in testprogcgo. CL 485315 is a followup fix for x_cgo_getstackbound on Illumos. CL 485316 is a followup cleanup for ppc64 assembly. CL 479915 passed the G to _cgo_getstackbound for direct updates to gp.stack.lo. A G can be reused on a new thread after the previous thread exited. This could trigger the C TSAN race detector because it couldn't see the synchronization in Go (lockextra) preventing the same G from being used on multiple threads at the same time. We work around this by passing the address of a stack variable to _cgo_getstackbound rather than the G. The stack is generally unique per thread, so TSAN won't see the same address from multiple threads. Even if stacks are reused across threads by pthread, C TSAN should see the synchonization in the stack allocator. A regression test is added to misc/cgo/testsanitizer. [Original CL 481061 description] This reapplies CL 392854, with the followup fixes in CL 479255, CL 479915, and CL 481057 incorporated. CL 392854, by doujiang24 <doujiang24@gmail.com>, speed up C to Go calls by binding the M to the C thread. See below for its description. CL 479255 is a followup fix for a small bug in ARM assembly code. CL 479915 is another followup fix to address C to Go calls after the C code uses some stack, but that CL is also buggy. CL 481057, by Michael Knyszek, is a followup fix for a memory leak bug of CL 479915. 
[Original CL 392854 description] In a C thread, it's necessary to acquire an extra M by using needm while invoking a Go function from C. But, needm and dropm are heavy costs due to the signal-related syscalls. So, we change to not dropm while returning back to C, which means binding the extra M to the C thread until it exits, to avoid needm and dropm on each C to Go call. Instead, we only dropm while the C thread exits, so the extra M won't leak. When invoking a Go function from C: Allocate a pthread variable using pthread_key_create, only once per shared object, and register a thread-exit-time destructor. And store the g0 of the current m into the thread-specified value of the pthread key, only once per C thread, so that the destructor will put the extra M back onto the extra M list while the C thread exits. When returning back to C: Skip dropm in cgocallback, when the pthread variable has been created, so that the extra M will be reused the next time invoke a Go function from C. This is purely a performance optimization. The old version, in which needm & dropm happen on each cgo call, is still correct too, and we have to keep the old version on systems with cgo but without pthreads, like Windows. This optimization is significant, and the specific value depends on the OS system and CPU, but in general, it can be considered as 10x faster, for a simple Go function call from a C thread. For the newly added BenchmarkCGoInCThread, some benchmark results: 1. it's 28x faster, from 3395 ns/op to 121 ns/op, in darwin OS & Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz 2. it's 6.5x faster, from 1495 ns/op to 230 ns/op, in Linux OS & Intel(R) Xeon(R) CPU E5-2630 0 @ 2.30GHz [CL 479915 description] Currently, when C calls into Go the first time, we grab an M using needm, which sets m.g0's stack bounds using the SP. We don't know how big the stack is, so we simply assume 32K. 
Previously, when the Go function returns to C, we drop the M, and the next time C calls into Go, we put a new stack bound on the g0 based on the current SP. After CL 392854, we don't drop the M, and the next time C calls into Go, we reuse the same g0, without recomputing the stack bounds. If the C code uses quite a bit of stack space before calling into Go, the SP may be well below the 32K stack bound we assumed, so the runtime thinks the g0 stack overflows. This CL makes needm get a more accurate stack bound from pthread. (In some platforms this may still be a guess as we don't know exactly where we are in the C stack), but it is probably better than simply assuming 32K. [CL 492987 description] On the first call into Go from a C thread, currently we set the g0 stack's high bound imprecisely based on the SP. With CL 485500, we keep the M and don't recompute the stack bounds when it calls into Go again. If the first call is made when the C thread uses some deep stack, but a subsequent call is made with a shallower stack, the SP may be above g0.stack.hi. This is usually okay as we don't check usually stack.hi. One place where we do check for stack.hi is in the signal handler, in adjustSignalStack. In particular, C TSAN delivers signals on the g0 stack (instead of the usual signal stack). If the SP is above g0.stack.hi, we don't see it is on the g0 stack, and throws. This CL makes it get an accurate stack upper bound with the pthread API (on the platforms where it is available). Also add some debug print for the "handler not on signal stack" throw. Fixes #51676. Fixes #59294. Fixes #59678. Fixes #60007. Change-Id: Ie51c8e81ade34ec81d69fd7bce1fe0039a470776 Reviewed-on: https://go-review.googlesource.com/c/go/+/495855 Run-TryBot: Cherry Mui <cherryyz@google.com> TryBot-Result: Gopher Robot <gobot@golang.org> Reviewed-by: Michael Pratt <mpratt@google.com>
2023-05-17 12:01:15 -04:00
print("mp.gsignal stack [", hex(mp.gsignal.stack.lo), " ", hex(mp.gsignal.stack.hi), "], ")
print("mp.g0 stack [", hex(mp.g0.stack.lo), " ", hex(mp.g0.stack.hi), "], sp=", hex(sp), "\n")
throw("non-Go code set up signal handler without SA_ONSTACK flag")
}
// signalDuringFork is called if we receive a signal while doing a fork.
// We do not want signals at that time, as a signal sent to the process
// group may be delivered to the child process, causing confusion.
// This should never be called, because we block signals across the fork;
// this function is just a safety check. See issue 18600 for background.
//
// It prints the signal number and then throws.
func signalDuringFork(sig uint32) {
	println("signal", sig, "received during fork")
	throw("signal received during fork")
}
// This runs on a foreign stack, without an m or a g. No stack split.
//
//go:nosplit
//go:norace
//go:nowritebarrierrec
func badsignal(sig uintptr, c *sigctxt) {
if !iscgo && !cgoHasExtraM {
// There is no extra M. needm will not be able to grab
// an M. Instead of hanging, just crash.
// Cannot call split-stack function as there is no G.
writeErrStr("fatal: bad g in signal handler\n")
exit(2)
*(*uintptr)(unsafe.Pointer(uintptr(123))) = 2
}
runtime/cgo: store M for C-created thread in pthread key This reapplies CL 485500, with a fix drafted in CL 492987 incorporated. CL 485500 is reverted due to #60004 and #60007. #60004 is fixed in CL 492743. #60007 is fixed in CL 492987 (incorporated in this CL). [Original CL 485500 description] This reapplies CL 481061, with the followup fixes in CL 482975, CL 485315, and CL 485316 incorporated. CL 481061, by doujiang24 <doujiang24@gmail.com>, speed up C to Go calls by binding the M to the C thread. See below for its description. CL 482975 is a followup fix to a C declaration in testprogcgo. CL 485315 is a followup fix for x_cgo_getstackbound on Illumos. CL 485316 is a followup cleanup for ppc64 assembly. CL 479915 passed the G to _cgo_getstackbound for direct updates to gp.stack.lo. A G can be reused on a new thread after the previous thread exited. This could trigger the C TSAN race detector because it couldn't see the synchronization in Go (lockextra) preventing the same G from being used on multiple threads at the same time. We work around this by passing the address of a stack variable to _cgo_getstackbound rather than the G. The stack is generally unique per thread, so TSAN won't see the same address from multiple threads. Even if stacks are reused across threads by pthread, C TSAN should see the synchonization in the stack allocator. A regression test is added to misc/cgo/testsanitizer. [Original CL 481061 description] This reapplies CL 392854, with the followup fixes in CL 479255, CL 479915, and CL 481057 incorporated. CL 392854, by doujiang24 <doujiang24@gmail.com>, speed up C to Go calls by binding the M to the C thread. See below for its description. CL 479255 is a followup fix for a small bug in ARM assembly code. CL 479915 is another followup fix to address C to Go calls after the C code uses some stack, but that CL is also buggy. CL 481057, by Michael Knyszek, is a followup fix for a memory leak bug of CL 479915. 
[Original CL 392854 description] In a C thread, it's necessary to acquire an extra M by using needm while invoking a Go function from C. But, needm and dropm are heavy costs due to the signal-related syscalls. So, we change to not dropm while returning back to C, which means binding the extra M to the C thread until it exits, to avoid needm and dropm on each C to Go call. Instead, we only dropm while the C thread exits, so the extra M won't leak. When invoking a Go function from C: Allocate a pthread variable using pthread_key_create, only once per shared object, and register a thread-exit-time destructor. And store the g0 of the current m into the thread-specified value of the pthread key, only once per C thread, so that the destructor will put the extra M back onto the extra M list while the C thread exits. When returning back to C: Skip dropm in cgocallback, when the pthread variable has been created, so that the extra M will be reused the next time invoke a Go function from C. This is purely a performance optimization. The old version, in which needm & dropm happen on each cgo call, is still correct too, and we have to keep the old version on systems with cgo but without pthreads, like Windows. This optimization is significant, and the specific value depends on the OS system and CPU, but in general, it can be considered as 10x faster, for a simple Go function call from a C thread. For the newly added BenchmarkCGoInCThread, some benchmark results: 1. it's 28x faster, from 3395 ns/op to 121 ns/op, in darwin OS & Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz 2. it's 6.5x faster, from 1495 ns/op to 230 ns/op, in Linux OS & Intel(R) Xeon(R) CPU E5-2630 0 @ 2.30GHz [CL 479915 description] Currently, when C calls into Go the first time, we grab an M using needm, which sets m.g0's stack bounds using the SP. We don't know how big the stack is, so we simply assume 32K. 
Previously, when the Go function returns to C, we drop the M, and the next time C calls into Go, we put a new stack bound on the g0 based on the current SP. After CL 392854, we don't drop the M, and the next time C calls into Go, we reuse the same g0, without recomputing the stack bounds. If the C code uses quite a bit of stack space before calling into Go, the SP may be well below the 32K stack bound we assumed, so the runtime thinks the g0 stack overflows. This CL makes needm get a more accurate stack bound from pthread. On some platforms this may still be a guess, as we don't know exactly where we are in the C stack, but it is probably better than simply assuming 32K. [CL 492987 description] On the first call into Go from a C thread, currently we set the g0 stack's high bound imprecisely based on the SP. With CL 485500, we keep the M and don't recompute the stack bounds when it calls into Go again. If the first call is made when the C thread uses some deep stack, but a subsequent call is made with a shallower stack, the SP may be above g0.stack.hi. This is usually okay as we don't usually check stack.hi. One place where we do check for stack.hi is in the signal handler, in adjustSignalStack. In particular, C TSAN delivers signals on the g0 stack (instead of the usual signal stack). If the SP is above g0.stack.hi, we don't see that it is on the g0 stack, and throw. This CL makes it get an accurate stack upper bound with the pthread API (on the platforms where it is available). Also add some debug print for the "handler not on signal stack" throw. Fixes #51676. Fixes #59294. Fixes #59678. Fixes #60007. Change-Id: Ie51c8e81ade34ec81d69fd7bce1fe0039a470776 Reviewed-on: https://go-review.googlesource.com/c/go/+/495855 Run-TryBot: Cherry Mui <cherryyz@google.com> TryBot-Result: Gopher Robot <gobot@golang.org> Reviewed-by: Michael Pratt <mpratt@google.com>
2023-05-17 12:01:15 -04:00
needm(true)
if !sigsend(uint32(sig)) {
// A foreign thread received the signal sig, and the
// Go code does not want to handle it.
raisebadsignal(uint32(sig), c)
}
dropm()
}
// sigfwd invokes the signal handler whose address is fn, passing it sig,
// info, and ctx. In this file it is used by sigfwdgo to hand a signal to
// the handler that was installed before Go's.
//
// The declaration has no Go body, so the implementation lives outside this
// file (presumably in per-platform assembly; confirm against the port).
//
//go:noescape
func sigfwd(fn uintptr, sig uint32, info *siginfo, ctx unsafe.Pointer)
// Determines if the signal should be handled by Go and if not, forwards the
// signal to the handler that was installed before Go's. Returns whether the
// signal was forwarded.
// This is called by the signal handler, and the world may be stopped.
//
//go:nosplit
//go:nowritebarrierrec
func sigfwdgo(sig uint32, info *siginfo, ctx unsafe.Pointer) bool {
if sig >= uint32(len(sigtable)) {
return false
}
fwdFn := atomic.Loaduintptr(&fwdSig[sig])
flags := sigtable[sig].flags
// If we aren't handling the signal, forward it.
if atomic.Load(&handlingSig[sig]) == 0 || !signalsOK {
// If the signal is ignored, doing nothing is the same as forwarding.
if fwdFn == _SIG_IGN || (fwdFn == _SIG_DFL && flags&_SigIgn != 0) {
return true
}
// We are not handling the signal and there is no other handler to forward to.
// Crash with the default behavior.
if fwdFn == _SIG_DFL {
setsig(sig, _SIG_DFL)
dieFromSignal(sig)
return false
}
sigfwd(fwdFn, sig, info, ctx)
return true
}
// This function and its caller sigtrampgo assumes SIGPIPE is delivered on the
// originating thread. This property does not hold on macOS (golang.org/issue/33384),
// so we have no choice but to ignore SIGPIPE.
if (GOOS == "darwin" || GOOS == "ios") && sig == _SIGPIPE {
return true
}
// If there is no handler to forward to, no need to forward.
if fwdFn == _SIG_DFL {
return false
}
c := &sigctxt{info, ctx}
// Only forward synchronous signals and SIGPIPE.
// Unfortunately, user generated SIGPIPEs will also be forwarded, because si_code
// is set to _SI_USER even for a SIGPIPE raised from a write to a closed socket
// or pipe.
if (c.sigFromUser() || flags&_SigPanic == 0) && sig != _SIGPIPE {
return false
}
// Determine if the signal occurred inside Go code. We test that:
// (1) we weren't in VDSO page,
// (2) we were in a goroutine (i.e., m.curg != nil), and
// (3) we weren't in CGO.
runtime/cgo: store M for C-created thread in pthread key This reapplies CL 485500, with a fix drafted in CL 492987 incorporated. CL 485500 is reverted due to #60004 and #60007. #60004 is fixed in CL 492743. #60007 is fixed in CL 492987 (incorporated in this CL). [Original CL 485500 description] This reapplies CL 481061, with the followup fixes in CL 482975, CL 485315, and CL 485316 incorporated. CL 481061, by doujiang24 <doujiang24@gmail.com>, speed up C to Go calls by binding the M to the C thread. See below for its description. CL 482975 is a followup fix to a C declaration in testprogcgo. CL 485315 is a followup fix for x_cgo_getstackbound on Illumos. CL 485316 is a followup cleanup for ppc64 assembly. CL 479915 passed the G to _cgo_getstackbound for direct updates to gp.stack.lo. A G can be reused on a new thread after the previous thread exited. This could trigger the C TSAN race detector because it couldn't see the synchronization in Go (lockextra) preventing the same G from being used on multiple threads at the same time. We work around this by passing the address of a stack variable to _cgo_getstackbound rather than the G. The stack is generally unique per thread, so TSAN won't see the same address from multiple threads. Even if stacks are reused across threads by pthread, C TSAN should see the synchonization in the stack allocator. A regression test is added to misc/cgo/testsanitizer. [Original CL 481061 description] This reapplies CL 392854, with the followup fixes in CL 479255, CL 479915, and CL 481057 incorporated. CL 392854, by doujiang24 <doujiang24@gmail.com>, speed up C to Go calls by binding the M to the C thread. See below for its description. CL 479255 is a followup fix for a small bug in ARM assembly code. CL 479915 is another followup fix to address C to Go calls after the C code uses some stack, but that CL is also buggy. CL 481057, by Michael Knyszek, is a followup fix for a memory leak bug of CL 479915. 
[Original CL 392854 description] In a C thread, it's necessary to acquire an extra M by using needm while invoking a Go function from C. But, needm and dropm are heavy costs due to the signal-related syscalls. So, we change to not dropm while returning back to C, which means binding the extra M to the C thread until it exits, to avoid needm and dropm on each C to Go call. Instead, we only dropm while the C thread exits, so the extra M won't leak. When invoking a Go function from C: Allocate a pthread variable using pthread_key_create, only once per shared object, and register a thread-exit-time destructor. And store the g0 of the current m into the thread-specified value of the pthread key, only once per C thread, so that the destructor will put the extra M back onto the extra M list while the C thread exits. When returning back to C: Skip dropm in cgocallback, when the pthread variable has been created, so that the extra M will be reused the next time invoke a Go function from C. This is purely a performance optimization. The old version, in which needm & dropm happen on each cgo call, is still correct too, and we have to keep the old version on systems with cgo but without pthreads, like Windows. This optimization is significant, and the specific value depends on the OS system and CPU, but in general, it can be considered as 10x faster, for a simple Go function call from a C thread. For the newly added BenchmarkCGoInCThread, some benchmark results: 1. it's 28x faster, from 3395 ns/op to 121 ns/op, in darwin OS & Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz 2. it's 6.5x faster, from 1495 ns/op to 230 ns/op, in Linux OS & Intel(R) Xeon(R) CPU E5-2630 0 @ 2.30GHz [CL 479915 description] Currently, when C calls into Go the first time, we grab an M using needm, which sets m.g0's stack bounds using the SP. We don't know how big the stack is, so we simply assume 32K. 
Previously, when the Go function returns to C, we drop the M, and the next time C calls into Go, we put a new stack bound on the g0 based on the current SP. After CL 392854, we don't drop the M, and the next time C calls into Go, we reuse the same g0, without recomputing the stack bounds. If the C code uses quite a bit of stack space before calling into Go, the SP may be well below the 32K stack bound we assumed, so the runtime thinks the g0 stack overflows. This CL makes needm get a more accurate stack bound from pthread. (In some platforms this may still be a guess as we don't know exactly where we are in the C stack), but it is probably better than simply assuming 32K. [CL 492987 description] On the first call into Go from a C thread, currently we set the g0 stack's high bound imprecisely based on the SP. With CL 485500, we keep the M and don't recompute the stack bounds when it calls into Go again. If the first call is made when the C thread uses some deep stack, but a subsequent call is made with a shallower stack, the SP may be above g0.stack.hi. This is usually okay as we don't check usually stack.hi. One place where we do check for stack.hi is in the signal handler, in adjustSignalStack. In particular, C TSAN delivers signals on the g0 stack (instead of the usual signal stack). If the SP is above g0.stack.hi, we don't see it is on the g0 stack, and throws. This CL makes it get an accurate stack upper bound with the pthread API (on the platforms where it is available). Also add some debug print for the "handler not on signal stack" throw. Fixes #51676. Fixes #59294. Fixes #59678. Fixes #60007. Change-Id: Ie51c8e81ade34ec81d69fd7bce1fe0039a470776 Reviewed-on: https://go-review.googlesource.com/c/go/+/495855 Run-TryBot: Cherry Mui <cherryyz@google.com> TryBot-Result: Gopher Robot <gobot@golang.org> Reviewed-by: Michael Pratt <mpratt@google.com>
2023-05-17 12:01:15 -04:00
// (4) we weren't in dropped extra m.
gp := sigFetchG(c)
runtime/cgo: store M for C-created thread in pthread key This reapplies CL 485500, with a fix drafted in CL 492987 incorporated. CL 485500 is reverted due to #60004 and #60007. #60004 is fixed in CL 492743. #60007 is fixed in CL 492987 (incorporated in this CL). [Original CL 485500 description] This reapplies CL 481061, with the followup fixes in CL 482975, CL 485315, and CL 485316 incorporated. CL 481061, by doujiang24 <doujiang24@gmail.com>, speed up C to Go calls by binding the M to the C thread. See below for its description. CL 482975 is a followup fix to a C declaration in testprogcgo. CL 485315 is a followup fix for x_cgo_getstackbound on Illumos. CL 485316 is a followup cleanup for ppc64 assembly. CL 479915 passed the G to _cgo_getstackbound for direct updates to gp.stack.lo. A G can be reused on a new thread after the previous thread exited. This could trigger the C TSAN race detector because it couldn't see the synchronization in Go (lockextra) preventing the same G from being used on multiple threads at the same time. We work around this by passing the address of a stack variable to _cgo_getstackbound rather than the G. The stack is generally unique per thread, so TSAN won't see the same address from multiple threads. Even if stacks are reused across threads by pthread, C TSAN should see the synchonization in the stack allocator. A regression test is added to misc/cgo/testsanitizer. [Original CL 481061 description] This reapplies CL 392854, with the followup fixes in CL 479255, CL 479915, and CL 481057 incorporated. CL 392854, by doujiang24 <doujiang24@gmail.com>, speed up C to Go calls by binding the M to the C thread. See below for its description. CL 479255 is a followup fix for a small bug in ARM assembly code. CL 479915 is another followup fix to address C to Go calls after the C code uses some stack, but that CL is also buggy. CL 481057, by Michael Knyszek, is a followup fix for a memory leak bug of CL 479915. 
[Original CL 392854 description] In a C thread, it's necessary to acquire an extra M by using needm while invoking a Go function from C. But, needm and dropm are heavy costs due to the signal-related syscalls. So, we change to not dropm while returning back to C, which means binding the extra M to the C thread until it exits, to avoid needm and dropm on each C to Go call. Instead, we only dropm while the C thread exits, so the extra M won't leak. When invoking a Go function from C: Allocate a pthread variable using pthread_key_create, only once per shared object, and register a thread-exit-time destructor. And store the g0 of the current m into the thread-specified value of the pthread key, only once per C thread, so that the destructor will put the extra M back onto the extra M list while the C thread exits. When returning back to C: Skip dropm in cgocallback, when the pthread variable has been created, so that the extra M will be reused the next time invoke a Go function from C. This is purely a performance optimization. The old version, in which needm & dropm happen on each cgo call, is still correct too, and we have to keep the old version on systems with cgo but without pthreads, like Windows. This optimization is significant, and the specific value depends on the OS system and CPU, but in general, it can be considered as 10x faster, for a simple Go function call from a C thread. For the newly added BenchmarkCGoInCThread, some benchmark results: 1. it's 28x faster, from 3395 ns/op to 121 ns/op, in darwin OS & Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz 2. it's 6.5x faster, from 1495 ns/op to 230 ns/op, in Linux OS & Intel(R) Xeon(R) CPU E5-2630 0 @ 2.30GHz [CL 479915 description] Currently, when C calls into Go the first time, we grab an M using needm, which sets m.g0's stack bounds using the SP. We don't know how big the stack is, so we simply assume 32K. 
Previously, when the Go function returns to C, we drop the M, and the next time C calls into Go, we put a new stack bound on the g0 based on the current SP. After CL 392854, we don't drop the M, and the next time C calls into Go, we reuse the same g0, without recomputing the stack bounds. If the C code uses quite a bit of stack space before calling into Go, the SP may be well below the 32K stack bound we assumed, so the runtime thinks the g0 stack overflows. This CL makes needm get a more accurate stack bound from pthread. (In some platforms this may still be a guess as we don't know exactly where we are in the C stack), but it is probably better than simply assuming 32K. [CL 492987 description] On the first call into Go from a C thread, currently we set the g0 stack's high bound imprecisely based on the SP. With CL 485500, we keep the M and don't recompute the stack bounds when it calls into Go again. If the first call is made when the C thread uses some deep stack, but a subsequent call is made with a shallower stack, the SP may be above g0.stack.hi. This is usually okay as we don't check usually stack.hi. One place where we do check for stack.hi is in the signal handler, in adjustSignalStack. In particular, C TSAN delivers signals on the g0 stack (instead of the usual signal stack). If the SP is above g0.stack.hi, we don't see it is on the g0 stack, and throws. This CL makes it get an accurate stack upper bound with the pthread API (on the platforms where it is available). Also add some debug print for the "handler not on signal stack" throw. Fixes #51676. Fixes #59294. Fixes #59678. Fixes #60007. Change-Id: Ie51c8e81ade34ec81d69fd7bce1fe0039a470776 Reviewed-on: https://go-review.googlesource.com/c/go/+/495855 Run-TryBot: Cherry Mui <cherryyz@google.com> TryBot-Result: Gopher Robot <gobot@golang.org> Reviewed-by: Michael Pratt <mpratt@google.com>
2023-05-17 12:01:15 -04:00
if gp != nil && gp.m != nil && gp.m.curg != nil && !gp.m.isExtraInC && !gp.m.incgo {
return false
}
// Signal not handled by Go, forward it.
if fwdFn != _SIG_IGN {
sigfwd(fwdFn, sig, info, ctx)
}
return true
}
// sigsave stores the calling thread's current signal mask in *p.
// It exists so that the signal mask a non-Go thread had before calling
// into Go can be preserved.
//
// needm calls this, possibly on a non-Go thread that has no g, which is
// why it must be nosplit and nowritebarrierrec.
//
//go:nosplit
//go:nowritebarrierrec
func sigsave(p *sigset) {
	// No new mask is supplied (nil); only the old mask is requested, via p.
	sigprocmask(_SIG_SETMASK, nil, p)
}
// msigrestore installs sigmask as the calling thread's signal mask.
// It is the counterpart used to put back the non-Go signal mask of a
// non-Go thread that called a Go function.
//
// dropm calls this after g has been cleared, which is why it must be
// nosplit and nowritebarrierrec.
//
//go:nosplit
//go:nowritebarrierrec
func msigrestore(sigmask sigset) {
	// The previous mask is not needed, so no output set is passed.
	sigprocmask(_SIG_SETMASK, &sigmask, nil)
}
// sigsetAllExiting is the mask sigblock(true) installs while a thread is
// exiting. It starts as a copy of sigset_all and, on linux with cgo, has
// the libc-reserved signals 32, 33 and 34 removed.
var sigsetAllExiting = func() sigset {
	mask := sigset_all

	// GOOS-specific overrides are applied here, rather than in osinit,
	// because osinit may be called before sigsetAllExiting is
	// initialized (#51913).
	if GOOS != "linux" || !iscgo {
		return mask
	}

	// #42494 glibc and musl reserve some signals for
	// internal use and require they not be blocked by
	// the rest of a normal C runtime. When the go runtime
	// blocks...unblocks signals, temporarily, the blocked
	// interval of time is generally very short. As such,
	// these expectations of *libc code are mostly met by
	// the combined go+cgo system of threads. However,
	// when go causes a thread to exit, via a return from
	// mstart(), the combined runtime can deadlock if
	// these signals are blocked. Thus, don't block these
	// signals when exiting threads.
	// - glibc: SIGCANCEL (32), SIGSETXID (33)
	// - musl: SIGTIMER (32), SIGCANCEL (33), SIGSYNCCALL (34)
	for sig := 32; sig <= 34; sig++ {
		sigdelset(&mask, sig)
	}
	return mask
}()
// sigblock blocks signals in the current thread's signal mask.
// This is used to block signals while setting up and tearing down g
// when a non-Go thread calls a Go function. When a thread is exiting
// we use the sigsetAllExiting value, otherwise the OS specific
// definition of sigset_all is used.
// This is nosplit and nowritebarrierrec because it is called by needm
// which may be called on a non-Go thread with no g available.
//
//go:nosplit
//go:nowritebarrierrec
func sigblock(exiting bool) {
if exiting {
sigprocmask(_SIG_SETMASK, &sigsetAllExiting, nil)
return
}
sigprocmask(_SIG_SETMASK, &sigset_all, nil)
}
// unblocksig takes sig out of the calling thread's signal mask, leaving
// the rest of the mask alone.
//
// dieFromSignal calls this, and dieFromSignal can in turn be reached from
// sigfwdgo while running in the signal handler, on the signal stack, with
// no g available; hence nosplit and nowritebarrierrec.
//
//go:nosplit
//go:nowritebarrierrec
func unblocksig(sig uint32) {
	// Build a set holding only sig and ask for exactly that set to be unblocked.
	var only sigset
	sigaddset(&only, int(sig))
	sigprocmask(_SIG_UNBLOCK, &only, nil)
}
// minitSignals performs the per-thread signal setup for a newly
// initialized m: first the alternate signal stack, then the signal mask.
func minitSignals() {
	minitSignalStack()
	minitSignalMask()
}
// minitSignalStack is called when initializing a new m to set the
// alternate signal stack. If the alternate signal stack is not set
// for the thread (the normal case) then set the alternate signal
// stack to the gsignal stack. If the alternate signal stack is set
// for the thread (the case when a non-Go thread sets the alternate
// signal stack and then calls a Go function) then set the gsignal
// stack to the alternate signal stack. We also set the alternate
// signal stack to the gsignal stack if cgo is not used (regardless
// of whether it is already set). Record which choice was made in
// newSigstack, so that it can be undone in unminit.
func minitSignalStack() {
mp := getg().m
var st stackt
sigaltstack(nil, &st)
if st.ss_flags&_SS_DISABLE != 0 || !iscgo {
signalstack(&mp.gsignal.stack)
mp.newSigstack = true
} else {
setGsignalStack(&st, &mp.goSigStack)
mp.newSigstack = false
}
}
// minitSignalMask installs the signal mask for a newly initialized m.
//
// On entry every signal is blocked for the thread. The starting point is
// m.sigmask, which was filled in either by initSigmask (newly created
// thread) or by sigsave (non-Go thread calling a Go function). Every
// signal that blockableSig says must not be blocked is removed from that
// mask, and the result becomes the thread's mask. Once this returns the
// thread can receive signals.
func minitSignalMask() {
	mask := getg().m.sigmask
	for sig := 0; sig < len(sigtable); sig++ {
		if blockableSig(uint32(sig)) {
			continue
		}
		// Essential signal: make sure it is deliverable.
		sigdelset(&mask, sig)
	}
	sigprocmask(_SIG_SETMASK, &mask, nil)
}
// unminitSignals is called from dropm, via unminit, to undo the
// effect of calling minit on a non-Go thread.
//
//go:nosplit
func unminitSignals() {
if getg().m.newSigstack {
st := stackt{ss_flags: _SS_DISABLE}
sigaltstack(&st, nil)
} else {
runtime: restore the Go-allocated signal stack in unminit Currently, when we minit on a thread that already has an alternate signal stack (e.g., because the M was an extram being used for a cgo callback, or to handle a signal on a C thread, or because the platform's libc always allocates a signal stack like on Android), we simply drop the Go-allocated gsignal stack on the floor. This is a problem for Ms on the extram list because those Ms may later be reused for a different thread that may not have its own alternate signal stack. On tip, this manifests as a crash in sigaltstack because we clear the gsignal stack bounds in unminit and later try to use those cleared bounds when we re-minit that M. On 1.9 and earlier, we didn't clear the bounds, so this manifests as running more than one signal handler on the same signal stack, which could lead to arbitrary memory corruption. This CL fixes this problem by saving the Go-allocated gsignal stack in a new field in the m struct when overwriting it with a system-provided signal stack, and then restoring the original gsignal stack in unminit. This CL is designed to be easy to back-port to 1.9. It won't quite cherry-pick cleanly, but it should be sufficient to simply ignore the change in mexit (which didn't exist in 1.9). Now that we always have a place to stash the original signal stack in the m struct, there are some simplifications we can make to the signal stack handling. We'll do those in a later CL. Fixes #22930. Change-Id: I55c5a6dd9d97532f131146afdef0b216e1433054 Reviewed-on: https://go-review.googlesource.com/81476 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2017-11-30 22:09:35 -05:00
// We got the signal stack from someone else. Restore
// the Go-allocated stack in case this M gets reused
// for another thread (e.g., it's an extram). Also, on
// Android, libc allocates a signal stack for all
// threads, so it's important to restore the Go stack
// even on Go-created threads so we can free it.
restoreGsignalStack(&getg().m.goSigStack)
}
}
// blockableSig reports whether sig is allowed to be blocked by a thread's
// signal mask. The rules, in order:
//
//   - Signals flagged _SigUnblock are never blockable; these are the
//     synchronous signals that turn into a Go panic.
//   - The preemption signal is never blockable while it is in use.
//   - When linked into a C program (c-archive/c-shared), everything else
//     is blockable: the C code decides the disposition of those signals.
//   - In a Go program, signals flagged _SigKill or _SigThrow are not
//     blockable, as otherwise all running threads could block them and
//     delay their delivery until a new thread is started.
//
// sig is used to index sigtable directly, so it must be a valid index.
func blockableSig(sig uint32) bool {
	flags := sigtable[sig].flags
	switch {
	case flags&_SigUnblock != 0:
		return false
	case sig == sigPreempt && preemptMSupported && debug.asyncpreemptoff == 0:
		return false
	case isarchive || islibrary:
		return true
	}
	return flags&(_SigKill|_SigThrow) == 0
}
// gsignalStack saves the fields of the gsignal stack changed by
// setGsignalStack.
//
// setGsignalStack fills it from the current m's gsignal before pointing
// gsignal at a foreign stack, and restoreGsignalStack copies the values
// back.
type gsignalStack struct {
	stack       stack   // saved gsignal.stack
	stackguard0 uintptr // saved gsignal.stackguard0
	stackguard1 uintptr // saved gsignal.stackguard1
	stktopsp    uintptr // saved gsignal.stktopsp
}
// setGsignalStack points the current m's gsignal stack at an alternate
// signal stack described by st, as returned from the sigaltstack system
// call. It is used when non-Go code has set the alternate signal stack.
//
// If old is non-nil, the previous stack bounds, both stack guards, and
// stktopsp are first copied into *old so that restoreGsignalStack can put
// them back. stktopsp is saved but not modified here.
//
//go:nosplit
//go:nowritebarrierrec
func setGsignalStack(st *stackt, old *gsignalStack) {
	sigg := getg().m.gsignal
	if old != nil {
		old.stack = sigg.stack
		old.stackguard0 = sigg.stackguard0
		old.stackguard1 = sigg.stackguard1
		old.stktopsp = sigg.stktopsp
	}

	// The new stack spans [base, base+ss_size); both guards sit
	// stackGuard bytes above its low end.
	base := uintptr(unsafe.Pointer(st.ss_sp))
	guard := base + stackGuard
	sigg.stack.lo = base
	sigg.stack.hi = base + st.ss_size
	sigg.stackguard0 = guard
	sigg.stackguard1 = guard
}
// restoreGsignalStack copies the stack bounds, both stack guards, and
// stktopsp saved in *st back into the current m's gsignal, returning the
// gsignal stack to the state it had before it was redirected.
//
//go:nosplit
//go:nowritebarrierrec
func restoreGsignalStack(st *gsignalStack) {
	mp := getg().m
	mp.gsignal.stack = st.stack
	mp.gsignal.stackguard0 = st.stackguard0
	mp.gsignal.stackguard1 = st.stackguard1
	mp.gsignal.stktopsp = st.stktopsp
}
// signalstack installs the memory described by s as the calling thread's
// alternate signal stack.
//
//go:nosplit
func signalstack(s *stack) {
	var alt stackt
	alt.ss_size = s.hi - s.lo
	// The stack pointer field is filled in by a helper rather than assigned directly.
	setSignalstackSP(&alt, s.lo)
	sigaltstack(&alt, nil)
}
// setsigsegv is used on darwin/arm64 to fake a segmentation fault.
// It records on the current g that a SIGSEGV with code _SEGV_MAPERR
// occurred at pc.
//
// This is exported via linkname to assembly in runtime/cgo.
//
//go:nosplit
//go:linkname setsigsegv
func setsigsegv(pc uintptr) {
	cur := getg()
	cur.sig = _SIGSEGV
	cur.sigpc = pc
	cur.sigcode0 = _SEGV_MAPERR
	cur.sigcode1 = 0 // TODO: emulate si_addr
}