go/src/runtime/os_darwin.go

486 lines
12 KiB
Go
Raw Normal View History

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package runtime
import (
"internal/abi"
"unsafe"
)
type mOS struct {
initialized bool
mutex pthreadmutex
cond pthreadcond
count int
}
func unimplemented(name string) {
println(name, "not implemented")
*(*int)(unsafe.Pointer(uintptr(1231))) = 1231
}
//go:nosplit
func semacreate(mp *m) {
if mp.initialized {
return
}
mp.initialized = true
if err := pthread_mutex_init(&mp.mutex, nil); err != 0 {
throw("pthread_mutex_init")
}
if err := pthread_cond_init(&mp.cond, nil); err != 0 {
throw("pthread_cond_init")
}
}
//go:nosplit
func semasleep(ns int64) int32 {
var start int64
if ns >= 0 {
start = nanotime()
}
g := getg()
mp := g.m
if g == mp.gsignal {
// sema sleep/wakeup are implemented with pthreads, which are not async-signal-safe on Darwin.
throw("semasleep on Darwin signal stack")
}
pthread_mutex_lock(&mp.mutex)
for {
if mp.count > 0 {
mp.count--
pthread_mutex_unlock(&mp.mutex)
return 0
}
if ns >= 0 {
spent := nanotime() - start
if spent >= ns {
pthread_mutex_unlock(&mp.mutex)
return -1
}
var t timespec
t.setNsec(ns - spent)
err := pthread_cond_timedwait_relative_np(&mp.cond, &mp.mutex, &t)
if err == _ETIMEDOUT {
pthread_mutex_unlock(&mp.mutex)
return -1
}
} else {
pthread_cond_wait(&mp.cond, &mp.mutex)
}
}
}
//go:nosplit
func semawakeup(mp *m) {
if g := getg(); g == g.m.gsignal {
throw("semawakeup on Darwin signal stack")
}
pthread_mutex_lock(&mp.mutex)
mp.count++
if mp.count > 0 {
pthread_cond_signal(&mp.cond)
}
pthread_mutex_unlock(&mp.mutex)
}
// The read and write file descriptors used by the sigNote functions.
var sigNoteRead, sigNoteWrite int32
// sigNoteSetup initializes a single, there-can-only-be-one, async-signal-safe note.
//
// The current implementation of notes on Darwin is not async-signal-safe,
// because the functions pthread_mutex_lock, pthread_cond_signal, and
// pthread_mutex_unlock, called by semawakeup, are not async-signal-safe.
// There is only one case where we need to wake up a note from a signal
// handler: the sigsend function. The signal handler code does not require
// all the features of notes: it does not need to do a timed wait.
// This is a separate implementation of notes, based on a pipe, that does
// not support timed waits but is async-signal-safe.
func sigNoteSetup(*note) {
if sigNoteRead != 0 || sigNoteWrite != 0 {
// Generalizing this would require avoiding the pipe-fork-closeonexec race, which entangles syscall.
throw("duplicate sigNoteSetup")
}
var errno int32
sigNoteRead, sigNoteWrite, errno = pipe()
if errno != 0 {
throw("pipe failed")
}
closeonexec(sigNoteRead)
closeonexec(sigNoteWrite)
// Make the write end of the pipe non-blocking, so that if the pipe
// buffer is somehow full we will not block in the signal handler.
// Leave the read end of the pipe blocking so that we will block
// in sigNoteSleep.
setNonblock(sigNoteWrite)
}
// sigNoteWakeup wakes up a thread sleeping on a note created by sigNoteSetup.
func sigNoteWakeup(*note) {
var b byte
write(uintptr(sigNoteWrite), unsafe.Pointer(&b), 1)
}
// sigNoteSleep waits for a note created by sigNoteSetup to be woken.
func sigNoteSleep(*note) {
for {
var b byte
entersyscallblock()
n := read(sigNoteRead, unsafe.Pointer(&b), 1)
exitsyscall()
if n != -_EINTR {
return
}
}
}
// BSD interface for threading.
func osinit() {
// pthread_create delayed until end of goenvs so that we
// can look at the environment first.
ncpu = getncpu()
physPageSize = getPageSize()
cmd/link, runtime: Apple libc atfork workaround take 3 CL 451735 worked around bugs in Apple's atfork handlers by calling notify_is_valid_token and xpc_atfork_child at startup, so that init code that wouldn't be safe in the child process would be warmed up in the parent process instead, but xpc_atfork_child broke use of the xpc library in Go programs, and xpc is internally used by various macOS frameworks (#57263). CL 459175 reverted that change, and then CL 459176 tried a new approach: use __fork, which doesn't call any of the atfork handlers at all. That worked, but an Apple engineer reviewing the change in private email suggests that since __fork is not public API, it should be avoided. The same engineer (with access to the source code for the xpc library) suggests that the breakage in #57263 is caused by xpc_atfork_child marking the library as unusable, expecting an imminent call to exec, and that calling xpc_date_create_from_current instead would do the necessary initialization without marking xpc as unusable. CL 460475 reverted that change, to prepare for this one. This CL goes back to the original “call functions to warm things up” approach, replacing xpc_atfork_child with xpc_date_create_from_current. The CL also updates cmd/link to use OS and SDK version 10.13.0 for x86 macOS binaries, up from 10.9.0, also suggested by the Apple engineer. Combined with the two warmup calls, this makes the fork hangs go away. The minimum macOS version has been 10.13 High Sierra since Go 1.17, so there should be no problem with writing that in the binaries too. Fixes #33565. Fixes #56784. Fixes #57263. Fixes #57577. Change-Id: I20769d9daa1fe9ea930f8009481335f8a14dc21b Reviewed-on: https://go-review.googlesource.com/c/go/+/460476 Auto-Submit: Russ Cox <rsc@golang.org> Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gopher Robot <gobot@golang.org> Reviewed-by: Bryan Mills <bcmills@google.com> Reviewed-by: Cherry Mui <cherryyz@google.com>
2023-01-04 09:21:14 -05:00
osinit_hack()
}
func sysctlbynameInt32(name []byte) (int32, int32) {
out := int32(0)
nout := unsafe.Sizeof(out)
ret := sysctlbyname(&name[0], (*byte)(unsafe.Pointer(&out)), &nout, nil, 0)
return ret, out
}
//go:linkname internal_cpu_getsysctlbyname internal/cpu.getsysctlbyname
func internal_cpu_getsysctlbyname(name []byte) (int32, int32) {
return sysctlbynameInt32(name)
}
const (
_CTL_HW = 6
_HW_NCPU = 3
_HW_PAGESIZE = 7
)
func getncpu() int32 {
// Use sysctl to fetch hw.ncpu.
mib := [2]uint32{_CTL_HW, _HW_NCPU}
out := uint32(0)
nout := unsafe.Sizeof(out)
ret := sysctl(&mib[0], 2, (*byte)(unsafe.Pointer(&out)), &nout, nil, 0)
if ret >= 0 && int32(out) > 0 {
return int32(out)
}
return 1
}
func getPageSize() uintptr {
// Use sysctl to fetch hw.pagesize.
mib := [2]uint32{_CTL_HW, _HW_PAGESIZE}
out := uint32(0)
nout := unsafe.Sizeof(out)
ret := sysctl(&mib[0], 2, (*byte)(unsafe.Pointer(&out)), &nout, nil, 0)
if ret >= 0 && int32(out) > 0 {
return uintptr(out)
}
return 0
}
var urandom_dev = []byte("/dev/urandom\x00")
//go:nosplit
math/rand, math/rand/v2: use ChaCha8 for global rand Move ChaCha8 code into internal/chacha8rand and use it to implement runtime.rand, which is used for the unseeded global source for both math/rand and math/rand/v2. This also affects the calculation of the start point for iteration over very very large maps (when the 32-bit fastrand is not big enough). The benefit is that misuse of the global random number generators in math/rand and math/rand/v2 in contexts where non-predictable randomness is important for security reasons is no longer a security problem, removing a common mistake among programmers who are unaware of the different kinds of randomness. The cost is an extra 304 bytes per thread stored in the m struct plus 2-3ns more per random uint64 due to the more sophisticated algorithm. Using PCG looks like it would cost about the same, although I haven't benchmarked that. Before this, the math/rand and math/rand/v2 global generator was wyrand (https://github.com/wangyi-fudan/wyhash). For math/rand, using wyrand instead of the Mitchell/Reeds/Thompson ALFG was justifiable, since the latter was not any better. But for math/rand/v2, the global generator really should be at least as good as one of the well-studied, specific algorithms provided directly by the package, and it's not. (Wyrand is still reasonable for scheduling and cache decisions.) Good randomness does have a cost: about twice wyrand. Also rationalize the various runtime rand references. goos: linux goarch: amd64 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ bbb48afeb7.amd64 │ 5cf807d1ea.amd64 │ │ sec/op │ sec/op vs base │ ChaCha8-32 1.862n ± 2% 1.861n ± 2% ~ (p=0.825 n=20) PCG_DXSM-32 1.471n ± 1% 1.460n ± 2% ~ (p=0.153 n=20) SourceUint64-32 1.636n ± 2% 1.582n ± 1% -3.30% (p=0.000 n=20) GlobalInt64-32 2.087n ± 1% 3.663n ± 1% +75.54% (p=0.000 n=20) GlobalInt64Parallel-32 0.1042n ± 1% 0.2026n ± 1% +94.48% (p=0.000 n=20) GlobalUint64-32 2.263n ± 2% 3.724n ± 1% +64.57% (p=0.000 n=20) GlobalUint64Parallel-32 0.1019n ± 1% 0.1973n ± 1% +93.67% (p=0.000 n=20) Int64-32 1.771n ± 1% 1.774n ± 1% ~ (p=0.449 n=20) Uint64-32 1.863n ± 2% 1.866n ± 1% ~ (p=0.364 n=20) GlobalIntN1000-32 3.134n ± 3% 4.730n ± 2% +50.95% (p=0.000 n=20) IntN1000-32 2.489n ± 1% 2.489n ± 1% ~ (p=0.683 n=20) Int64N1000-32 2.521n ± 1% 2.516n ± 1% ~ (p=0.394 n=20) Int64N1e8-32 2.479n ± 1% 2.478n ± 2% ~ (p=0.743 n=20) Int64N1e9-32 2.530n ± 2% 2.514n ± 2% ~ (p=0.193 n=20) Int64N2e9-32 2.501n ± 1% 2.494n ± 1% ~ (p=0.616 n=20) Int64N1e18-32 3.227n ± 1% 3.205n ± 1% ~ (p=0.101 n=20) Int64N2e18-32 3.647n ± 1% 3.599n ± 1% ~ (p=0.019 n=20) Int64N4e18-32 5.135n ± 1% 5.069n ± 2% ~ (p=0.034 n=20) Int32N1000-32 2.657n ± 1% 2.637n ± 1% ~ (p=0.180 n=20) Int32N1e8-32 2.636n ± 1% 2.636n ± 1% ~ (p=0.763 n=20) Int32N1e9-32 2.660n ± 2% 2.638n ± 1% ~ (p=0.358 n=20) Int32N2e9-32 2.662n ± 2% 2.618n ± 2% ~ (p=0.064 n=20) Float32-32 2.272n ± 2% 2.239n ± 2% ~ (p=0.194 n=20) Float64-32 2.272n ± 1% 2.286n ± 2% ~ (p=0.763 n=20) ExpFloat64-32 3.762n ± 1% 3.744n ± 1% ~ (p=0.171 n=20) NormFloat64-32 3.706n ± 1% 3.655n ± 2% ~ (p=0.066 n=20) Perm3-32 32.93n ± 3% 34.62n ± 1% +5.13% (p=0.000 n=20) Perm30-32 202.9n ± 1% 204.0n ± 1% ~ (p=0.482 n=20) Perm30ViaShuffle-32 115.0n ± 1% 114.9n ± 1% ~ (p=0.358 n=20) ShuffleOverhead-32 112.8n ± 1% 112.7n ± 1% ~ (p=0.692 n=20) Concurrent-32 2.107n ± 0% 3.725n ± 1% +76.75% (p=0.000 n=20) goos: darwin goarch: arm64 pkg: math/rand/v2 │ bbb48afeb7.arm64 │ 5cf807d1ea.arm64 │ │ sec/op │ sec/op vs base │ ChaCha8-8 2.480n ± 0% 2.429n ± 0% -2.04% (p=0.000 n=20) PCG_DXSM-8 2.531n ± 0% 2.530n ± 0% ~ (p=0.877 n=20) SourceUint64-8 2.534n ± 0% 2.533n ± 0% ~ (p=0.732 n=20) GlobalInt64-8 2.172n ± 1% 4.794n ± 0% +120.67% (p=0.000 n=20) GlobalInt64Parallel-8 0.4320n ± 0% 0.9605n ± 0% +122.32% (p=0.000 n=20) GlobalUint64-8 2.182n ± 0% 4.770n ± 0% +118.58% (p=0.000 n=20) GlobalUint64Parallel-8 0.4307n ± 0% 0.9583n ± 0% +122.51% (p=0.000 n=20) Int64-8 4.107n ± 0% 4.104n ± 0% ~ (p=0.416 n=20) Uint64-8 4.080n ± 0% 4.080n ± 0% ~ (p=0.052 n=20) GlobalIntN1000-8 2.814n ± 2% 5.643n ± 0% +100.50% (p=0.000 n=20) IntN1000-8 4.141n ± 0% 4.139n ± 0% ~ (p=0.140 n=20) Int64N1000-8 4.140n ± 0% 4.140n ± 0% ~ (p=0.313 n=20) Int64N1e8-8 4.140n ± 0% 4.139n ± 0% ~ (p=0.103 n=20) Int64N1e9-8 4.139n ± 0% 4.140n ± 0% ~ (p=0.761 n=20) Int64N2e9-8 4.140n ± 0% 4.140n ± 0% ~ (p=0.636 n=20) Int64N1e18-8 5.266n ± 0% 5.326n ± 1% +1.14% (p=0.001 n=20) Int64N2e18-8 6.052n ± 0% 6.167n ± 0% +1.90% (p=0.000 n=20) Int64N4e18-8 8.826n ± 0% 9.051n ± 0% +2.55% (p=0.000 n=20) Int32N1000-8 4.127n ± 0% 4.132n ± 0% +0.12% (p=0.000 n=20) Int32N1e8-8 4.126n ± 0% 4.131n ± 0% +0.12% (p=0.000 n=20) Int32N1e9-8 4.127n ± 0% 4.132n ± 0% +0.12% (p=0.000 n=20) Int32N2e9-8 4.132n ± 0% 4.131n ± 0% ~ (p=0.017 n=20) Float32-8 4.109n ± 0% 4.105n ± 0% ~ (p=0.379 n=20) Float64-8 4.107n ± 0% 4.106n ± 0% ~ (p=0.867 n=20) ExpFloat64-8 5.339n ± 0% 5.383n ± 0% +0.82% (p=0.000 n=20) NormFloat64-8 5.735n ± 0% 5.737n ± 1% ~ (p=0.856 n=20) Perm3-8 26.65n ± 0% 26.80n ± 1% +0.58% (p=0.000 n=20) Perm30-8 194.8n ± 1% 197.0n ± 0% +1.18% (p=0.000 n=20) Perm30ViaShuffle-8 156.6n ± 0% 157.6n ± 1% +0.61% (p=0.000 n=20) ShuffleOverhead-8 124.9n ± 0% 125.5n ± 0% +0.52% (p=0.000 n=20) Concurrent-8 2.434n ± 3% 5.066n ± 0% +108.09% (p=0.000 n=20) goos: linux goarch: 386 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ bbb48afeb7.386 │ 5cf807d1ea.386 │ │ sec/op │ sec/op vs base │ ChaCha8-32 11.295n ± 1% 4.748n ± 2% -57.96% (p=0.000 n=20) PCG_DXSM-32 7.693n ± 1% 7.738n ± 2% ~ (p=0.542 n=20) SourceUint64-32 7.658n ± 2% 7.622n ± 2% ~ (p=0.344 n=20) GlobalInt64-32 3.473n ± 2% 7.526n ± 2% +116.73% (p=0.000 n=20) GlobalInt64Parallel-32 0.3198n ± 0% 0.5444n ± 0% +70.22% (p=0.000 n=20) GlobalUint64-32 3.612n ± 0% 7.575n ± 1% +109.69% (p=0.000 n=20) GlobalUint64Parallel-32 0.3168n ± 0% 0.5403n ± 0% +70.51% (p=0.000 n=20) Int64-32 7.673n ± 2% 7.789n ± 1% ~ (p=0.122 n=20) Uint64-32 7.773n ± 1% 7.827n ± 2% ~ (p=0.920 n=20) GlobalIntN1000-32 6.268n ± 1% 9.581n ± 1% +52.87% (p=0.000 n=20) IntN1000-32 10.33n ± 2% 10.45n ± 1% ~ (p=0.233 n=20) Int64N1000-32 10.98n ± 2% 11.01n ± 1% ~ (p=0.401 n=20) Int64N1e8-32 11.19n ± 2% 10.97n ± 1% ~ (p=0.033 n=20) Int64N1e9-32 11.06n ± 1% 11.08n ± 1% ~ (p=0.498 n=20) Int64N2e9-32 11.10n ± 1% 11.01n ± 2% ~ (p=0.995 n=20) Int64N1e18-32 15.23n ± 2% 15.04n ± 1% ~ (p=0.973 n=20) Int64N2e18-32 15.89n ± 1% 15.85n ± 1% ~ (p=0.409 n=20) Int64N4e18-32 18.96n ± 2% 19.34n ± 2% ~ (p=0.048 n=20) Int32N1000-32 10.46n ± 2% 10.44n ± 2% ~ (p=0.480 n=20) Int32N1e8-32 10.46n ± 2% 10.49n ± 2% ~ (p=0.951 n=20) Int32N1e9-32 10.28n ± 2% 10.26n ± 1% ~ (p=0.431 n=20) Int32N2e9-32 10.50n ± 2% 10.44n ± 2% ~ (p=0.249 n=20) Float32-32 13.80n ± 2% 13.80n ± 2% ~ (p=0.751 n=20) Float64-32 23.55n ± 2% 23.87n ± 0% ~ (p=0.408 n=20) ExpFloat64-32 15.36n ± 1% 15.29n ± 2% ~ (p=0.316 n=20) NormFloat64-32 13.57n ± 1% 13.79n ± 1% +1.66% (p=0.005 n=20) Perm3-32 45.70n ± 2% 46.99n ± 2% +2.81% (p=0.001 n=20) Perm30-32 399.0n ± 1% 403.8n ± 1% +1.19% (p=0.006 n=20) Perm30ViaShuffle-32 349.0n ± 1% 350.4n ± 1% ~ (p=0.909 n=20) ShuffleOverhead-32 322.3n ± 1% 323.8n ± 1% ~ (p=0.410 n=20) Concurrent-32 3.331n ± 1% 7.312n ± 1% +119.50% (p=0.000 n=20) For #61716. Change-Id: Ibdddeed85c34d9ae397289dc899e04d4845f9ed2 Reviewed-on: https://go-review.googlesource.com/c/go/+/516860 Reviewed-by: Michael Pratt <mpratt@google.com> Reviewed-by: Filippo Valsorda <filippo@golang.org> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
2023-08-06 13:26:28 +10:00
func readRandom(r []byte) int {
fd := open(&urandom_dev[0], 0 /* O_RDONLY */, 0)
n := read(fd, unsafe.Pointer(&r[0]), int32(len(r)))
closefd(fd)
math/rand, math/rand/v2: use ChaCha8 for global rand Move ChaCha8 code into internal/chacha8rand and use it to implement runtime.rand, which is used for the unseeded global source for both math/rand and math/rand/v2. This also affects the calculation of the start point for iteration over very very large maps (when the 32-bit fastrand is not big enough). The benefit is that misuse of the global random number generators in math/rand and math/rand/v2 in contexts where non-predictable randomness is important for security reasons is no longer a security problem, removing a common mistake among programmers who are unaware of the different kinds of randomness. The cost is an extra 304 bytes per thread stored in the m struct plus 2-3ns more per random uint64 due to the more sophisticated algorithm. Using PCG looks like it would cost about the same, although I haven't benchmarked that. Before this, the math/rand and math/rand/v2 global generator was wyrand (https://github.com/wangyi-fudan/wyhash). For math/rand, using wyrand instead of the Mitchell/Reeds/Thompson ALFG was justifiable, since the latter was not any better. But for math/rand/v2, the global generator really should be at least as good as one of the well-studied, specific algorithms provided directly by the package, and it's not. (Wyrand is still reasonable for scheduling and cache decisions.) Good randomness does have a cost: about twice wyrand. Also rationalize the various runtime rand references. goos: linux goarch: amd64 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ bbb48afeb7.amd64 │ 5cf807d1ea.amd64 │ │ sec/op │ sec/op vs base │ ChaCha8-32 1.862n ± 2% 1.861n ± 2% ~ (p=0.825 n=20) PCG_DXSM-32 1.471n ± 1% 1.460n ± 2% ~ (p=0.153 n=20) SourceUint64-32 1.636n ± 2% 1.582n ± 1% -3.30% (p=0.000 n=20) GlobalInt64-32 2.087n ± 1% 3.663n ± 1% +75.54% (p=0.000 n=20) GlobalInt64Parallel-32 0.1042n ± 1% 0.2026n ± 1% +94.48% (p=0.000 n=20) GlobalUint64-32 2.263n ± 2% 3.724n ± 1% +64.57% (p=0.000 n=20) GlobalUint64Parallel-32 0.1019n ± 1% 0.1973n ± 1% +93.67% (p=0.000 n=20) Int64-32 1.771n ± 1% 1.774n ± 1% ~ (p=0.449 n=20) Uint64-32 1.863n ± 2% 1.866n ± 1% ~ (p=0.364 n=20) GlobalIntN1000-32 3.134n ± 3% 4.730n ± 2% +50.95% (p=0.000 n=20) IntN1000-32 2.489n ± 1% 2.489n ± 1% ~ (p=0.683 n=20) Int64N1000-32 2.521n ± 1% 2.516n ± 1% ~ (p=0.394 n=20) Int64N1e8-32 2.479n ± 1% 2.478n ± 2% ~ (p=0.743 n=20) Int64N1e9-32 2.530n ± 2% 2.514n ± 2% ~ (p=0.193 n=20) Int64N2e9-32 2.501n ± 1% 2.494n ± 1% ~ (p=0.616 n=20) Int64N1e18-32 3.227n ± 1% 3.205n ± 1% ~ (p=0.101 n=20) Int64N2e18-32 3.647n ± 1% 3.599n ± 1% ~ (p=0.019 n=20) Int64N4e18-32 5.135n ± 1% 5.069n ± 2% ~ (p=0.034 n=20) Int32N1000-32 2.657n ± 1% 2.637n ± 1% ~ (p=0.180 n=20) Int32N1e8-32 2.636n ± 1% 2.636n ± 1% ~ (p=0.763 n=20) Int32N1e9-32 2.660n ± 2% 2.638n ± 1% ~ (p=0.358 n=20) Int32N2e9-32 2.662n ± 2% 2.618n ± 2% ~ (p=0.064 n=20) Float32-32 2.272n ± 2% 2.239n ± 2% ~ (p=0.194 n=20) Float64-32 2.272n ± 1% 2.286n ± 2% ~ (p=0.763 n=20) ExpFloat64-32 3.762n ± 1% 3.744n ± 1% ~ (p=0.171 n=20) NormFloat64-32 3.706n ± 1% 3.655n ± 2% ~ (p=0.066 n=20) Perm3-32 32.93n ± 3% 34.62n ± 1% +5.13% (p=0.000 n=20) Perm30-32 202.9n ± 1% 204.0n ± 1% ~ (p=0.482 n=20) Perm30ViaShuffle-32 115.0n ± 1% 114.9n ± 1% ~ (p=0.358 n=20) ShuffleOverhead-32 112.8n ± 1% 112.7n ± 1% ~ (p=0.692 n=20) Concurrent-32 2.107n ± 0% 3.725n ± 1% +76.75% (p=0.000 n=20) goos: darwin goarch: arm64 pkg: math/rand/v2 │ bbb48afeb7.arm64 │ 5cf807d1ea.arm64 │ │ sec/op │ sec/op vs base │ ChaCha8-8 2.480n ± 0% 2.429n ± 0% -2.04% (p=0.000 n=20) PCG_DXSM-8 2.531n ± 0% 2.530n ± 0% ~ (p=0.877 n=20) SourceUint64-8 2.534n ± 0% 2.533n ± 0% ~ (p=0.732 n=20) GlobalInt64-8 2.172n ± 1% 4.794n ± 0% +120.67% (p=0.000 n=20) GlobalInt64Parallel-8 0.4320n ± 0% 0.9605n ± 0% +122.32% (p=0.000 n=20) GlobalUint64-8 2.182n ± 0% 4.770n ± 0% +118.58% (p=0.000 n=20) GlobalUint64Parallel-8 0.4307n ± 0% 0.9583n ± 0% +122.51% (p=0.000 n=20) Int64-8 4.107n ± 0% 4.104n ± 0% ~ (p=0.416 n=20) Uint64-8 4.080n ± 0% 4.080n ± 0% ~ (p=0.052 n=20) GlobalIntN1000-8 2.814n ± 2% 5.643n ± 0% +100.50% (p=0.000 n=20) IntN1000-8 4.141n ± 0% 4.139n ± 0% ~ (p=0.140 n=20) Int64N1000-8 4.140n ± 0% 4.140n ± 0% ~ (p=0.313 n=20) Int64N1e8-8 4.140n ± 0% 4.139n ± 0% ~ (p=0.103 n=20) Int64N1e9-8 4.139n ± 0% 4.140n ± 0% ~ (p=0.761 n=20) Int64N2e9-8 4.140n ± 0% 4.140n ± 0% ~ (p=0.636 n=20) Int64N1e18-8 5.266n ± 0% 5.326n ± 1% +1.14% (p=0.001 n=20) Int64N2e18-8 6.052n ± 0% 6.167n ± 0% +1.90% (p=0.000 n=20) Int64N4e18-8 8.826n ± 0% 9.051n ± 0% +2.55% (p=0.000 n=20) Int32N1000-8 4.127n ± 0% 4.132n ± 0% +0.12% (p=0.000 n=20) Int32N1e8-8 4.126n ± 0% 4.131n ± 0% +0.12% (p=0.000 n=20) Int32N1e9-8 4.127n ± 0% 4.132n ± 0% +0.12% (p=0.000 n=20) Int32N2e9-8 4.132n ± 0% 4.131n ± 0% ~ (p=0.017 n=20) Float32-8 4.109n ± 0% 4.105n ± 0% ~ (p=0.379 n=20) Float64-8 4.107n ± 0% 4.106n ± 0% ~ (p=0.867 n=20) ExpFloat64-8 5.339n ± 0% 5.383n ± 0% +0.82% (p=0.000 n=20) NormFloat64-8 5.735n ± 0% 5.737n ± 1% ~ (p=0.856 n=20) Perm3-8 26.65n ± 0% 26.80n ± 1% +0.58% (p=0.000 n=20) Perm30-8 194.8n ± 1% 197.0n ± 0% +1.18% (p=0.000 n=20) Perm30ViaShuffle-8 156.6n ± 0% 157.6n ± 1% +0.61% (p=0.000 n=20) ShuffleOverhead-8 124.9n ± 0% 125.5n ± 0% +0.52% (p=0.000 n=20) Concurrent-8 2.434n ± 3% 5.066n ± 0% +108.09% (p=0.000 n=20) goos: linux goarch: 386 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ bbb48afeb7.386 │ 5cf807d1ea.386 │ │ sec/op │ sec/op vs base │ ChaCha8-32 11.295n ± 1% 4.748n ± 2% -57.96% (p=0.000 n=20) PCG_DXSM-32 7.693n ± 1% 7.738n ± 2% ~ (p=0.542 n=20) SourceUint64-32 7.658n ± 2% 7.622n ± 2% ~ (p=0.344 n=20) GlobalInt64-32 3.473n ± 2% 7.526n ± 2% +116.73% (p=0.000 n=20) GlobalInt64Parallel-32 0.3198n ± 0% 0.5444n ± 0% +70.22% (p=0.000 n=20) GlobalUint64-32 3.612n ± 0% 7.575n ± 1% +109.69% (p=0.000 n=20) GlobalUint64Parallel-32 0.3168n ± 0% 0.5403n ± 0% +70.51% (p=0.000 n=20) Int64-32 7.673n ± 2% 7.789n ± 1% ~ (p=0.122 n=20) Uint64-32 7.773n ± 1% 7.827n ± 2% ~ (p=0.920 n=20) GlobalIntN1000-32 6.268n ± 1% 9.581n ± 1% +52.87% (p=0.000 n=20) IntN1000-32 10.33n ± 2% 10.45n ± 1% ~ (p=0.233 n=20) Int64N1000-32 10.98n ± 2% 11.01n ± 1% ~ (p=0.401 n=20) Int64N1e8-32 11.19n ± 2% 10.97n ± 1% ~ (p=0.033 n=20) Int64N1e9-32 11.06n ± 1% 11.08n ± 1% ~ (p=0.498 n=20) Int64N2e9-32 11.10n ± 1% 11.01n ± 2% ~ (p=0.995 n=20) Int64N1e18-32 15.23n ± 2% 15.04n ± 1% ~ (p=0.973 n=20) Int64N2e18-32 15.89n ± 1% 15.85n ± 1% ~ (p=0.409 n=20) Int64N4e18-32 18.96n ± 2% 19.34n ± 2% ~ (p=0.048 n=20) Int32N1000-32 10.46n ± 2% 10.44n ± 2% ~ (p=0.480 n=20) Int32N1e8-32 10.46n ± 2% 10.49n ± 2% ~ (p=0.951 n=20) Int32N1e9-32 10.28n ± 2% 10.26n ± 1% ~ (p=0.431 n=20) Int32N2e9-32 10.50n ± 2% 10.44n ± 2% ~ (p=0.249 n=20) Float32-32 13.80n ± 2% 13.80n ± 2% ~ (p=0.751 n=20) Float64-32 23.55n ± 2% 23.87n ± 0% ~ (p=0.408 n=20) ExpFloat64-32 15.36n ± 1% 15.29n ± 2% ~ (p=0.316 n=20) NormFloat64-32 13.57n ± 1% 13.79n ± 1% +1.66% (p=0.005 n=20) Perm3-32 45.70n ± 2% 46.99n ± 2% +2.81% (p=0.001 n=20) Perm30-32 399.0n ± 1% 403.8n ± 1% +1.19% (p=0.006 n=20) Perm30ViaShuffle-32 349.0n ± 1% 350.4n ± 1% ~ (p=0.909 n=20) ShuffleOverhead-32 322.3n ± 1% 323.8n ± 1% ~ (p=0.410 n=20) Concurrent-32 3.331n ± 1% 7.312n ± 1% +119.50% (p=0.000 n=20) For #61716. Change-Id: Ibdddeed85c34d9ae397289dc899e04d4845f9ed2 Reviewed-on: https://go-review.googlesource.com/c/go/+/516860 Reviewed-by: Michael Pratt <mpratt@google.com> Reviewed-by: Filippo Valsorda <filippo@golang.org> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
2023-08-06 13:26:28 +10:00
return int(n)
}
func goenvs() {
goenvs_unix()
}
// May run with m.p==nil, so write barriers are not allowed.
//
//go:nowritebarrierrec
func newosproc(mp *m) {
stk := unsafe.Pointer(mp.g0.stack.hi)
if false {
print("newosproc stk=", stk, " m=", mp, " g=", mp.g0, " id=", mp.id, " ostk=", &mp, "\n")
}
// Initialize an attribute object.
var attr pthreadattr
var err int32
err = pthread_attr_init(&attr)
if err != 0 {
writeErrStr(failthreadcreate)
exit(1)
}
// Find out OS stack size for our own stack guard.
var stacksize uintptr
if pthread_attr_getstacksize(&attr, &stacksize) != 0 {
writeErrStr(failthreadcreate)
exit(1)
}
mp.g0.stack.hi = stacksize // for mstart
// Tell the pthread library we won't join with this thread.
if pthread_attr_setdetachstate(&attr, _PTHREAD_CREATE_DETACHED) != 0 {
writeErrStr(failthreadcreate)
exit(1)
}
// Finally, create the thread. It starts at mstart_stub, which does some low-level
// setup and then calls mstart.
var oset sigset
sigprocmask(_SIG_SETMASK, &sigset_all, &oset)
err = retryOnEAGAIN(func() int32 {
return pthread_create(&attr, abi.FuncPCABI0(mstart_stub), unsafe.Pointer(mp))
})
sigprocmask(_SIG_SETMASK, &oset, nil)
if err != 0 {
writeErrStr(failthreadcreate)
exit(1)
}
}
// glue code to call mstart from pthread_create.
func mstart_stub()
// newosproc0 is a version of newosproc that can be called before the runtime
// is initialized.
//
// This function is not safe to use after initialization as it does not pass an M as fnarg.
//
//go:nosplit
func newosproc0(stacksize uintptr, fn uintptr) {
// Initialize an attribute object.
var attr pthreadattr
var err int32
err = pthread_attr_init(&attr)
if err != 0 {
writeErrStr(failthreadcreate)
exit(1)
}
// The caller passes in a suggested stack size,
// from when we allocated the stack and thread ourselves,
// without libpthread. Now that we're using libpthread,
// we use the OS default stack size instead of the suggestion.
// Find out that stack size for our own stack guard.
if pthread_attr_getstacksize(&attr, &stacksize) != 0 {
writeErrStr(failthreadcreate)
exit(1)
}
g0.stack.hi = stacksize // for mstart
runtime: delineate which memstats are system stats with a type This change modifies the type of several mstats fields to be a new type: sysMemStat. This type has the same structure as the fields used to have. The purpose of this change is to make it very clear which stats may be used in various functions for accounting (usually the platform-specific sys* functions, but there are others). Currently there's an implicit understanding that the *uint64 value passed to these functions is some kind of statistic whose value is atomically managed. This understanding isn't inherently problematic, but we're about to change how some stats (which currently use mSysStatInc and mSysStatDec) work, so we want to make it very clear what the various requirements are around "sysStat". This change also removes mSysStatInc and mSysStatDec in favor of a method on sysMemStat. Note that those two functions were originally written the way they were because atomic 64-bit adds required a valid G on ARM, but this hasn't been the case for a very long time (since golang.org/cl/14204, but even before then it wasn't clear if mutexes required a valid G anymore). Today we implement 64-bit adds on ARM with a spinlock table. Change-Id: I4e9b37cf14afc2ae20cf736e874eb0064af086d7 Reviewed-on: https://go-review.googlesource.com/c/go/+/246971 Run-TryBot: Michael Knyszek <mknyszek@google.com> TryBot-Result: Go Bot <gobot@golang.org> Trust: Michael Knyszek <mknyszek@google.com> Reviewed-by: Michael Pratt <mpratt@google.com>
2020-07-29 20:25:05 +00:00
memstats.stacks_sys.add(int64(stacksize))
// Tell the pthread library we won't join with this thread.
if pthread_attr_setdetachstate(&attr, _PTHREAD_CREATE_DETACHED) != 0 {
writeErrStr(failthreadcreate)
exit(1)
}
// Finally, create the thread. It starts at mstart_stub, which does some low-level
// setup and then calls mstart.
var oset sigset
sigprocmask(_SIG_SETMASK, &sigset_all, &oset)
err = pthread_create(&attr, fn, nil)
sigprocmask(_SIG_SETMASK, &oset, nil)
if err != 0 {
writeErrStr(failthreadcreate)
exit(1)
}
}
// Called to do synchronous initialization of Go code built with
// -buildmode=c-archive or -buildmode=c-shared.
// None of the Go runtime is initialized.
//
//go:nosplit
//go:nowritebarrierrec
func libpreinit() {
initsig(true)
}
// Called to initialize a new m (including the bootstrap m).
// Called on the parent thread (main thread in case of bootstrap), can allocate memory.
func mpreinit(mp *m) {
mp.gsignal = malg(32 * 1024) // OS X wants >= 8K
mp.gsignal.m = mp
if GOOS == "darwin" && GOARCH == "arm64" {
// mlock the signal stack to work around a kernel bug where it may
// SIGILL when the signal stack is not faulted in while a signal
// arrives. See issue 42774.
mlock(unsafe.Pointer(mp.gsignal.stack.hi-physPageSize), physPageSize)
}
}
// Called to initialize a new m (including the bootstrap m).
// Called on the new thread, cannot allocate memory.
func minit() {
// iOS does not support alternate signal stack.
// The signal handler handles it directly.
if !(GOOS == "ios" && GOARCH == "arm64") {
minitSignalStack()
}
minitSignalMask()
getg().m.procid = uint64(pthread_self())
}
// Called from dropm to undo the effect of an minit.
//
//go:nosplit
func unminit() {
// iOS does not support alternate signal stack.
// See minit.
if !(GOOS == "ios" && GOARCH == "arm64") {
unminitSignals()
}
getg().m.procid = 0
}
// Called from exitm, but not from drop, to undo the effect of thread-owned
// resources in minit, semacreate, or elsewhere. Do not take locks after calling this.
func mdestroy(mp *m) {
}
runtime: clean up system calls during cgo callback init During a cgocallback, the runtime calls needm to get an m. The calls made during needm cannot themselves assume that there is an m or a g (which is attached to the m). In the old days of making direct system calls, the only thing you had to do for such functions was mark them //go:nosplit, to avoid the use of g in the stack split prologue. But now, on operating systems that make system calls through shared libraries and use code that saves state in the g or m before doing so, it's not safe to assume g exists. In fact, it is not even safe to call getg(), because it might fault deferencing the TLS storage to find the g pointer (that storage may not be initialized yet, at least on Windows, and perhaps on other systems in the future). The specific routines that are problematic are usleep and osyield, which are called during lock contention in lockextra, called from needm. All this is rather subtle and hidden, so in addition to fixing the problem on Windows, this CL makes the fact of not running on a g much clearer by introducing variants usleep_no_g and osyield_no_g whose names should make clear that there is no g. And then we can remove the various sketchy getg() == nil checks in the existing routines. As part of this cleanup, this CL also deletes onosstack on Windows. onosstack is from back when the runtime was implemented in C. It predates systemstack but does essentially the same thing. Instead of having two different copies of this code, we can use systemstack consistently. This way we need not port onosstack to each architecture. This CL is part of a stack adding windows/arm64 support (#36439), intended to land in the Go 1.17 cycle. This CL is, however, not windows/arm64-specific. It is cleanup meant to make the port (and future ports) easier. Change-Id: I3352de1fd0a3c26267c6e209063e6e86abd26187 Reviewed-on: https://go-review.googlesource.com/c/go/+/288793 Trust: Russ Cox <rsc@golang.org> Trust: Jason A. Donenfeld <Jason@zx2c4.com> Reviewed-by: Cherry Zhang <cherryyz@google.com> Reviewed-by: Jason A. Donenfeld <Jason@zx2c4.com>
2021-01-30 07:07:42 -05:00
//go:nosplit
func osyield_no_g() {
usleep_no_g(1)
}
//go:nosplit
func osyield() {
usleep(1)
}
const (
_NSIG = 32
_SI_USER = 0 /* empirically true, but not what headers say */
_SIG_BLOCK = 1
_SIG_UNBLOCK = 2
_SIG_SETMASK = 3
_SS_DISABLE = 4
)
//extern SigTabTT runtime·sigtab[];
type sigset uint32
var sigset_all = ^sigset(0)
//go:nosplit
//go:nowritebarrierrec
func setsig(i uint32, fn uintptr) {
var sa usigactiont
sa.sa_flags = _SA_SIGINFO | _SA_ONSTACK | _SA_RESTART
sa.sa_mask = ^uint32(0)
if fn == abi.FuncPCABIInternal(sighandler) { // abi.FuncPCABIInternal(sighandler) matches the callers in signal_unix.go
if iscgo {
fn = abi.FuncPCABI0(cgoSigtramp)
} else {
fn = abi.FuncPCABI0(sigtramp)
}
}
*(*uintptr)(unsafe.Pointer(&sa.__sigaction_u)) = fn
sigaction(i, &sa, nil)
}
// sigtramp is the callback from libc when a signal is received.
// It is called with the C calling convention.
func sigtramp()
func cgoSigtramp()
//go:nosplit
//go:nowritebarrierrec
func setsigstack(i uint32) {
var osa usigactiont
sigaction(i, nil, &osa)
handler := *(*uintptr)(unsafe.Pointer(&osa.__sigaction_u))
if osa.sa_flags&_SA_ONSTACK != 0 {
return
}
var sa usigactiont
*(*uintptr)(unsafe.Pointer(&sa.__sigaction_u)) = handler
sa.sa_mask = osa.sa_mask
sa.sa_flags = osa.sa_flags | _SA_ONSTACK
sigaction(i, &sa, nil)
}
//go:nosplit
//go:nowritebarrierrec
func getsig(i uint32) uintptr {
var sa usigactiont
sigaction(i, nil, &sa)
return *(*uintptr)(unsafe.Pointer(&sa.__sigaction_u))
}
// setSignalstackSP sets the ss_sp field of a stackt.
//
//go:nosplit
func setSignalstackSP(s *stackt, sp uintptr) {
*(*uintptr)(unsafe.Pointer(&s.ss_sp)) = sp
}
//go:nosplit
//go:nowritebarrierrec
func sigaddset(mask *sigset, i int) {
*mask |= 1 << (uint32(i) - 1)
}
func sigdelset(mask *sigset, i int) {
*mask &^= 1 << (uint32(i) - 1)
}
func setProcessCPUProfiler(hz int32) {
setProcessCPUProfilerTimer(hz)
}
func setThreadCPUProfiler(hz int32) {
setThreadCPUProfilerHz(hz)
}
//go:nosplit
func validSIGPROF(mp *m, c *sigctxt) bool {
return true
}
//go:linkname executablePath os.executablePath
var executablePath string
func sysargs(argc int32, argv **byte) {
// skip over argv, envv and the first string will be the path
n := argc + 1
for argv_index(argv, n) != nil {
n++
}
executablePath = gostringnocopy(argv_index(argv, n+1))
// strip "executable_path=" prefix if available, it's added after OS X 10.11.
const prefix = "executable_path="
if len(executablePath) > len(prefix) && executablePath[:len(prefix)] == prefix {
executablePath = executablePath[len(prefix):]
}
}
func signalM(mp *m, sig int) {
pthread_kill(pthread(mp.procid), uint32(sig))
}
runtime, syscall: reimplement AllThreadsSyscall using only signals. In issue 50113, we see that a thread blocked in a system call can result in a hang of AllThreadsSyscall. To resolve this, we must send a signal to these threads to knock them out of the system call long enough to run the per-thread syscall. Stepping back, if we need to send signals anyway, it should be possible to implement this entire mechanism on top of signals. This CL does so, vastly simplifying the mechanism, both as a direct result of newly-unnecessary code as well as some ancillary simplifications to make things simpler to follow. Major changes: * The rest of the mechanism is moved to os_linux.go, with fields in mOS instead of m itself. * 'Fixup' fields and functions are renamed to 'perThreadSyscall' so they are more precise about their purpose. * Rather than getting passed a closure, doAllThreadsSyscall takes the syscall number and arguments. This avoids a lot of hairy behavior: * The closure may potentially only be live in fields in the M, hidden from the GC. Not necessary with no closure. * The need to loan out the race context. A direct RawSyscall6 call does not require any race context. * The closure previously conditionally panicked in strange locations, like a signal handler. Now we simply throw. * All manual fixup synchronization with mPark, sysmon, templateThread, sigqueue, etc is gone. The core approach is much simpler: doAllThreadsSyscall sends a signal to every thread in allm, which executes the system call from the signal handler. We use (SIGRTMIN + 1), aka SIGSETXID, the same signal used by glibc for this purpose. As such, we are careful to only handle this signal on non-cgo binaries. Synchronization with thread creation is a key part of this CL. The comment near the top of doAllThreadsSyscall describes the required synchronization semantics and how they are achieved. Note that current use of allocmLock protects the state mutations of allm that are also protected by sched.lock. allocmLock is used instead of sched.lock simply to avoid holding sched.lock for so long. Fixes #50113 Change-Id: Ic7ea856dc66cf711731540a54996e08fc986ce84 Reviewed-on: https://go-review.googlesource.com/c/go/+/383434 Reviewed-by: Austin Clements <austin@google.com> Trust: Michael Pratt <mpratt@google.com> Run-TryBot: Michael Pratt <mpratt@google.com> TryBot-Result: Gopher Robot <gobot@golang.org>
2022-02-04 17:15:28 -05:00
// sigPerThreadSyscall is only used on linux, so we assign a bogus signal
// number.
const sigPerThreadSyscall = 1 << 31
//go:nosplit
func runPerThreadSyscall() {
throw("runPerThreadSyscall only valid on linux")
}