runtime: add page tracer

This change adds a new GODEBUG flag called pagetrace that writes a
low-overhead trace of how pages of memory are managed by the Go runtime.

The page tracer is kept behind a GOEXPERIMENT flag due to a potential
security risk for setuid binaries.

Change-Id: I6f4a2447d02693c25214400846a5d2832ad6e5c0
Reviewed-on: https://go-review.googlesource.com/c/go/+/444157
Reviewed-by: Austin Clements <austin@google.com>
Reviewed-by: David Chase <drchase@google.com>
Run-TryBot: Michael Knyszek <mknyszek@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
This commit is contained in:
Michael Knyszek 2022-10-19 14:51:15 -04:00
parent 0613418c98
commit e4435cb844
51 changed files with 636 additions and 29 deletions

View file

@ -0,0 +1,9 @@
// Code generated by mkconsts.go. DO NOT EDIT.
//go:build !goexperiment.pagetrace
// +build !goexperiment.pagetrace
package goexperiment
const PageTrace = false
const PageTraceInt = 0

View file

@ -0,0 +1,9 @@
// Code generated by mkconsts.go. DO NOT EDIT.
//go:build goexperiment.pagetrace
// +build goexperiment.pagetrace
package goexperiment
const PageTrace = true
const PageTraceInt = 1

View file

@ -94,4 +94,10 @@ type Flags struct {
// Arenas causes the "arena" standard library package to be visible
// to the outside world.
Arenas bool
// PageTrace enables GODEBUG=pagetrace=/path/to/result. This feature
// is a GOEXPERIMENT due to a security risk with setuid binaries:
// this compels the Go runtime to write to some arbitrary file, which
// may be exploited.
PageTrace bool
}

View file

@ -0,0 +1,14 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !unix
package runtime
// canCreateFile reports whether create is implemented in this build.
// It is false here because the create below only throws.
const canCreateFile = false
// create is the placeholder used on non-unix builds. It always throws
// "unimplemented"; the trailing return only satisfies the signature.
func create(name *byte, perm int32) int32 {
throw("unimplemented")
return -1
}

View file

@ -0,0 +1,14 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build unix
package runtime
// canCreateFile reports whether create is implemented in this build.
const canCreateFile = true
// create returns an fd to a write-only file.
func create(name *byte, perm int32) int32 {
return open(name, _O_CREAT|_O_WRONLY|_O_TRUNC, perm)
}

View file

@ -8,7 +8,10 @@ const (
_EFAULT = 0xe
_EAGAIN = 0x23
_O_WRONLY = 0x1
_O_NONBLOCK = 0x4
_O_CREAT = 0x200
_O_TRUNC = 0x400
_O_CLOEXEC = 0x400000
_PROT_NONE = 0x0

View file

@ -8,7 +8,10 @@ const (
_EFAULT = 0xe
_EAGAIN = 0x23
_O_WRONLY = 0x1
_O_NONBLOCK = 0x4
_O_CREAT = 0x200
_O_TRUNC = 0x400
_O_CLOEXEC = 0x400000
_PROT_NONE = 0x0

View file

@ -8,7 +8,10 @@ const (
_EFAULT = 0xe
_EAGAIN = 0x23
_O_WRONLY = 0x1
_O_NONBLOCK = 0x4
_O_CREAT = 0x200
_O_TRUNC = 0x400
_O_CLOEXEC = 0x400000
_PROT_NONE = 0x0

View file

@ -8,7 +8,10 @@ const (
_EFAULT = 0xe
_EAGAIN = 0x23
_O_WRONLY = 0x1
_O_NONBLOCK = 0x4
_O_CREAT = 0x200
_O_TRUNC = 0x400
_O_CLOEXEC = 0x400000
_PROT_NONE = 0x0

View file

@ -91,7 +91,10 @@ const (
_MAXHOSTNAMELEN = 0x100
_O_WRONLY = 0x1
_O_NONBLOCK = 0x80
_O_TRUNC = 0x200
_O_CREAT = 0x100
_O_CLOEXEC = 0x800000
_FD_CLOEXEC = 0x1
_F_GETFL = 0x3

View file

@ -124,7 +124,10 @@ const (
_ITIMER_PROF = C.ITIMER_PROF
_O_RDONLY = C.O_RDONLY
_O_WRONLY = C.O_WRONLY
_O_NONBLOCK = C.O_NONBLOCK
_O_CREAT = C.O_CREAT
_O_TRUNC = C.O_TRUNC
_SS_DISABLE = C.SS_DISABLE
_SI_USER = C.SI_USER

View file

@ -81,7 +81,10 @@ const (
_ITIMER_PROF = 0x2
_O_RDONLY = 0x0
_O_WRONLY = 0x1
_O_NONBLOCK = 0x4
_O_CREAT = 0x100
_O_TRUNC = 0x200
_SS_DISABLE = 0x2
_SI_USER = 0x0

View file

@ -120,7 +120,10 @@ const (
F_SETFL = C.F_SETFL
FD_CLOEXEC = C.FD_CLOEXEC
O_WRONLY = C.O_WRONLY
O_NONBLOCK = C.O_NONBLOCK
O_CREAT = C.O_CREAT
O_TRUNC = C.O_TRUNC
)
type StackT C.struct_sigaltstack

View file

@ -99,7 +99,10 @@ const (
_F_SETFL = 0x4
_FD_CLOEXEC = 0x1
_O_NONBLOCK = 4
_O_WRONLY = 0x1
_O_NONBLOCK = 0x4
_O_CREAT = 0x200
_O_TRUNC = 0x400
)
type stackt struct {

View file

@ -101,7 +101,10 @@ const (
_F_SETFL = 0x4
_FD_CLOEXEC = 0x1
_O_NONBLOCK = 4
_O_WRONLY = 0x1
_O_NONBLOCK = 0x4
_O_CREAT = 0x200
_O_TRUNC = 0x400
)
type stackt struct {

View file

@ -32,7 +32,10 @@ const (
EBUSY = C.EBUSY
EAGAIN = C.EAGAIN
O_WRONLY = C.O_WRONLY
O_NONBLOCK = C.O_NONBLOCK
O_CREAT = C.O_CREAT
O_TRUNC = C.O_TRUNC
O_CLOEXEC = C.O_CLOEXEC
PROT_NONE = C.PROT_NONE

View file

@ -11,7 +11,10 @@ const (
_EBUSY = 0x10
_EAGAIN = 0x23
_O_WRONLY = 0x1
_O_NONBLOCK = 0x4
_O_CREAT = 0x200
_O_TRUNC = 0x400
_O_CLOEXEC = 0x20000
_PROT_NONE = 0x0

View file

@ -51,7 +51,10 @@ const (
EAGAIN = C.EAGAIN
ETIMEDOUT = C.ETIMEDOUT
O_WRONLY = C.O_WRONLY
O_NONBLOCK = C.O_NONBLOCK
O_CREAT = C.O_CREAT
O_TRUNC = C.O_TRUNC
O_CLOEXEC = C.O_CLOEXEC
PROT_NONE = C.PROT_NONE

View file

@ -19,7 +19,10 @@ const (
_EAGAIN = 0x23
_ETIMEDOUT = 0x3c
_O_WRONLY = 0x1
_O_NONBLOCK = 0x4
_O_CREAT = 0x200
_O_TRUNC = 0x400
_O_CLOEXEC = 0x100000
_PROT_NONE = 0x0

View file

@ -19,7 +19,10 @@ const (
_EAGAIN = 0x23
_ETIMEDOUT = 0x3c
_O_WRONLY = 0x1
_O_NONBLOCK = 0x4
_O_CREAT = 0x200
_O_TRUNC = 0x400
_O_CLOEXEC = 0x100000
_PROT_NONE = 0x0

View file

@ -19,7 +19,10 @@ const (
_EAGAIN = 0x23
_ETIMEDOUT = 0x3c
_O_WRONLY = 0x1
_O_NONBLOCK = 0x4
_O_CREAT = 0x200
_O_TRUNC = 0x400
_O_CLOEXEC = 0x100000
_PROT_NONE = 0x0

View file

@ -19,7 +19,10 @@ const (
_EAGAIN = 0x23
_ETIMEDOUT = 0x3c
_O_WRONLY = 0x1
_O_NONBLOCK = 0x4
_O_CREAT = 0x200
_O_TRUNC = 0x400
_O_CLOEXEC = 0x100000
_PROT_NONE = 0x0

View file

@ -18,7 +18,10 @@ const (
_EAGAIN = 0x23
_ETIMEDOUT = 0x3c
_O_WRONLY = 0x1
_O_NONBLOCK = 0x4
_O_CREAT = 0x200
_O_TRUNC = 0x400
_O_CLOEXEC = 0x100000
_PROT_NONE = 0x0

View file

@ -90,6 +90,9 @@ const (
_SIGEV_THREAD_ID = 0x4
_O_RDONLY = 0x0
_O_WRONLY = 0x1
_O_CREAT = 0x40
_O_TRUNC = 0x200
_O_NONBLOCK = 0x800
_O_CLOEXEC = 0x80000

View file

@ -165,6 +165,9 @@ type sigevent struct {
const (
_O_RDONLY = 0x0
_O_WRONLY = 0x1
_O_CREAT = 0x40
_O_TRUNC = 0x200
_O_NONBLOCK = 0x800
_O_CLOEXEC = 0x80000
)

View file

@ -80,6 +80,9 @@ const (
_ITIMER_PROF = 0x2
_ITIMER_VIRTUAL = 0x1
_O_RDONLY = 0
_O_WRONLY = 0x1
_O_CREAT = 0x40
_O_TRUNC = 0x200
_O_NONBLOCK = 0x800
_O_CLOEXEC = 0x80000

View file

@ -165,6 +165,9 @@ type sigevent struct {
const (
_O_RDONLY = 0x0
_O_WRONLY = 0x1
_O_CREAT = 0x40
_O_TRUNC = 0x200
_O_NONBLOCK = 0x800
_O_CLOEXEC = 0x80000
)

View file

@ -137,6 +137,9 @@ type sigevent struct {
const (
_O_RDONLY = 0x0
_O_WRONLY = 0x1
_O_CREAT = 0x40
_O_TRUNC = 0x200
_O_NONBLOCK = 0x800
_O_CLOEXEC = 0x80000
)

View file

@ -169,6 +169,9 @@ type sigevent struct {
const (
_O_RDONLY = 0x0
_O_WRONLY = 0x1
_O_CREAT = 0x100
_O_TRUNC = 0x200
_O_NONBLOCK = 0x80
_O_CLOEXEC = 0x80000
_SA_RESTORER = 0

View file

@ -163,7 +163,10 @@ type sigevent struct {
const (
_O_RDONLY = 0x0
_O_WRONLY = 0x1
_O_NONBLOCK = 0x80
_O_CREAT = 0x100
_O_TRUNC = 0x200
_O_CLOEXEC = 0x80000
_SA_RESTORER = 0
)

View file

@ -166,6 +166,9 @@ type sigevent struct {
const (
_O_RDONLY = 0x0
_O_WRONLY = 0x1
_O_CREAT = 0x40
_O_TRUNC = 0x200
_O_NONBLOCK = 0x800
_O_CLOEXEC = 0x80000
_SA_RESTORER = 0

View file

@ -166,6 +166,9 @@ type sigevent struct {
const (
_O_RDONLY = 0x0
_O_WRONLY = 0x1
_O_CREAT = 0x40
_O_TRUNC = 0x200
_O_NONBLOCK = 0x800
_O_CLOEXEC = 0x80000
_SA_RESTORER = 0

View file

@ -162,6 +162,9 @@ type sigevent struct {
const (
_O_RDONLY = 0x0
_O_WRONLY = 0x1
_O_CREAT = 0x40
_O_TRUNC = 0x200
_O_NONBLOCK = 0x800
_O_CLOEXEC = 0x80000
)

View file

@ -159,6 +159,9 @@ type sigevent struct {
const (
_O_RDONLY = 0x0
_O_WRONLY = 0x1
_O_CREAT = 0x40
_O_TRUNC = 0x200
_O_NONBLOCK = 0x800
_O_CLOEXEC = 0x80000
_SA_RESTORER = 0

View file

@ -34,7 +34,10 @@ const (
EFAULT = C.EFAULT
EAGAIN = C.EAGAIN
O_WRONLY = C.O_WRONLY
O_NONBLOCK = C.O_NONBLOCK
O_CREAT = C.O_CREAT
O_TRUNC = C.O_TRUNC
O_CLOEXEC = C.O_CLOEXEC
PROT_NONE = C.PROT_NONE

View file

@ -10,7 +10,10 @@ const (
_EFAULT = 0xe
_EAGAIN = 0x23
_O_WRONLY = 0x1
_O_NONBLOCK = 0x4
_O_CREAT = 0x200
_O_TRUNC = 0x400
_O_CLOEXEC = 0x10000
_PROT_NONE = 0x0

View file

@ -10,7 +10,10 @@ const (
_EFAULT = 0xe
_EAGAIN = 0x23
_O_WRONLY = 0x1
_O_NONBLOCK = 0x4
_O_CREAT = 0x200
_O_TRUNC = 0x400
_O_CLOEXEC = 0x10000
_PROT_NONE = 0x0

View file

@ -10,7 +10,10 @@ const (
_EFAULT = 0xe
_EAGAIN = 0x23
_O_WRONLY = 0x1
_O_NONBLOCK = 0x4
_O_CREAT = 0x200
_O_TRUNC = 0x400
_O_CLOEXEC = 0x10000
_PROT_NONE = 0x0

View file

@ -11,7 +11,10 @@ const (
_EFAULT = 0xe
_EAGAIN = 0x23
_O_WRONLY = 0x1
_O_NONBLOCK = 0x4
_O_CREAT = 0x200
_O_TRUNC = 0x400
_O_CLOEXEC = 0x10000
_PROT_NONE = 0x0

View file

@ -17,7 +17,10 @@ const (
_EFAULT = 0xe
_EAGAIN = 0x23
_O_WRONLY = 0x1
_O_NONBLOCK = 0x4
_O_CREAT = 0x200
_O_TRUNC = 0x400
_O_CLOEXEC = 0x10000
_PROT_NONE = 0x0

View file

@ -120,7 +120,10 @@ const (
MAXHOSTNAMELEN = C.MAXHOSTNAMELEN
O_WRONLY = C.O_WRONLY
O_NONBLOCK = C.O_NONBLOCK
O_CREAT = C.O_CREAT
O_TRUNC = C.O_TRUNC
O_CLOEXEC = C.O_CLOEXEC
FD_CLOEXEC = C.FD_CLOEXEC
F_GETFL = C.F_GETFL

View file

@ -54,6 +54,7 @@ func runExitHooks(exitCode int) {
return
}
finishPageTrace()
for i := range exitHooks.hooks {
h := exitHooks.hooks[len(exitHooks.hooks)-i-1]
if exitCode != 0 && !h.runOnNonZeroExit {

View file

@ -89,3 +89,9 @@ func waitForSigusr1Callback(gp *g) bool {
func SendSigusr1(mp *M) {
signalM(mp, _SIGUSR1)
}
// Exported aliases of the runtime's private open flag values, so that
// tests can compare them against the values in package syscall.
const (
O_WRONLY = _O_WRONLY
O_CREAT = _O_CREAT
O_TRUNC = _O_TRUNC
)

View file

@ -127,6 +127,13 @@ It is a comma-separated list of name=val pairs setting these named variables:
When set to 0 memory profiling is disabled. Refer to the description of
MemProfileRate for the default value.
pagetrace: setting pagetrace=/path/to/file will write out a trace of page events
that can be viewed, analyzed, and visualized using the x/debug/cmd/pagetrace tool.
Build your program with GOEXPERIMENT=pagetrace to enable this functionality. Do not
enable this functionality if your program is a setuid binary as it introduces a security
risk in that scenario. Currently not supported on Windows, plan9 or js/wasm. Setting this
option for some applications can produce large traces, so use with care.
invalidptr: invalidptr=1 (the default) causes the garbage collector and stack
copier to crash the program if an invalid pointer value (for example, 1)
is found in a pointer-typed location. Setting invalidptr=0 disables this check.

View file

@ -747,6 +747,8 @@ func (p *pageAlloc) scavengeOne(ci chunkIdx, searchIdx uint, max uintptr) uintpt
unlock(p.mheapLock)
if !p.test {
pageTraceScav(getg().m.p.ptr(), 0, addr, uintptr(npages))
// Only perform the actual scavenging if we're not in a test.
// It's dangerous to do so otherwise.
sysUnused(unsafe.Pointer(addr), uintptr(npages)*pageSize)

View file

@ -1306,6 +1306,7 @@ HaveSpan:
// There are a few very limited circumstances where we won't have a P here.
// It's OK to simply skip scavenging in these cases. Something else will notice
// and pick up the tab.
var now int64
if pp != nil && bytesToScavenge > 0 {
// Measure how long we spent scavenging and add that measurement to the assist
// time so we can track it for the GC CPU limiter.
@ -1321,7 +1322,7 @@ HaveSpan:
})
// Finish up accounting.
now := nanotime()
now = nanotime()
if track {
pp.limiterEvent.stop(limiterEventScavengeAssist, now)
}
@ -1360,6 +1361,7 @@ HaveSpan:
}
memstats.heapStats.release()
pageTraceAlloc(pp, now, base, npages)
return s
}
@ -1535,6 +1537,8 @@ func (h *mheap) grow(npage uintptr) (uintptr, bool) {
// Free the span back into the heap.
func (h *mheap) freeSpan(s *mspan) {
systemstack(func() {
pageTraceFree(getg().m.p.ptr(), 0, s.base(), s.npages)
lock(&h.lock)
if msanenabled {
// Tell msan that this entire span is no longer in use.
@ -1565,6 +1569,8 @@ func (h *mheap) freeSpan(s *mspan) {
//
//go:systemstack
func (h *mheap) freeManual(s *mspan, typ spanAllocType) {
pageTraceFree(getg().m.p.ptr(), 0, s.base(), s.npages)
s.needzero = 1
lock(&h.lock)
h.freeSpanLocked(s, typ)

View file

@ -0,0 +1,28 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !goexperiment.pagetrace
package runtime
// pageTraceAlloc is the no-op stand-in compiled when the pagetrace
// GOEXPERIMENT is not enabled.
//
//go:systemstack
func pageTraceAlloc(pp *p, now int64, base, npages uintptr) {
}
// pageTraceFree is the no-op stand-in compiled when the pagetrace
// GOEXPERIMENT is not enabled.
//
//go:systemstack
func pageTraceFree(pp *p, now int64, base, npages uintptr) {
}
// pageTraceScav is the no-op stand-in compiled when the pagetrace
// GOEXPERIMENT is not enabled.
//
//go:systemstack
func pageTraceScav(pp *p, now int64, base, npages uintptr) {
}
// pageTraceBuf is an empty placeholder type used when the pagetrace
// GOEXPERIMENT is not enabled.
type pageTraceBuf struct {
}
// initPageTrace does nothing when the pagetrace GOEXPERIMENT is not
// enabled; env is ignored.
func initPageTrace(env string) {
}
// finishPageTrace does nothing when the pagetrace GOEXPERIMENT is not
// enabled.
func finishPageTrace() {
}

358
src/runtime/pagetrace_on.go Normal file
View file

@ -0,0 +1,358 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.pagetrace
// Page tracer.
//
// This file contains an implementation of page trace instrumentation for tracking
// the way the Go runtime manages pages of memory. The trace may be enabled at program
// startup with the GODEBUG option pagetrace.
//
// Each page trace event is either 8 or 16 bytes wide. The first
// 8 bytes follow this format for non-sync events:
//
// [16 timestamp delta][35 base address][10 npages][1 isLarge][2 pageTraceEventType]
//
// If the "large" bit is set then the event is 16 bytes wide with the second 8 byte word
// containing the full npages value (the npages bitfield is 0).
//
// The base address's bottom pageShift bits are always zero hence why we can pack other
// data in there. We ignore the top 16 bits, assuming a 48 bit address space for the
// heap.
//
// The timestamp delta is computed from the difference between the current nanotime
// timestamp and the last sync event's timestamp. The bottom pageTraceTimeLostBits of
// this delta is removed and only the next pageTraceTimeDeltaBits are kept.
//
// A sync event is emitted at the beginning of each trace buffer and whenever the
// timestamp delta would not fit in an event.
//
// Sync events have the following structure:
//
// [61 timestamp or P ID][1 isPID][2 pageTraceSyncEvent]
//
// In essence, the "large" bit is repurposed to indicate whether it's a timestamp or a P ID
// (these are typically uint32). Note that we only have 61 bits for the 64-bit timestamp,
// but like for the delta we drop the bottom pageTraceTimeLostBits here as well.
package runtime
import (
"runtime/internal/sys"
"unsafe"
)
// pageTraceAlloc emits an allocation event for the npages pages starting at
// base. It does nothing unless pageTrace.enabled is set.
//
// pp may be nil. If now is zero, the current nanotime is used instead.
//
// Must run on the system stack as a crude way to prevent preemption.
//
//go:systemstack
func pageTraceAlloc(pp *p, now int64, base, npages uintptr) {
	if !pageTrace.enabled {
		return
	}
	ts := now
	if ts == 0 {
		ts = nanotime()
	}
	pageTraceEmit(pp, ts, base, npages, pageTraceAllocEvent)
}
// pageTraceFree emits a free event for the npages pages starting at base.
// It does nothing unless pageTrace.enabled is set.
//
// pp may be nil. If now is zero, the current nanotime is used instead.
//
// Must run on the system stack as a crude way to prevent preemption.
//
//go:systemstack
func pageTraceFree(pp *p, now int64, base, npages uintptr) {
	if !pageTrace.enabled {
		return
	}
	ts := now
	if ts == 0 {
		ts = nanotime()
	}
	pageTraceEmit(pp, ts, base, npages, pageTraceFreeEvent)
}
// pageTraceScav emits a scavenge event for the npages pages starting at
// base. It does nothing unless pageTrace.enabled is set.
//
// pp may be nil. If now is zero, the current nanotime is used instead.
//
// Must run on the system stack as a crude way to prevent preemption.
//
//go:systemstack
func pageTraceScav(pp *p, now int64, base, npages uintptr) {
	if !pageTrace.enabled {
		return
	}
	ts := now
	if ts == 0 {
		ts = nanotime()
	}
	pageTraceEmit(pp, ts, base, npages, pageTraceScavEvent)
}
// pageTraceEventType is a page trace event type.
//
// writeEvent stores the type in the low 2 bits of an event word, so the
// values below must stay in the range 0-3.
type pageTraceEventType uint8
const (
pageTraceSyncEvent pageTraceEventType = iota // Timestamp emission; writePid also uses it for P ID events.
pageTraceAllocEvent // Allocation of pages.
pageTraceFreeEvent // Freeing pages.
pageTraceScavEvent // Scavenging pages.
)
// pageTraceEmit emits a page trace event.
//
// pp selects the buffer: a non-nil pp uses that P's own buffer, while a nil
// pp uses the global buffer, guarded by pageTrace.lock. now is the event
// timestamp, base and npages describe the page range, and typ is the event
// type to record.
//
// The buffer is allocated lazily on first use. A sync event is written
// whenever the distance from the buffer's time base is too large to encode
// as a per-event delta.
//
// Must run on the system stack as a crude way to prevent preemption.
//
//go:systemstack
func pageTraceEmit(pp *p, now int64, base, npages uintptr, typ pageTraceEventType) {
	// Get a buffer.
	var tbp *pageTraceBuf
	pid := int32(-1)
	if pp == nil {
		// We have no P, so take the global buffer.
		lock(&pageTrace.lock)
		tbp = &pageTrace.buf
	} else {
		tbp = &pp.pageTraceBuf
		pid = pp.id
	}

	// Initialize the buffer if necessary.
	tb := *tbp
	if tb.buf == nil {
		tb.buf = (*pageTraceEvents)(sysAlloc(pageTraceBufSize, &memstats.other_sys))
		if tb.buf == nil {
			// Fail with a clear message rather than a nil dereference in
			// writePid below.
			throw("pagetrace: failed to allocate trace buffer")
		}
		tb = tb.writePid(pid)
	}

	// Handle timestamp and emit a sync event if necessary.
	// Clamp so the delta below can never be negative.
	if now < tb.timeBase {
		now = tb.timeBase
	}
	if now-tb.timeBase >= pageTraceTimeMaxDelta {
		tb.timeBase = now
		tb = tb.writeSync(pid)
	}

	// Emit the event.
	tb = tb.writeEvent(pid, now, base, npages, typ)

	// Write back the buffer.
	*tbp = tb
	if pp == nil {
		unlock(&pageTrace.lock)
	}
}
// Sizing and timestamp-encoding parameters for page trace buffers.
const (
// pageTraceBufSize is the size in bytes of each trace buffer; a buffer
// holds pageTraceBufSize/8 64-bit event words.
pageTraceBufSize = 32 << 10
// These constants describe the per-event timestamp delta encoding.
pageTraceTimeLostBits = 7 // How many bits of precision we lose in the delta.
pageTraceTimeDeltaBits = 16 // Size of the delta in bits.
// pageTraceTimeMaxDelta is the smallest nanotime delta that no longer fits
// in an event; pageTraceEmit writes a new sync event at or beyond it.
pageTraceTimeMaxDelta = 1 << (pageTraceTimeLostBits + pageTraceTimeDeltaBits)
)
// pageTraceEvents is the low-level buffer containing the trace data.
//
// pageTraceEmit obtains the backing memory from sysAlloc.
type pageTraceEvents struct {
_ sys.NotInHeap
// events holds the raw 64-bit event words, pageTraceBufSize bytes in total.
events [pageTraceBufSize / 8]uint64
}
// pageTraceBuf is a wrapper around pageTraceEvents that knows how to write events
// to the buffer. It tracks state necessary to do so.
//
// Its methods take the receiver by value and return the updated copy, so
// callers must store the result back, as pageTraceEmit does.
type pageTraceBuf struct {
buf *pageTraceEvents
len int // How many events have been written so far.
timeBase int64 // The current timestamp base from which deltas are produced.
finished bool // Whether this trace buf should no longer flush anything out.
}
// writePid appends a P ID event recording which P this buffer belongs to
// and returns the updated buffer.
//
// There is no bounds handling here: the caller must guarantee a free slot,
// which holds because this is only called at the start of a fresh buffer.
//
// Must run on the system stack as a crude way to prevent preemption.
//
//go:systemstack
func (tb pageTraceBuf) writePid(pid int32) pageTraceBuf {
	// isPID is the bit that distinguishes a P ID from a timestamp in a
	// sync event.
	const isPID = 0b100

	word := uint64(int64(pid)) << 3
	word |= isPID
	word |= uint64(pageTraceSyncEvent)

	slot := tb.len
	tb.buf.events[slot] = word
	tb.len = slot + 1
	return tb
}
// writeSync appends a sync event carrying the buffer's current time base
// and returns the updated buffer. If the buffer is already full it is
// flushed instead.
//
// Must run on the system stack as a crude way to prevent preemption.
//
//go:systemstack
func (tb pageTraceBuf) writeSync(pid int32) pageTraceBuf {
	if tb.len >= len(tb.buf.events) {
		// No room left. N.B. flush will writeSync again.
		return tb.flush(pid, tb.timeBase)
	}
	// Drop the low bits of the timestamp, then leave space for the 3 tag bits.
	stamp := uint64(tb.timeBase) >> pageTraceTimeLostBits
	tb.buf.events[tb.len] = stamp<<3 | uint64(pageTraceSyncEvent)
	tb.len++
	return tb
}
// writeEvent handles writing all non-sync and non-pid events. Handles flushing if necessary.
//
// pid indicates the P we're currently running on. Necessary in case we need to flush.
// now is the current nanotime timestamp.
// base is the base address of whatever group of pages this event is happening to.
// npages is the length of the group of pages this event is happening to.
// typ is the event that's happening to these pages.
//
// Events with npages >= 1024 do not fit the 10-bit npages field. They are
// written as "large" events that occupy two words, with the full npages
// value in the second word. It throws if base is not page aligned.
//
// Must run on the system stack as a crude way to prevent preemption.
//
//go:systemstack
func (tb pageTraceBuf) writeEvent(pid int32, now int64, base, npages uintptr, typ pageTraceEventType) pageTraceBuf {
	large := 0
	np := npages
	if npages >= 1024 {
		large = 1
		np = 0
	}
	// Make room for one or two words before writing.
	if tb.len+1+large > len(tb.buf.events) {
		tb = tb.flush(pid, now)
	}
	if base%pageSize != 0 {
		throw("base address not page aligned")
	}
	// Keep only the address bits below the timestamp delta field, so that a
	// base with high bits set cannot corrupt the delta in the top bits.
	e := uint64(base) & (1<<(64-pageTraceTimeDeltaBits) - 1)
	// The pageShift low-order bits are zero.
	e |= uint64(typ)        // 2 bits
	e |= uint64(large) << 2 // 1 bit
	e |= uint64(np) << 3    // 10 bits
	// Write the timestamp delta in the upper pageTraceTimeDeltaBits.
	e |= uint64((now-tb.timeBase)>>pageTraceTimeLostBits) << (64 - pageTraceTimeDeltaBits)
	tb.buf.events[tb.len] = e
	if large != 0 {
		// npages doesn't fit in 10 bits, so write an additional word with that data.
		tb.buf.events[tb.len+1] = uint64(npages)
	}
	tb.len += 1 + large
	return tb
}
// flush writes the buffered events to pageTrace.fd, unless the buffer is
// marked finished, and then resets the buffer with now as its new time base.
// The reset buffer starts with a P ID event followed by a sync event.
//
// Must run on the system stack as a crude way to prevent preemption.
//
//go:systemstack
func (tb pageTraceBuf) flush(pid int32, now int64) pageTraceBuf {
	if !tb.finished {
		data := (*byte)(unsafe.Pointer(&tb.buf.events[0]))
		size := tb.len * 8 // each event word is 8 bytes

		lock(&pageTrace.fdLock)
		writeFull(uintptr(pageTrace.fd), data, size)
		unlock(&pageTrace.fdLock)
	}
	tb.len, tb.timeBase = 0, now
	tb = tb.writePid(pid)
	return tb.writeSync(pid)
}
// pageTrace holds the global state of the page tracer.
var pageTrace struct {
// enabled indicates whether tracing is enabled. If true, fd >= 0.
//
// Safe to read without synchronization because it's only set once
// at program initialization.
// finishPageTrace also clears it, on a best-effort basis.
enabled bool
// buf is the page trace buffer used if there is no P.
//
// lock protects buf.
lock mutex
buf pageTraceBuf
// fdLock protects writing to fd.
//
// fd is the file to write the page trace to.
// It is -1 when no trace file is open.
fdLock mutex
fd int32
}
// initPageTrace sets up page tracing from the GODEBUG settings.
//
// env must be the value of the GODEBUG environment variable. The first
// comma-separated element of the form pagetrace=<path> is used. If file
// creation is supported and the path is non-empty and shorter than 4096
// bytes, the file is created and tracing is enabled when that succeeds.
// Otherwise tracing stays disabled.
func initPageTrace(env string) {
	const key = "pagetrace="

	path := ""
	for env != "" {
		// Find the end of the next comma-separated element.
		end := 0
		for end < len(env) && env[end] != ',' {
			end++
		}
		elt := env[:end]
		if end < len(env) {
			env = env[end+1:]
		} else {
			env = ""
		}
		if hasPrefix(elt, key) {
			path = elt[len(key):]
			break
		}
	}

	pageTrace.fd = -1
	if canCreateFile && path != "" && len(path) < 4096 {
		// Copy into a zeroed buffer that is strictly larger than path, so
		// at least one zero byte follows the name.
		var tmp [4096]byte
		copy(tmp[:], path)
		pageTrace.fd = create(&tmp[0], 0o664)
	}
	pageTrace.enabled = pageTrace.fd >= 0
}
// finishPageTrace flushes all P's trace buffers and disables page tracing.
//
// It returns immediately if tracing is not enabled. Otherwise it flushes
// every P's buffer and the global no-P buffer, marks each one finished so
// that later flushes write nothing, and finally closes the trace file.
func finishPageTrace() {
if !pageTrace.enabled {
return
}
// Grab worldsema as we're about to execute a ragged barrier.
semacquire(&worldsema)
systemstack(func() {
// Disable tracing. This isn't strictly necessary and it's best-effort.
pageTrace.enabled = false
// Execute a ragged barrier, flushing each trace buffer.
forEachP(func(pp *p) {
// A nil buf means this P never recorded an event, so there is nothing to write.
if pp.pageTraceBuf.buf != nil {
pp.pageTraceBuf = pp.pageTraceBuf.flush(pp.id, nanotime())
}
pp.pageTraceBuf.finished = true
})
// Write the global have-no-P buffer.
lock(&pageTrace.lock)
if pageTrace.buf.buf != nil {
pageTrace.buf = pageTrace.buf.flush(-1, nanotime())
}
pageTrace.buf.finished = true
unlock(&pageTrace.lock)
// Safely close the file as nothing else should be allowed to write to the fd.
lock(&pageTrace.fdLock)
closefd(pageTrace.fd)
pageTrace.fd = -1
unlock(&pageTrace.fdLock)
})
semrelease(&worldsema)
}
// writeFull ensures that a complete write of bn bytes from b is made to fd.
//
// A write that fails with EINTR or EAGAIN is retried. A short write is
// continued from where it stopped. Any other error prints the errno and
// throws.
func writeFull(fd uintptr, b *byte, bn int) {
	for bn > 0 {
		n := write(fd, unsafe.Pointer(b), int32(bn))
		if n == -_EINTR || n == -_EAGAIN {
			continue
		}
		if n < 0 {
			print("errno=", -n, "\n")
			// Name this function in the message so the crash can be traced.
			throw("writeFull: bad write")
		}
		// Advance past the bytes that were written.
		bn -= int(n)
		b = addb(b, uintptr(n))
	}
}

View file

@ -612,16 +612,39 @@ const (
_GoidCacheBatch = 16
)
// cpuinit extracts the environment variable GODEBUG from the environment on
// Unix-like operating systems and calls internal/cpu.Initialize.
func cpuinit() {
const prefix = "GODEBUG="
var env string
// cpuinit sets up CPU feature flags and calls internal/cpu.Initialize. env should be the complete
// value of the GODEBUG environment variable.
func cpuinit(env string) {
switch GOOS {
case "aix", "darwin", "ios", "dragonfly", "freebsd", "netbsd", "openbsd", "illumos", "solaris", "linux":
cpu.DebugOptions = true
}
cpu.Initialize(env)
// Support cpu feature variables are used in code generated by the compiler
// to guard execution of instructions that can not be assumed to be always supported.
switch GOARCH {
case "386", "amd64":
x86HasPOPCNT = cpu.X86.HasPOPCNT
x86HasSSE41 = cpu.X86.HasSSE41
x86HasFMA = cpu.X86.HasFMA
case "arm":
armHasVFPv4 = cpu.ARM.HasVFPv4
case "arm64":
arm64HasATOMICS = cpu.ARM64.HasATOMICS
}
}
// getGodebugEarly extracts the environment variable GODEBUG from the environment on
// Unix-like operating systems and returns it. This function exists to extract GODEBUG
// early before much of the runtime is initialized.
func getGodebugEarly() string {
const prefix = "GODEBUG="
var env string
switch GOOS {
case "aix", "darwin", "ios", "dragonfly", "freebsd", "netbsd", "openbsd", "illumos", "solaris", "linux":
// Similar to goenv_unix but extracts the environment value for
// GODEBUG directly.
// TODO(moehrmann): remove when general goenvs() can be called before cpuinit()
@ -640,23 +663,7 @@ func cpuinit() {
}
}
}
cpu.Initialize(env)
// Support cpu feature variables are used in code generated by the compiler
// to guard execution of instructions that can not be assumed to be always supported.
switch GOARCH {
case "386", "amd64":
x86HasPOPCNT = cpu.X86.HasPOPCNT
x86HasSSE41 = cpu.X86.HasSSE41
x86HasFMA = cpu.X86.HasFMA
case "arm":
armHasVFPv4 = cpu.ARM.HasVFPv4
case "arm64":
arm64HasATOMICS = cpu.ARM64.HasATOMICS
}
return env
}
// The bootstrap sequence is:
@ -703,9 +710,11 @@ func schedinit() {
moduledataverify()
stackinit()
mallocinit()
cpuinit() // must run before alginit
alginit() // maps, hash, fastrand must not be used before this call
fastrandinit() // must run before mcommoninit
godebug := getGodebugEarly()
initPageTrace(godebug) // must run after mallocinit but before anything allocates
cpuinit(godebug) // must run before alginit
alginit() // maps, hash, fastrand must not be used before this call
fastrandinit() // must run before mcommoninit
mcommoninit(gp.m, -1)
modulesinit() // provides activeModules
typelinksinit() // uses maps, activeModules

View file

@ -754,6 +754,11 @@ type p struct {
// scheduler ASAP (regardless of what G is running on it).
preempt bool
// pageTraceBuf is a buffer for writing out page allocation/free/scavenge traces.
//
// Used only if GOEXPERIMENT=pagetrace.
pageTraceBuf pageTraceBuf
// Padding is no longer needed. False sharing is now not a worry because p is large enough
// that its size class is an integer multiple of the cache line size (for any of our architectures).
}

View file

@ -0,0 +1,25 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build unix
package runtime_test
import (
"runtime"
"syscall"
"testing"
)
// TestSyscallFlagAlignment checks that the open flag values exported by the
// runtime match the corresponding values in package syscall.
func TestSyscallFlagAlignment(t *testing.T) {
	// TODO(mknyszek): Check other flags.
	flags := []struct {
		name      string
		got, want int
	}{
		{"O_WRONLY", runtime.O_WRONLY, syscall.O_WRONLY},
		{"O_CREAT", runtime.O_CREAT, syscall.O_CREAT},
		{"O_TRUNC", runtime.O_TRUNC, syscall.O_TRUNC},
	}
	for _, f := range flags {
		if f.got != f.want {
			t.Errorf("flag %s does not line up: got %d, want %d", f.name, f.got, f.want)
		}
	}
}