runtime: add page tracer

This change adds a new GODEBUG flag called pagetrace that writes a
low-overhead trace of how pages of memory are managed by the Go runtime.

The page tracer is kept behind a GOEXPERIMENT flag due to a potential
security risk for setuid binaries.

Change-Id: I6f4a2447d02693c25214400846a5d2832ad6e5c0
Reviewed-on: https://go-review.googlesource.com/c/go/+/444157
Reviewed-by: Austin Clements <austin@google.com>
Reviewed-by: David Chase <drchase@google.com>
Run-TryBot: Michael Knyszek <mknyszek@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Michael Knyszek, 2022-10-19 14:51:15 -04:00
parent 0613418c98, commit e4435cb844
51 changed files with 636 additions and 29 deletions

@@ -0,0 +1,9 @@
// Code generated by mkconsts.go. DO NOT EDIT.

//go:build !goexperiment.pagetrace
// +build !goexperiment.pagetrace

package goexperiment

const PageTrace = false
const PageTraceInt = 0

@@ -0,0 +1,9 @@
// Code generated by mkconsts.go. DO NOT EDIT.

//go:build goexperiment.pagetrace
// +build goexperiment.pagetrace

package goexperiment

const PageTrace = true
const PageTraceInt = 1
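These two generated files are the usual GOEXPERIMENT on/off pair: exactly one of them is compiled in, depending on the goexperiment.pagetrace build tag. For illustration only, the same paired build-tag pattern in ordinary user code could look like the sketch below; the experiment name, file names, and package here are hypothetical and not part of this change.

// enabled_off.go
//go:build !myexperiment

package feature

// Enabled reports whether the hypothetical "myexperiment" build tag is set.
const Enabled = false

// enabled_on.go
//go:build myexperiment

package feature

// Enabled reports whether the hypothetical "myexperiment" build tag is set.
const Enabled = true

Because Enabled is a constant, code guarded by it is discarded entirely by the compiler when the experiment is off, which is how GOEXPERIMENT-gated code typically compiles away when disabled.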

@@ -94,4 +94,10 @@ type Flags struct {
 // Arenas causes the "arena" standard library package to be visible
 // to the outside world.
 Arenas bool
+
+// PageTrace enables GODEBUG=pagetrace=/path/to/result. This feature
+// is a GOEXPERIMENT due to a security risk with setuid binaries:
+// this compels the Go runtime to write to some arbitrary file, which
+// may be exploited.
+PageTrace bool
 }

@@ -0,0 +1,14 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build !unix

package runtime

const canCreateFile = false

func create(name *byte, perm int32) int32 {
	throw("unimplemented")
	return -1
}

@@ -0,0 +1,14 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build unix

package runtime

const canCreateFile = true

// create returns an fd to a write-only file.
func create(name *byte, perm int32) int32 {
	return open(name, _O_CREAT|_O_WRONLY|_O_TRUNC, perm)
}
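The runtime cannot use the os package this early, so it opens the trace file with raw flags. In ordinary user code, the equivalent operation would be roughly the following sketch; the path is just an example, and the 0o664 permission mirrors the call made in initPageTrace later in this change.

package main

import (
	"fmt"
	"os"
)

func main() {
	// Equivalent of the runtime's create(): create-or-truncate a write-only file.
	f, err := os.OpenFile("/tmp/page.trace", os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0o664)
	if err != nil {
		fmt.Println("open failed:", err)
		return
	}
	defer f.Close()
	fmt.Println("opened", f.Name())
}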

@@ -8,7 +8,10 @@ const (
 _EFAULT = 0xe
 _EAGAIN = 0x23
+_O_WRONLY = 0x1
 _O_NONBLOCK = 0x4
+_O_CREAT = 0x200
+_O_TRUNC = 0x400
 _O_CLOEXEC = 0x400000
 _PROT_NONE = 0x0
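Taking the darwin values above as an example, the flag combination that the new create() helper passes to open works out as follows (a worked example, not code from this change):

package main

import "fmt"

// Darwin flag values from the constants added above.
const (
	oWRONLY = 0x1
	oCREAT  = 0x200
	oTRUNC  = 0x400
)

func main() {
	// _O_CREAT|_O_WRONLY|_O_TRUNC as used by the new create() helper.
	fmt.Printf("%#x\n", oCREAT|oWRONLY|oTRUNC) // prints 0x601
}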

@@ -8,7 +8,10 @@ const (
 _EFAULT = 0xe
 _EAGAIN = 0x23
+_O_WRONLY = 0x1
 _O_NONBLOCK = 0x4
+_O_CREAT = 0x200
+_O_TRUNC = 0x400
 _O_CLOEXEC = 0x400000
 _PROT_NONE = 0x0

@@ -8,7 +8,10 @@ const (
 _EFAULT = 0xe
 _EAGAIN = 0x23
+_O_WRONLY = 0x1
 _O_NONBLOCK = 0x4
+_O_CREAT = 0x200
+_O_TRUNC = 0x400
 _O_CLOEXEC = 0x400000
 _PROT_NONE = 0x0

@@ -8,7 +8,10 @@ const (
 _EFAULT = 0xe
 _EAGAIN = 0x23
+_O_WRONLY = 0x1
 _O_NONBLOCK = 0x4
+_O_CREAT = 0x200
+_O_TRUNC = 0x400
 _O_CLOEXEC = 0x400000
 _PROT_NONE = 0x0

@@ -91,7 +91,10 @@ const (
 _MAXHOSTNAMELEN = 0x100
+_O_WRONLY = 0x1
 _O_NONBLOCK = 0x80
+_O_TRUNC = 0x200
+_O_CREAT = 0x100
 _O_CLOEXEC = 0x800000
 _FD_CLOEXEC = 0x1
 _F_GETFL = 0x3

@@ -124,7 +124,10 @@ const (
 _ITIMER_PROF = C.ITIMER_PROF
 _O_RDONLY = C.O_RDONLY
+_O_WRONLY = C.O_WRONLY
 _O_NONBLOCK = C.O_NONBLOCK
+_O_CREAT = C.O_CREAT
+_O_TRUNC = C.O_TRUNC
 _SS_DISABLE = C.SS_DISABLE
 _SI_USER = C.SI_USER

@@ -81,7 +81,10 @@ const (
 _ITIMER_PROF = 0x2
 _O_RDONLY = 0x0
+_O_WRONLY = 0x1
 _O_NONBLOCK = 0x4
+_O_CREAT = 0x100
+_O_TRUNC = 0x200
 _SS_DISABLE = 0x2
 _SI_USER = 0x0

@@ -120,7 +120,10 @@ const (
 F_SETFL = C.F_SETFL
 FD_CLOEXEC = C.FD_CLOEXEC
+O_WRONLY = C.O_WRONLY
 O_NONBLOCK = C.O_NONBLOCK
+O_CREAT = C.O_CREAT
+O_TRUNC = C.O_TRUNC
 )
 type StackT C.struct_sigaltstack

@@ -99,7 +99,10 @@ const (
 _F_SETFL = 0x4
 _FD_CLOEXEC = 0x1
-_O_NONBLOCK = 4
+_O_WRONLY = 0x1
+_O_NONBLOCK = 0x4
+_O_CREAT = 0x200
+_O_TRUNC = 0x400
 )
 type stackt struct {

@@ -101,7 +101,10 @@ const (
 _F_SETFL = 0x4
 _FD_CLOEXEC = 0x1
-_O_NONBLOCK = 4
+_O_WRONLY = 0x1
+_O_NONBLOCK = 0x4
+_O_CREAT = 0x200
+_O_TRUNC = 0x400
 )
 type stackt struct {

@@ -32,7 +32,10 @@ const (
 EBUSY = C.EBUSY
 EAGAIN = C.EAGAIN
+O_WRONLY = C.O_WRONLY
 O_NONBLOCK = C.O_NONBLOCK
+O_CREAT = C.O_CREAT
+O_TRUNC = C.O_TRUNC
 O_CLOEXEC = C.O_CLOEXEC
 PROT_NONE = C.PROT_NONE

@@ -11,7 +11,10 @@ const (
 _EBUSY = 0x10
 _EAGAIN = 0x23
+_O_WRONLY = 0x1
 _O_NONBLOCK = 0x4
+_O_CREAT = 0x200
+_O_TRUNC = 0x400
 _O_CLOEXEC = 0x20000
 _PROT_NONE = 0x0

@@ -51,7 +51,10 @@ const (
 EAGAIN = C.EAGAIN
 ETIMEDOUT = C.ETIMEDOUT
+O_WRONLY = C.O_WRONLY
 O_NONBLOCK = C.O_NONBLOCK
+O_CREAT = C.O_CREAT
+O_TRUNC = C.O_TRUNC
 O_CLOEXEC = C.O_CLOEXEC
 PROT_NONE = C.PROT_NONE

@@ -19,7 +19,10 @@ const (
 _EAGAIN = 0x23
 _ETIMEDOUT = 0x3c
+_O_WRONLY = 0x1
 _O_NONBLOCK = 0x4
+_O_CREAT = 0x200
+_O_TRUNC = 0x400
 _O_CLOEXEC = 0x100000
 _PROT_NONE = 0x0

@@ -19,7 +19,10 @@ const (
 _EAGAIN = 0x23
 _ETIMEDOUT = 0x3c
+_O_WRONLY = 0x1
 _O_NONBLOCK = 0x4
+_O_CREAT = 0x200
+_O_TRUNC = 0x400
 _O_CLOEXEC = 0x100000
 _PROT_NONE = 0x0

@@ -19,7 +19,10 @@ const (
 _EAGAIN = 0x23
 _ETIMEDOUT = 0x3c
+_O_WRONLY = 0x1
 _O_NONBLOCK = 0x4
+_O_CREAT = 0x200
+_O_TRUNC = 0x400
 _O_CLOEXEC = 0x100000
 _PROT_NONE = 0x0

@@ -19,7 +19,10 @@ const (
 _EAGAIN = 0x23
 _ETIMEDOUT = 0x3c
+_O_WRONLY = 0x1
 _O_NONBLOCK = 0x4
+_O_CREAT = 0x200
+_O_TRUNC = 0x400
 _O_CLOEXEC = 0x100000
 _PROT_NONE = 0x0

@@ -18,7 +18,10 @@ const (
 _EAGAIN = 0x23
 _ETIMEDOUT = 0x3c
+_O_WRONLY = 0x1
 _O_NONBLOCK = 0x4
+_O_CREAT = 0x200
+_O_TRUNC = 0x400
 _O_CLOEXEC = 0x100000
 _PROT_NONE = 0x0

@@ -90,6 +90,9 @@ const (
 _SIGEV_THREAD_ID = 0x4
 _O_RDONLY = 0x0
+_O_WRONLY = 0x1
+_O_CREAT = 0x40
+_O_TRUNC = 0x200
 _O_NONBLOCK = 0x800
 _O_CLOEXEC = 0x80000

@@ -165,6 +165,9 @@ type sigevent struct {
 const (
 _O_RDONLY = 0x0
+_O_WRONLY = 0x1
+_O_CREAT = 0x40
+_O_TRUNC = 0x200
 _O_NONBLOCK = 0x800
 _O_CLOEXEC = 0x80000
 )

@@ -80,6 +80,9 @@ const (
 _ITIMER_PROF = 0x2
 _ITIMER_VIRTUAL = 0x1
 _O_RDONLY = 0
+_O_WRONLY = 0x1
+_O_CREAT = 0x40
+_O_TRUNC = 0x200
 _O_NONBLOCK = 0x800
 _O_CLOEXEC = 0x80000

@@ -165,6 +165,9 @@ type sigevent struct {
 const (
 _O_RDONLY = 0x0
+_O_WRONLY = 0x1
+_O_CREAT = 0x40
+_O_TRUNC = 0x200
 _O_NONBLOCK = 0x800
 _O_CLOEXEC = 0x80000
 )

@@ -137,6 +137,9 @@ type sigevent struct {
 const (
 _O_RDONLY = 0x0
+_O_WRONLY = 0x1
+_O_CREAT = 0x40
+_O_TRUNC = 0x200
 _O_NONBLOCK = 0x800
 _O_CLOEXEC = 0x80000
 )

@@ -169,6 +169,9 @@ type sigevent struct {
 const (
 _O_RDONLY = 0x0
+_O_WRONLY = 0x1
+_O_CREAT = 0x100
+_O_TRUNC = 0x200
 _O_NONBLOCK = 0x80
 _O_CLOEXEC = 0x80000
 _SA_RESTORER = 0

@@ -163,7 +163,10 @@ type sigevent struct {
 const (
 _O_RDONLY = 0x0
+_O_WRONLY = 0x1
 _O_NONBLOCK = 0x80
+_O_CREAT = 0x100
+_O_TRUNC = 0x200
 _O_CLOEXEC = 0x80000
 _SA_RESTORER = 0
 )

@@ -166,6 +166,9 @@ type sigevent struct {
 const (
 _O_RDONLY = 0x0
+_O_WRONLY = 0x1
+_O_CREAT = 0x40
+_O_TRUNC = 0x200
 _O_NONBLOCK = 0x800
 _O_CLOEXEC = 0x80000
 _SA_RESTORER = 0

@@ -166,6 +166,9 @@ type sigevent struct {
 const (
 _O_RDONLY = 0x0
+_O_WRONLY = 0x1
+_O_CREAT = 0x40
+_O_TRUNC = 0x200
 _O_NONBLOCK = 0x800
 _O_CLOEXEC = 0x80000
 _SA_RESTORER = 0

@@ -162,6 +162,9 @@ type sigevent struct {
 const (
 _O_RDONLY = 0x0
+_O_WRONLY = 0x1
+_O_CREAT = 0x40
+_O_TRUNC = 0x200
 _O_NONBLOCK = 0x800
 _O_CLOEXEC = 0x80000
 )

@@ -159,6 +159,9 @@ type sigevent struct {
 const (
 _O_RDONLY = 0x0
+_O_WRONLY = 0x1
+_O_CREAT = 0x40
+_O_TRUNC = 0x200
 _O_NONBLOCK = 0x800
 _O_CLOEXEC = 0x80000
 _SA_RESTORER = 0

@@ -34,7 +34,10 @@ const (
 EFAULT = C.EFAULT
 EAGAIN = C.EAGAIN
+O_WRONLY = C.O_WRONLY
 O_NONBLOCK = C.O_NONBLOCK
+O_CREAT = C.O_CREAT
+O_TRUNC = C.O_TRUNC
 O_CLOEXEC = C.O_CLOEXEC
 PROT_NONE = C.PROT_NONE

@@ -10,7 +10,10 @@ const (
 _EFAULT = 0xe
 _EAGAIN = 0x23
+_O_WRONLY = 0x1
 _O_NONBLOCK = 0x4
+_O_CREAT = 0x200
+_O_TRUNC = 0x400
 _O_CLOEXEC = 0x10000
 _PROT_NONE = 0x0

@@ -10,7 +10,10 @@ const (
 _EFAULT = 0xe
 _EAGAIN = 0x23
+_O_WRONLY = 0x1
 _O_NONBLOCK = 0x4
+_O_CREAT = 0x200
+_O_TRUNC = 0x400
 _O_CLOEXEC = 0x10000
 _PROT_NONE = 0x0

@@ -10,7 +10,10 @@ const (
 _EFAULT = 0xe
 _EAGAIN = 0x23
+_O_WRONLY = 0x1
 _O_NONBLOCK = 0x4
+_O_CREAT = 0x200
+_O_TRUNC = 0x400
 _O_CLOEXEC = 0x10000
 _PROT_NONE = 0x0

@@ -11,7 +11,10 @@ const (
 _EFAULT = 0xe
 _EAGAIN = 0x23
+_O_WRONLY = 0x1
 _O_NONBLOCK = 0x4
+_O_CREAT = 0x200
+_O_TRUNC = 0x400
 _O_CLOEXEC = 0x10000
 _PROT_NONE = 0x0

@@ -17,7 +17,10 @@ const (
 _EFAULT = 0xe
 _EAGAIN = 0x23
+_O_WRONLY = 0x1
 _O_NONBLOCK = 0x4
+_O_CREAT = 0x200
+_O_TRUNC = 0x400
 _O_CLOEXEC = 0x10000
 _PROT_NONE = 0x0

@@ -120,7 +120,10 @@ const (
 MAXHOSTNAMELEN = C.MAXHOSTNAMELEN
+O_WRONLY = C.O_WRONLY
 O_NONBLOCK = C.O_NONBLOCK
+O_CREAT = C.O_CREAT
+O_TRUNC = C.O_TRUNC
 O_CLOEXEC = C.O_CLOEXEC
 FD_CLOEXEC = C.FD_CLOEXEC
 F_GETFL = C.F_GETFL

@@ -54,6 +54,7 @@ func runExitHooks(exitCode int) {
 return
 }
+finishPageTrace()
 for i := range exitHooks.hooks {
 h := exitHooks.hooks[len(exitHooks.hooks)-i-1]
 if exitCode != 0 && !h.runOnNonZeroExit {

@@ -89,3 +89,9 @@ func waitForSigusr1Callback(gp *g) bool {
 func SendSigusr1(mp *M) {
 signalM(mp, _SIGUSR1)
 }
+
+const (
+O_WRONLY = _O_WRONLY
+O_CREAT = _O_CREAT
+O_TRUNC = _O_TRUNC
+)

@@ -127,6 +127,13 @@ It is a comma-separated list of name=val pairs setting these named variables:
 When set to 0 memory profiling is disabled. Refer to the description of
 MemProfileRate for the default value.
+
+pagetrace: setting pagetrace=/path/to/file will write out a trace of page events
+that can be viewed, analyzed, and visualized using the x/debug/cmd/pagetrace tool.
+Build your program with GOEXPERIMENT=pagetrace to enable this functionality. Do not
+enable this functionality if your program is a setuid binary as it introduces a security
+risk in that scenario. Currently not supported on Windows, plan9 or js/wasm. Setting this
+option for some applications can produce large traces, so use with care.
 invalidptr: invalidptr=1 (the default) causes the garbage collector and stack
 copier to crash the program if an invalid pointer value (for example, 1)
 is found in a pointer-typed location. Setting invalidptr=0 disables this check.
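The runtime parses this option out of GODEBUG itself (see initPageTrace further down). A user-level sketch of the same comma-separated name=value scan, for illustration only; the function and variable names here are hypothetical:

package main

import (
	"fmt"
	"os"
	"strings"
)

// pagetracePath scans GODEBUG the same way the runtime does: split on commas
// and look for a "pagetrace=" option. It is a sketch, not a runtime API.
func pagetracePath() (string, bool) {
	for _, opt := range strings.Split(os.Getenv("GODEBUG"), ",") {
		if strings.HasPrefix(opt, "pagetrace=") {
			return strings.TrimPrefix(opt, "pagetrace="), true
		}
	}
	return "", false
}

func main() {
	if path, ok := pagetracePath(); ok {
		fmt.Println("page trace would be written to", path)
	} else {
		fmt.Println("pagetrace not set")
	}
}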

@@ -747,6 +747,8 @@ func (p *pageAlloc) scavengeOne(ci chunkIdx, searchIdx uint, max uintptr) uintptr
 unlock(p.mheapLock)
 if !p.test {
+pageTraceScav(getg().m.p.ptr(), 0, addr, uintptr(npages))
+
 // Only perform the actual scavenging if we're not in a test.
 // It's dangerous to do so otherwise.
 sysUnused(unsafe.Pointer(addr), uintptr(npages)*pageSize)

@@ -1306,6 +1306,7 @@ HaveSpan:
 // There are a few very limited cirumstances where we won't have a P here.
 // It's OK to simply skip scavenging in these cases. Something else will notice
 // and pick up the tab.
+var now int64
 if pp != nil && bytesToScavenge > 0 {
 // Measure how long we spent scavenging and add that measurement to the assist
 // time so we can track it for the GC CPU limiter.
@@ -1321,7 +1322,7 @@ HaveSpan:
 })
 // Finish up accounting.
-now := nanotime()
+now = nanotime()
 if track {
 pp.limiterEvent.stop(limiterEventScavengeAssist, now)
 }
@@ -1360,6 +1361,7 @@ HaveSpan:
 }
 memstats.heapStats.release()
+pageTraceAlloc(pp, now, base, npages)
 return s
 }
@@ -1535,6 +1537,8 @@ func (h *mheap) grow(npage uintptr) (uintptr, bool) {
 // Free the span back into the heap.
 func (h *mheap) freeSpan(s *mspan) {
 systemstack(func() {
+pageTraceFree(getg().m.p.ptr(), 0, s.base(), s.npages)
+
 lock(&h.lock)
 if msanenabled {
 // Tell msan that this entire span is no longer in use.
@@ -1565,6 +1569,8 @@ func (h *mheap) freeSpan(s *mspan) {
 //
 //go:systemstack
 func (h *mheap) freeManual(s *mspan, typ spanAllocType) {
+pageTraceFree(getg().m.p.ptr(), 0, s.base(), s.npages)
+
 s.needzero = 1
 lock(&h.lock)
 h.freeSpanLocked(s, typ)

@@ -0,0 +1,28 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build !goexperiment.pagetrace

package runtime

//go:systemstack
func pageTraceAlloc(pp *p, now int64, base, npages uintptr) {
}

//go:systemstack
func pageTraceFree(pp *p, now int64, base, npages uintptr) {
}

//go:systemstack
func pageTraceScav(pp *p, now int64, base, npages uintptr) {
}

type pageTraceBuf struct {
}

func initPageTrace(env string) {
}

func finishPageTrace() {
}
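Because this file is selected when the experiment is off and every body is empty, the pageTraceAlloc/Free/Scav calls added to the allocator compile down to nothing. A small user-level sketch of the same stub-versus-real selection by build tag; the tag, package, and names are hypothetical:

// trace_stub.go
//go:build !mytrace

package tracedemo

// record is a no-op that the compiler can inline away entirely.
func record(event uint64) {}

// trace_real.go
//go:build mytrace

package tracedemo

var events []uint64

// record keeps the event when the "mytrace" build tag is enabled.
func record(event uint64) { events = append(events, event) }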

src/runtime/pagetrace_on.go (new file)

@@ -0,0 +1,358 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build goexperiment.pagetrace

// Page tracer.
//
// This file contains an implementation of page trace instrumentation for tracking
// the way the Go runtime manages pages of memory. The trace may be enabled at program
// startup with the GODEBUG option pagetrace.
//
// Each page trace event is either 8 or 16 bytes wide. The first
// 8 bytes follow this format for non-sync events:
//
// [16 timestamp delta][35 base address][10 npages][1 isLarge][2 pageTraceEventType]
//
// If the "large" bit is set then the event is 16 bytes wide with the second 8 byte word
// containing the full npages value (the npages bitfield is 0).
//
// The base address's bottom pageShift bits are always zero hence why we can pack other
// data in there. We ignore the top 16 bits, assuming a 48 bit address space for the
// heap.
//
// The timestamp delta is computed from the difference between the current nanotime
// timestamp and the last sync event's timestamp. The bottom pageTraceTimeLostBits of
// this delta is removed and only the next pageTraceTimeDeltaBits are kept.
//
// A sync event is emitted at the beginning of each trace buffer and whenever the
// timestamp delta would not fit in an event.
//
// Sync events have the following structure:
//
// [61 timestamp or P ID][1 isPID][2 pageTraceSyncEvent]
//
// In essence, the "large" bit repurposed to indicate whether it's a timestamp or a P ID
// (these are typically uint32). Note that we only have 61 bits for the 64-bit timestamp,
// but like for the delta we drop the bottom pageTraceTimeLostBits here as well.

package runtime

import (
	"runtime/internal/sys"
	"unsafe"
)
// pageTraceAlloc records a page trace allocation event.
// pp may be nil. Call only if debug.pagetracefd != 0.
//
// Must run on the system stack as a crude way to prevent preemption.
//
//go:systemstack
func pageTraceAlloc(pp *p, now int64, base, npages uintptr) {
	if pageTrace.enabled {
		if now == 0 {
			now = nanotime()
		}
		pageTraceEmit(pp, now, base, npages, pageTraceAllocEvent)
	}
}

// pageTraceFree records a page trace free event.
// pp may be nil. Call only if debug.pagetracefd != 0.
//
// Must run on the system stack as a crude way to prevent preemption.
//
//go:systemstack
func pageTraceFree(pp *p, now int64, base, npages uintptr) {
	if pageTrace.enabled {
		if now == 0 {
			now = nanotime()
		}
		pageTraceEmit(pp, now, base, npages, pageTraceFreeEvent)
	}
}

// pageTraceScav records a page trace scavenge event.
// pp may be nil. Call only if debug.pagetracefd != 0.
//
// Must run on the system stack as a crude way to prevent preemption.
//
//go:systemstack
func pageTraceScav(pp *p, now int64, base, npages uintptr) {
	if pageTrace.enabled {
		if now == 0 {
			now = nanotime()
		}
		pageTraceEmit(pp, now, base, npages, pageTraceScavEvent)
	}
}

// pageTraceEventType is a page trace event type.
type pageTraceEventType uint8

const (
	pageTraceSyncEvent pageTraceEventType = iota // Timestamp emission.
	pageTraceAllocEvent // Allocation of pages.
	pageTraceFreeEvent // Freeing pages.
	pageTraceScavEvent // Scavenging pages.
)

// pageTraceEmit emits a page trace event.
//
// Must run on the system stack as a crude way to prevent preemption.
//
//go:systemstack
func pageTraceEmit(pp *p, now int64, base, npages uintptr, typ pageTraceEventType) {
	// Get a buffer.
	var tbp *pageTraceBuf
	pid := int32(-1)
	if pp == nil {
		// We have no P, so take the global buffer.
		lock(&pageTrace.lock)
		tbp = &pageTrace.buf
	} else {
		tbp = &pp.pageTraceBuf
		pid = pp.id
	}

	// Initialize the buffer if necessary.
	tb := *tbp
	if tb.buf == nil {
		tb.buf = (*pageTraceEvents)(sysAlloc(pageTraceBufSize, &memstats.other_sys))
		tb = tb.writePid(pid)
	}

	// Handle timestamp and emit a sync event if necessary.
	if now < tb.timeBase {
		now = tb.timeBase
	}
	if now-tb.timeBase >= pageTraceTimeMaxDelta {
		tb.timeBase = now
		tb = tb.writeSync(pid)
	}

	// Emit the event.
	tb = tb.writeEvent(pid, now, base, npages, typ)

	// Write back the buffer.
	*tbp = tb
	if pp == nil {
		unlock(&pageTrace.lock)
	}
}
const (
	pageTraceBufSize = 32 << 10

	// These constants describe the per-event timestamp delta encoding.
	pageTraceTimeLostBits = 7 // How many bits of precision we lose in the delta.
	pageTraceTimeDeltaBits = 16 // Size of the delta in bits.
	pageTraceTimeMaxDelta = 1 << (pageTraceTimeLostBits + pageTraceTimeDeltaBits)
)

// pageTraceEvents is the low-level buffer containing the trace data.
type pageTraceEvents struct {
	_ sys.NotInHeap
	events [pageTraceBufSize / 8]uint64
}

// pageTraceBuf is a wrapper around pageTraceEvents that knows how to write events
// to the buffer. It tracks state necessary to do so.
type pageTraceBuf struct {
	buf *pageTraceEvents
	len int // How many events have been written so far.
	timeBase int64 // The current timestamp base from which deltas are produced.
	finished bool // Whether this trace buf should no longer flush anything out.
}

// writePid writes a P ID event indicating which P we're running on.
//
// Assumes there's always space in the buffer since this is only called at the
// beginning of a new buffer.
//
// Must run on the system stack as a crude way to prevent preemption.
//
//go:systemstack
func (tb pageTraceBuf) writePid(pid int32) pageTraceBuf {
	e := uint64(int64(pid))<<3 | 0b100 | uint64(pageTraceSyncEvent)
	tb.buf.events[tb.len] = e
	tb.len++
	return tb
}

// writeSync writes a sync event, which is just a timestamp. Handles flushing.
//
// Must run on the system stack as a crude way to prevent preemption.
//
//go:systemstack
func (tb pageTraceBuf) writeSync(pid int32) pageTraceBuf {
	if tb.len+1 > len(tb.buf.events) {
		// N.B. flush will writeSync again.
		return tb.flush(pid, tb.timeBase)
	}
	e := ((uint64(tb.timeBase) >> pageTraceTimeLostBits) << 3) | uint64(pageTraceSyncEvent)
	tb.buf.events[tb.len] = e
	tb.len++
	return tb
}

// writeEvent handles writing all non-sync and non-pid events. Handles flushing if necessary.
//
// pid indicates the P we're currently running on. Necessary in case we need to flush.
// now is the current nanotime timestamp.
// base is the base address of whatever group of pages this event is happening to.
// npages is the length of the group of pages this event is happening to.
// typ is the event that's happening to these pages.
//
// Must run on the system stack as a crude way to prevent preemption.
//
//go:systemstack
func (tb pageTraceBuf) writeEvent(pid int32, now int64, base, npages uintptr, typ pageTraceEventType) pageTraceBuf {
	large := 0
	np := npages
	if npages >= 1024 {
		large = 1
		np = 0
	}
	if tb.len+1+large > len(tb.buf.events) {
		tb = tb.flush(pid, now)
	}
	if base%pageSize != 0 {
		throw("base address not page aligned")
	}
	e := uint64(base)
	// The pageShift low-order bits are zero.
	e |= uint64(typ) // 2 bits
	e |= uint64(large) << 2 // 1 bit
	e |= uint64(np) << 3 // 10 bits
	// Write the timestamp delta in the upper pageTraceTimeDeltaBits.
	e |= uint64((now-tb.timeBase)>>pageTraceTimeLostBits) << (64 - pageTraceTimeDeltaBits)
	tb.buf.events[tb.len] = e
	if large != 0 {
		// npages doesn't fit in 10 bits, so write an additional word with that data.
		tb.buf.events[tb.len+1] = uint64(npages)
	}
	tb.len += 1 + large
	return tb
}

// flush writes out the contents of the buffer to pageTrace.fd and resets the buffer.
// It then writes out a P ID event and the first sync event for the new buffer.
//
// Must run on the system stack as a crude way to prevent preemption.
//
//go:systemstack
func (tb pageTraceBuf) flush(pid int32, now int64) pageTraceBuf {
	if !tb.finished {
		lock(&pageTrace.fdLock)
		writeFull(uintptr(pageTrace.fd), (*byte)(unsafe.Pointer(&tb.buf.events[0])), tb.len*8)
		unlock(&pageTrace.fdLock)
	}
	tb.len = 0
	tb.timeBase = now
	return tb.writePid(pid).writeSync(pid)
}
var pageTrace struct {
	// enabled indicates whether tracing is enabled. If true, fd >= 0.
	//
	// Safe to read without synchronization because it's only set once
	// at program initialization.
	enabled bool

	// buf is the page trace buffer used if there is no P.
	//
	// lock protects buf.
	lock mutex
	buf pageTraceBuf

	// fdLock protects writing to fd.
	//
	// fd is the file to write the page trace to.
	fdLock mutex
	fd int32
}

// initPageTrace initializes the page tracing infrastructure from GODEBUG.
//
// env must be the value of the GODEBUG environment variable.
func initPageTrace(env string) {
	var value string
	for env != "" {
		elt, rest := env, ""
		for i := 0; i < len(env); i++ {
			if env[i] == ',' {
				elt, rest = env[:i], env[i+1:]
				break
			}
		}
		env = rest
		if hasPrefix(elt, "pagetrace=") {
			value = elt[len("pagetrace="):]
			break
		}
	}
	pageTrace.fd = -1
	if canCreateFile && value != "" {
		var tmp [4096]byte
		if len(value) != 0 && len(value) < 4096 {
			copy(tmp[:], value)
			pageTrace.fd = create(&tmp[0], 0o664)
		}
	}
	pageTrace.enabled = pageTrace.fd >= 0
}

// finishPageTrace flushes all P's trace buffers and disables page tracing.
func finishPageTrace() {
	if !pageTrace.enabled {
		return
	}
	// Grab worldsema as we're about to execute a ragged barrier.
	semacquire(&worldsema)
	systemstack(func() {
		// Disable tracing. This isn't strictly necessary and it's best-effort.
		pageTrace.enabled = false

		// Execute a ragged barrier, flushing each trace buffer.
		forEachP(func(pp *p) {
			if pp.pageTraceBuf.buf != nil {
				pp.pageTraceBuf = pp.pageTraceBuf.flush(pp.id, nanotime())
			}
			pp.pageTraceBuf.finished = true
		})

		// Write the global have-no-P buffer.
		lock(&pageTrace.lock)
		if pageTrace.buf.buf != nil {
			pageTrace.buf = pageTrace.buf.flush(-1, nanotime())
		}
		pageTrace.buf.finished = true
		unlock(&pageTrace.lock)

		// Safely close the file as nothing else should be allowed to write to the fd.
		lock(&pageTrace.fdLock)
		closefd(pageTrace.fd)
		pageTrace.fd = -1
		unlock(&pageTrace.fdLock)
	})
	semrelease(&worldsema)
}

// writeFull ensures that a complete write of bn bytes from b is made to fd.
func writeFull(fd uintptr, b *byte, bn int) {
	for bn > 0 {
		n := write(fd, unsafe.Pointer(b), int32(bn))
		if n == -_EINTR || n == -_EAGAIN {
			continue
		}
		if n < 0 {
			print("errno=", -n, "\n")
			throw("writeBytes: bad write")
		}
		bn -= int(n)
		b = addb(b, uintptr(n))
	}
}
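To make the bit layout in the header comment concrete, here is a hedged sketch of decoding one non-sync event word outside the runtime. The two time constants mirror the ones in this file; pageShift, the struct, and the decode function are illustrative assumptions and are not part of this change or of x/debug/cmd/pagetrace.

package main

import "fmt"

const (
	pageShift = 13 // assumes the common 8 KiB runtime page size
	pageTraceTimeLostBits = 7 // as in pagetrace_on.go
	pageTraceTimeDeltaBits = 16 // as in pagetrace_on.go
)

// event is an unpacked non-sync page trace event.
type event struct {
	typ uint8 // 2-bit pageTraceEventType
	large bool // npages lives in a following 8-byte word
	npages uint64 // 10-bit field; 0 when large is set
	base uintptr // page-aligned base address (bits 13..47)
	deltaNS int64 // time since the last sync event, 128 ns granularity
}

// decode reverses the packing done by writeEvent.
func decode(w uint64) event {
	const lowBits = uint64(1)<<pageShift - 1 // typ, large, npages
	const topBits = uint64(0xffff) << (64 - pageTraceTimeDeltaBits) // timestamp delta
	return event{
		typ: uint8(w & 0b11),
		large: w&(1<<2) != 0,
		npages: (w >> 3) & 0x3ff,
		base: uintptr(w &^ lowBits &^ topBits),
		deltaNS: int64(w>>(64-pageTraceTimeDeltaBits)) << pageTraceTimeLostBits,
	}
}

func main() {
	// Build a hypothetical alloc event by hand, exactly as writeEvent packs it:
	// base 0x12342000 (page aligned), 3 pages, roughly 1µs after the last sync event.
	var w uint64
	w |= 0x12342000 // page-aligned base address
	w |= 1 // pageTraceAllocEvent
	w |= 3 << 3 // npages = 3
	w |= uint64(1000>>pageTraceTimeLostBits) << (64 - pageTraceTimeDeltaBits) // timestamp delta
	fmt.Printf("%+v\n", decode(w))
}

With pageTraceTimeLostBits = 7 and pageTraceTimeDeltaBits = 16, the largest representable delta is 1<<23 ns, about 8.4 ms, which is why pageTraceEmit re-emits a sync event whenever the delta would overflow.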

@@ -612,16 +612,39 @@ const (
 _GoidCacheBatch = 16
 )
-// cpuinit extracts the environment variable GODEBUG from the environment on
-// Unix-like operating systems and calls internal/cpu.Initialize.
-func cpuinit() {
-const prefix = "GODEBUG="
-var env string
+// cpuinit sets up CPU feature flags and calls internal/cpu.Initialize. env should be the complete
+// value of the GODEBUG environment variable.
+func cpuinit(env string) {
 switch GOOS {
 case "aix", "darwin", "ios", "dragonfly", "freebsd", "netbsd", "openbsd", "illumos", "solaris", "linux":
 cpu.DebugOptions = true
+}
+
+cpu.Initialize(env)
+
+// Support cpu feature variables are used in code generated by the compiler
+// to guard execution of instructions that can not be assumed to be always supported.
+switch GOARCH {
+case "386", "amd64":
+x86HasPOPCNT = cpu.X86.HasPOPCNT
+x86HasSSE41 = cpu.X86.HasSSE41
+x86HasFMA = cpu.X86.HasFMA
+case "arm":
+armHasVFPv4 = cpu.ARM.HasVFPv4
+case "arm64":
+arm64HasATOMICS = cpu.ARM64.HasATOMICS
+}
+}
+
+// getGodebugEarly extracts the environment variable GODEBUG from the environment on
+// Unix-like operating systems and returns it. This function exists to extract GODEBUG
+// early before much of the runtime is initialized.
+func getGodebugEarly() string {
+const prefix = "GODEBUG="
+var env string
+switch GOOS {
+case "aix", "darwin", "ios", "dragonfly", "freebsd", "netbsd", "openbsd", "illumos", "solaris", "linux":
 // Similar to goenv_unix but extracts the environment value for
 // GODEBUG directly.
 // TODO(moehrmann): remove when general goenvs() can be called before cpuinit()
@@ -640,23 +663,7 @@ func cpuinit() {
 }
 }
 }
-
-cpu.Initialize(env)
-
-// Support cpu feature variables are used in code generated by the compiler
-// to guard execution of instructions that can not be assumed to be always supported.
-switch GOARCH {
-case "386", "amd64":
-x86HasPOPCNT = cpu.X86.HasPOPCNT
-x86HasSSE41 = cpu.X86.HasSSE41
-x86HasFMA = cpu.X86.HasFMA
-case "arm":
-armHasVFPv4 = cpu.ARM.HasVFPv4
-case "arm64":
-arm64HasATOMICS = cpu.ARM64.HasATOMICS
-}
+return env
 }
 // The bootstrap sequence is:
@@ -703,9 +710,11 @@ func schedinit() {
 moduledataverify()
 stackinit()
 mallocinit()
-cpuinit()      // must run before alginit
-alginit()      // maps, hash, fastrand must not be used before this call
-fastrandinit() // must run before mcommoninit
+godebug := getGodebugEarly()
+initPageTrace(godebug) // must run after mallocinit but before anything allocates
+cpuinit(godebug)       // must run before alginit
+alginit()              // maps, hash, fastrand must not be used before this call
+fastrandinit()         // must run before mcommoninit
 mcommoninit(gp.m, -1)
 modulesinit()   // provides activeModules
 typelinksinit() // uses maps, activeModules

@@ -754,6 +754,11 @@ type p struct {
 // scheduler ASAP (regardless of what G is running on it).
 preempt bool
+
+// pageTraceBuf is a buffer for writing out page allocation/free/scavenge traces.
+//
+// Used only if GOEXPERIMENT=pagetrace.
+pageTraceBuf pageTraceBuf
 // Padding is no longer needed. False sharing is now not a worry because p is large enough
 // that its size class is an integer multiple of the cache line size (for any of our architectures).
 }
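Giving each P its own pageTraceBuf is what lets pageTraceEmit append events without taking a lock in the common case; only the no-P path falls back to the mutex-protected global buffer. A minimal user-level sketch of that pattern follows; the types and names here are hypothetical:

package main

import (
	"fmt"
	"sync"
)

type traceBuf struct {
	events []uint64
}

// worker stands in for a P: its buffer is owned exclusively by one goroutine,
// so appends need no synchronization.
type worker struct {
	buf traceBuf
}

// global stands in for the runtime's locked fallback buffer, used when there
// is no worker to hang a buffer off of.
var global struct {
	mu  sync.Mutex
	buf traceBuf
}

func emit(w *worker, ev uint64) {
	if w == nil {
		global.mu.Lock()
		global.buf.events = append(global.buf.events, ev)
		global.mu.Unlock()
		return
	}
	w.buf.events = append(w.buf.events, ev) // lock-free fast path
}

func main() {
	w := &worker{}
	emit(w, 1)   // per-worker fast path
	emit(nil, 2) // global slow path
	fmt.Println(len(w.buf.events), len(global.buf.events))
}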

@@ -0,0 +1,25 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build unix

package runtime_test

import (
	"runtime"
	"syscall"
	"testing"
)

func TestSyscallFlagAlignment(t *testing.T) {
	// TODO(mknyszek): Check other flags.
	check := func(name string, got, want int) {
		if got != want {
			t.Errorf("flag %s does not line up: got %d, want %d", name, got, want)
		}
	}
	check("O_WRONLY", runtime.O_WRONLY, syscall.O_WRONLY)
	check("O_CREAT", runtime.O_CREAT, syscall.O_CREAT)
	check("O_TRUNC", runtime.O_TRUNC, syscall.O_TRUNC)
}