go/src/runtime/metrics_test.go
Michael Anthony Knyszek e23edf5e55 runtime: don't re-read metrics before check in TestReadMetricsSched
The two remaining popular flakes in TestReadMetricsSched are with
waiting and not-in-go goroutines. I believe the reason for both of these
is pollution due to other goroutines in the test binary, and we can be a
little bit more robust to them.

In particular, both of these tests spin until there's a particular
condition met in the metrics. Then they both immediately re-read the
metrics. The metrics being checked are not guaranteed to stay that way
in the re-read. For example, for the waiting case, it's possible for
>1000 to be waiting, but then some leftover goroutines from a previous
test wake up off of some condition, causing the number to dip again on
the re-read. This is supported by the fact that in some of these
failures, the test takes very little time to execute (the condition that
there are at least 1000 waiting goroutines is almost immediately
satisfied) but it still fails (the re-read causes us to notice that there
are <1000 waiting goroutines).

This change makes the test more robust by not performing this re-read.
This means more possible false negatives, but the error cases we're
looking for (bad metrics) should still show up with some reasonable
probability when something *is* truly wrong.

For #75049.

Change-Id: Idc3a8030bed8da51f24322efe15f3ff13da05623
Reviewed-on: https://go-review.googlesource.com/c/go/+/705875
Reviewed-by: Michael Pratt <mpratt@google.com>
Auto-Submit: Michael Knyszek <mknyszek@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
2025-09-22 11:00:25 -07:00

1788 lines
57 KiB
Go

// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package runtime_test
import (
"bytes"
"internal/abi"
"internal/goexperiment"
"internal/profile"
"internal/testenv"
"os"
"reflect"
"runtime"
"runtime/debug"
"runtime/metrics"
"runtime/pprof"
"runtime/trace"
"slices"
"sort"
"strings"
"sync"
"sync/atomic"
"syscall"
"testing"
"time"
"unsafe"
)
func prepareAllMetricsSamples() (map[string]metrics.Description, []metrics.Sample) {
all := metrics.All()
samples := make([]metrics.Sample, len(all))
descs := make(map[string]metrics.Description)
for i := range all {
samples[i].Name = all[i].Name
descs[all[i].Name] = all[i]
}
return descs, samples
}
func TestReadMetrics(t *testing.T) {
// Run a GC cycle to get some of the stats to be non-zero.
runtime.GC()
// Set an arbitrary memory limit to check the metric for it
limit := int64(512 * 1024 * 1024)
oldLimit := debug.SetMemoryLimit(limit)
defer debug.SetMemoryLimit(oldLimit)
// Set a GC percent to check the metric for it
gcPercent := 99
oldGCPercent := debug.SetGCPercent(gcPercent)
defer debug.SetGCPercent(oldGCPercent)
// Tests whether readMetrics produces values aligning
// with ReadMemStats while the world is stopped.
var mstats runtime.MemStats
_, samples := prepareAllMetricsSamples()
runtime.ReadMetricsSlow(&mstats, unsafe.Pointer(&samples[0]), len(samples), cap(samples))
checkUint64 := func(t *testing.T, m string, got, want uint64) {
t.Helper()
if got != want {
t.Errorf("metric %q: got %d, want %d", m, got, want)
}
}
// Check to make sure the values we read line up with other values we read.
var allocsBySize, gcPauses, schedPausesTotalGC *metrics.Float64Histogram
var tinyAllocs uint64
var mallocs, frees uint64
for i := range samples {
switch name := samples[i].Name; name {
case "/cgo/go-to-c-calls:calls":
checkUint64(t, name, samples[i].Value.Uint64(), uint64(runtime.NumCgoCall()))
case "/memory/classes/heap/free:bytes":
checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapIdle-mstats.HeapReleased)
case "/memory/classes/heap/released:bytes":
checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapReleased)
case "/memory/classes/heap/objects:bytes":
checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapAlloc)
case "/memory/classes/heap/unused:bytes":
checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapInuse-mstats.HeapAlloc)
case "/memory/classes/heap/stacks:bytes":
checkUint64(t, name, samples[i].Value.Uint64(), mstats.StackInuse)
case "/memory/classes/metadata/mcache/free:bytes":
checkUint64(t, name, samples[i].Value.Uint64(), mstats.MCacheSys-mstats.MCacheInuse)
case "/memory/classes/metadata/mcache/inuse:bytes":
checkUint64(t, name, samples[i].Value.Uint64(), mstats.MCacheInuse)
case "/memory/classes/metadata/mspan/free:bytes":
checkUint64(t, name, samples[i].Value.Uint64(), mstats.MSpanSys-mstats.MSpanInuse)
case "/memory/classes/metadata/mspan/inuse:bytes":
checkUint64(t, name, samples[i].Value.Uint64(), mstats.MSpanInuse)
case "/memory/classes/metadata/other:bytes":
checkUint64(t, name, samples[i].Value.Uint64(), mstats.GCSys)
case "/memory/classes/os-stacks:bytes":
checkUint64(t, name, samples[i].Value.Uint64(), mstats.StackSys-mstats.StackInuse)
case "/memory/classes/other:bytes":
checkUint64(t, name, samples[i].Value.Uint64(), mstats.OtherSys)
case "/memory/classes/profiling/buckets:bytes":
checkUint64(t, name, samples[i].Value.Uint64(), mstats.BuckHashSys)
case "/memory/classes/total:bytes":
checkUint64(t, name, samples[i].Value.Uint64(), mstats.Sys)
case "/gc/heap/allocs-by-size:bytes":
hist := samples[i].Value.Float64Histogram()
// Skip size class 0 in BySize, because it's always empty and not represented
// in the histogram.
for i, sc := range mstats.BySize[1:] {
if b, s := hist.Buckets[i+1], float64(sc.Size+1); b != s {
t.Errorf("bucket does not match size class: got %f, want %f", b, s)
// The rest of the checks aren't expected to work anyway.
continue
}
if c, m := hist.Counts[i], sc.Mallocs; c != m {
t.Errorf("histogram counts do not much BySize for class %d: got %d, want %d", i, c, m)
}
}
allocsBySize = hist
case "/gc/heap/allocs:bytes":
checkUint64(t, name, samples[i].Value.Uint64(), mstats.TotalAlloc)
case "/gc/heap/frees-by-size:bytes":
hist := samples[i].Value.Float64Histogram()
// Skip size class 0 in BySize, because it's always empty and not represented
// in the histogram.
for i, sc := range mstats.BySize[1:] {
if b, s := hist.Buckets[i+1], float64(sc.Size+1); b != s {
t.Errorf("bucket does not match size class: got %f, want %f", b, s)
// The rest of the checks aren't expected to work anyway.
continue
}
if c, f := hist.Counts[i], sc.Frees; c != f {
t.Errorf("histogram counts do not match BySize for class %d: got %d, want %d", i, c, f)
}
}
case "/gc/heap/frees:bytes":
checkUint64(t, name, samples[i].Value.Uint64(), mstats.TotalAlloc-mstats.HeapAlloc)
case "/gc/heap/tiny/allocs:objects":
// Currently, MemStats adds tiny alloc count to both Mallocs AND Frees.
// The reason for this is because MemStats couldn't be extended at the time
// but there was a desire to have Mallocs at least be a little more representative,
// while having Mallocs - Frees still represent a live object count.
// Unfortunately, MemStats doesn't actually export a large allocation count,
// so it's impossible to pull this number out directly.
//
// Check tiny allocation count outside of this loop, by using the allocs-by-size
// histogram in order to figure out how many large objects there are.
tinyAllocs = samples[i].Value.Uint64()
// Because the next two metrics tests are checking against Mallocs and Frees,
// we can't check them directly for the same reason: we need to account for tiny
// allocations included in Mallocs and Frees.
case "/gc/heap/allocs:objects":
mallocs = samples[i].Value.Uint64()
case "/gc/heap/frees:objects":
frees = samples[i].Value.Uint64()
case "/gc/heap/live:bytes":
// Check for "obviously wrong" values. We can't check a stronger invariant,
// such as live <= HeapAlloc, because live is not 100% accurate. It's computed
// under racy conditions, and some objects may be double-counted (this is
// intentional and necessary for GC performance).
//
// Instead, check against a much more reasonable upper-bound: the amount of
// mapped heap memory. We can't possibly overcount to the point of exceeding
// total mapped heap memory, except if there's an accounting bug.
if live := samples[i].Value.Uint64(); live > mstats.HeapSys {
t.Errorf("live bytes: %d > heap sys: %d", live, mstats.HeapSys)
} else if live == 0 {
// Might happen if we don't call runtime.GC() above.
t.Error("live bytes is 0")
}
case "/gc/gomemlimit:bytes":
checkUint64(t, name, samples[i].Value.Uint64(), uint64(limit))
case "/gc/heap/objects:objects":
checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapObjects)
case "/gc/heap/goal:bytes":
checkUint64(t, name, samples[i].Value.Uint64(), mstats.NextGC)
case "/gc/gogc:percent":
checkUint64(t, name, samples[i].Value.Uint64(), uint64(gcPercent))
case "/gc/cycles/automatic:gc-cycles":
checkUint64(t, name, samples[i].Value.Uint64(), uint64(mstats.NumGC-mstats.NumForcedGC))
case "/gc/cycles/forced:gc-cycles":
checkUint64(t, name, samples[i].Value.Uint64(), uint64(mstats.NumForcedGC))
case "/gc/cycles/total:gc-cycles":
checkUint64(t, name, samples[i].Value.Uint64(), uint64(mstats.NumGC))
case "/gc/pauses:seconds":
gcPauses = samples[i].Value.Float64Histogram()
case "/sched/pauses/total/gc:seconds":
schedPausesTotalGC = samples[i].Value.Float64Histogram()
}
}
// Check tinyAllocs.
nonTinyAllocs := uint64(0)
for _, c := range allocsBySize.Counts {
nonTinyAllocs += c
}
checkUint64(t, "/gc/heap/tiny/allocs:objects", tinyAllocs, mstats.Mallocs-nonTinyAllocs)
// Check allocation and free counts.
checkUint64(t, "/gc/heap/allocs:objects", mallocs, mstats.Mallocs-tinyAllocs)
checkUint64(t, "/gc/heap/frees:objects", frees, mstats.Frees-tinyAllocs)
// Verify that /gc/pauses:seconds is a copy of /sched/pauses/total/gc:seconds
if !slices.Equal(gcPauses.Buckets, schedPausesTotalGC.Buckets) {
t.Errorf("/gc/pauses:seconds buckets %v do not match /sched/pauses/total/gc:seconds buckets %v", gcPauses.Buckets, schedPausesTotalGC.Counts)
}
if !slices.Equal(gcPauses.Counts, schedPausesTotalGC.Counts) {
t.Errorf("/gc/pauses:seconds counts %v do not match /sched/pauses/total/gc:seconds counts %v", gcPauses.Counts, schedPausesTotalGC.Counts)
}
}
func TestReadMetricsConsistency(t *testing.T) {
// Tests whether readMetrics produces consistent, sensible values.
// The values are read concurrently with the runtime doing other
// things (e.g. allocating) so what we read can't reasonably compared
// to other runtime values (e.g. MemStats).
// Run a few GC cycles to get some of the stats to be non-zero.
runtime.GC()
runtime.GC()
runtime.GC()
// Set GOMAXPROCS high then sleep briefly to ensure we generate
// some idle time.
oldmaxprocs := runtime.GOMAXPROCS(10)
time.Sleep(time.Millisecond)
runtime.GOMAXPROCS(oldmaxprocs)
// Read all the supported metrics through the metrics package.
descs, samples := prepareAllMetricsSamples()
metrics.Read(samples)
// Check to make sure the values we read make sense.
var totalVirtual struct {
got, want uint64
}
var objects struct {
alloc, free *metrics.Float64Histogram
allocs, frees uint64
allocdBytes, freedBytes uint64
total, totalBytes uint64
}
var gc struct {
numGC uint64
pauses uint64
}
var totalScan struct {
got, want uint64
}
var cpu struct {
gcAssist float64
gcDedicated float64
gcIdle float64
gcPause float64
gcTotal float64
idle float64
user float64
scavengeAssist float64
scavengeBg float64
scavengeTotal float64
total float64
}
for i := range samples {
kind := samples[i].Value.Kind()
if want := descs[samples[i].Name].Kind; kind != want {
t.Errorf("supported metric %q has unexpected kind: got %d, want %d", samples[i].Name, kind, want)
continue
}
if samples[i].Name != "/memory/classes/total:bytes" && strings.HasPrefix(samples[i].Name, "/memory/classes") {
v := samples[i].Value.Uint64()
totalVirtual.want += v
// None of these stats should ever get this big.
// If they do, there's probably overflow involved,
// usually due to bad accounting.
if int64(v) < 0 {
t.Errorf("%q has high/negative value: %d", samples[i].Name, v)
}
}
switch samples[i].Name {
case "/cpu/classes/gc/mark/assist:cpu-seconds":
cpu.gcAssist = samples[i].Value.Float64()
case "/cpu/classes/gc/mark/dedicated:cpu-seconds":
cpu.gcDedicated = samples[i].Value.Float64()
case "/cpu/classes/gc/mark/idle:cpu-seconds":
cpu.gcIdle = samples[i].Value.Float64()
case "/cpu/classes/gc/pause:cpu-seconds":
cpu.gcPause = samples[i].Value.Float64()
case "/cpu/classes/gc/total:cpu-seconds":
cpu.gcTotal = samples[i].Value.Float64()
case "/cpu/classes/idle:cpu-seconds":
cpu.idle = samples[i].Value.Float64()
case "/cpu/classes/scavenge/assist:cpu-seconds":
cpu.scavengeAssist = samples[i].Value.Float64()
case "/cpu/classes/scavenge/background:cpu-seconds":
cpu.scavengeBg = samples[i].Value.Float64()
case "/cpu/classes/scavenge/total:cpu-seconds":
cpu.scavengeTotal = samples[i].Value.Float64()
case "/cpu/classes/total:cpu-seconds":
cpu.total = samples[i].Value.Float64()
case "/cpu/classes/user:cpu-seconds":
cpu.user = samples[i].Value.Float64()
case "/memory/classes/total:bytes":
totalVirtual.got = samples[i].Value.Uint64()
case "/memory/classes/heap/objects:bytes":
objects.totalBytes = samples[i].Value.Uint64()
case "/gc/heap/objects:objects":
objects.total = samples[i].Value.Uint64()
case "/gc/heap/allocs:bytes":
objects.allocdBytes = samples[i].Value.Uint64()
case "/gc/heap/allocs:objects":
objects.allocs = samples[i].Value.Uint64()
case "/gc/heap/allocs-by-size:bytes":
objects.alloc = samples[i].Value.Float64Histogram()
case "/gc/heap/frees:bytes":
objects.freedBytes = samples[i].Value.Uint64()
case "/gc/heap/frees:objects":
objects.frees = samples[i].Value.Uint64()
case "/gc/heap/frees-by-size:bytes":
objects.free = samples[i].Value.Float64Histogram()
case "/gc/cycles:gc-cycles":
gc.numGC = samples[i].Value.Uint64()
case "/gc/pauses:seconds":
h := samples[i].Value.Float64Histogram()
gc.pauses = 0
for i := range h.Counts {
gc.pauses += h.Counts[i]
}
case "/gc/scan/heap:bytes":
totalScan.want += samples[i].Value.Uint64()
case "/gc/scan/globals:bytes":
totalScan.want += samples[i].Value.Uint64()
case "/gc/scan/stack:bytes":
totalScan.want += samples[i].Value.Uint64()
case "/gc/scan/total:bytes":
totalScan.got = samples[i].Value.Uint64()
case "/sched/gomaxprocs:threads":
if got, want := samples[i].Value.Uint64(), uint64(runtime.GOMAXPROCS(-1)); got != want {
t.Errorf("gomaxprocs doesn't match runtime.GOMAXPROCS: got %d, want %d", got, want)
}
case "/sched/goroutines:goroutines":
if samples[i].Value.Uint64() < 1 {
t.Error("number of goroutines is less than one")
}
}
}
// Only check this on Linux where we can be reasonably sure we have a high-resolution timer.
if runtime.GOOS == "linux" {
if cpu.gcDedicated <= 0 && cpu.gcAssist <= 0 && cpu.gcIdle <= 0 {
t.Errorf("found no time spent on GC work: %#v", cpu)
}
if cpu.gcPause <= 0 {
t.Errorf("found no GC pauses: %f", cpu.gcPause)
}
if cpu.idle <= 0 {
t.Errorf("found no idle time: %f", cpu.idle)
}
if total := cpu.gcDedicated + cpu.gcAssist + cpu.gcIdle + cpu.gcPause; !withinEpsilon(cpu.gcTotal, total, 0.001) {
t.Errorf("calculated total GC CPU time not within %%0.1 of total: %f vs. %f", total, cpu.gcTotal)
}
if total := cpu.scavengeAssist + cpu.scavengeBg; !withinEpsilon(cpu.scavengeTotal, total, 0.001) {
t.Errorf("calculated total scavenge CPU not within %%0.1 of total: %f vs. %f", total, cpu.scavengeTotal)
}
if cpu.total <= 0 {
t.Errorf("found no total CPU time passed")
}
if cpu.user <= 0 {
t.Errorf("found no user time passed")
}
if total := cpu.gcTotal + cpu.scavengeTotal + cpu.user + cpu.idle; !withinEpsilon(cpu.total, total, 0.001) {
t.Errorf("calculated total CPU not within %%0.1 of total: %f vs. %f", total, cpu.total)
}
}
if totalVirtual.got != totalVirtual.want {
t.Errorf(`"/memory/classes/total:bytes" does not match sum of /memory/classes/**: got %d, want %d`, totalVirtual.got, totalVirtual.want)
}
if got, want := objects.allocs-objects.frees, objects.total; got != want {
t.Errorf("mismatch between object alloc/free tallies and total: got %d, want %d", got, want)
}
if got, want := objects.allocdBytes-objects.freedBytes, objects.totalBytes; got != want {
t.Errorf("mismatch between object alloc/free tallies and total: got %d, want %d", got, want)
}
if b, c := len(objects.alloc.Buckets), len(objects.alloc.Counts); b != c+1 {
t.Errorf("allocs-by-size has wrong bucket or counts length: %d buckets, %d counts", b, c)
}
if b, c := len(objects.free.Buckets), len(objects.free.Counts); b != c+1 {
t.Errorf("frees-by-size has wrong bucket or counts length: %d buckets, %d counts", b, c)
}
if len(objects.alloc.Buckets) != len(objects.free.Buckets) {
t.Error("allocs-by-size and frees-by-size buckets don't match in length")
} else if len(objects.alloc.Counts) != len(objects.free.Counts) {
t.Error("allocs-by-size and frees-by-size counts don't match in length")
} else {
for i := range objects.alloc.Buckets {
ba := objects.alloc.Buckets[i]
bf := objects.free.Buckets[i]
if ba != bf {
t.Errorf("bucket %d is different for alloc and free hists: %f != %f", i, ba, bf)
}
}
if !t.Failed() {
var gotAlloc, gotFree uint64
want := objects.total
for i := range objects.alloc.Counts {
if objects.alloc.Counts[i] < objects.free.Counts[i] {
t.Errorf("found more allocs than frees in object dist bucket %d", i)
continue
}
gotAlloc += objects.alloc.Counts[i]
gotFree += objects.free.Counts[i]
}
if got := gotAlloc - gotFree; got != want {
t.Errorf("object distribution counts don't match count of live objects: got %d, want %d", got, want)
}
if gotAlloc != objects.allocs {
t.Errorf("object distribution counts don't match total allocs: got %d, want %d", gotAlloc, objects.allocs)
}
if gotFree != objects.frees {
t.Errorf("object distribution counts don't match total allocs: got %d, want %d", gotFree, objects.frees)
}
}
}
// The current GC has at least 2 pauses per GC.
// Check to see if that value makes sense.
if gc.pauses < gc.numGC*2 {
t.Errorf("fewer pauses than expected: got %d, want at least %d", gc.pauses, gc.numGC*2)
}
if totalScan.got <= 0 {
t.Errorf("scannable GC space is empty: %d", totalScan.got)
}
if totalScan.got != totalScan.want {
t.Errorf("/gc/scan/total:bytes doesn't line up with sum of /gc/scan*: total %d vs. sum %d", totalScan.got, totalScan.want)
}
}
func BenchmarkReadMetricsLatency(b *testing.B) {
stop := applyGCLoad(b)
// Spend this much time measuring latencies.
latencies := make([]time.Duration, 0, 1024)
_, samples := prepareAllMetricsSamples()
// Hit metrics.Read continuously and measure.
b.ResetTimer()
for i := 0; i < b.N; i++ {
start := time.Now()
metrics.Read(samples)
latencies = append(latencies, time.Since(start))
}
// Make sure to stop the timer before we wait! The load created above
// is very heavy-weight and not easy to stop, so we could end up
// confusing the benchmarking framework for small b.N.
b.StopTimer()
stop()
// Disable the default */op metrics.
// ns/op doesn't mean anything because it's an average, but we
// have a sleep in our b.N loop above which skews this significantly.
b.ReportMetric(0, "ns/op")
b.ReportMetric(0, "B/op")
b.ReportMetric(0, "allocs/op")
// Sort latencies then report percentiles.
sort.Slice(latencies, func(i, j int) bool {
return latencies[i] < latencies[j]
})
b.ReportMetric(float64(latencies[len(latencies)*50/100]), "p50-ns")
b.ReportMetric(float64(latencies[len(latencies)*90/100]), "p90-ns")
b.ReportMetric(float64(latencies[len(latencies)*99/100]), "p99-ns")
}
var readMetricsSink [1024]interface{}
func TestReadMetricsCumulative(t *testing.T) {
// Set up the set of metrics marked cumulative.
descs := metrics.All()
var samples [2][]metrics.Sample
samples[0] = make([]metrics.Sample, len(descs))
samples[1] = make([]metrics.Sample, len(descs))
total := 0
for i := range samples[0] {
if !descs[i].Cumulative {
continue
}
samples[0][total].Name = descs[i].Name
total++
}
samples[0] = samples[0][:total]
samples[1] = samples[1][:total]
copy(samples[1], samples[0])
// Start some noise in the background.
var wg sync.WaitGroup
wg.Add(1)
done := make(chan struct{})
go func() {
defer wg.Done()
for {
// Add more things here that could influence metrics.
for i := 0; i < 10; i++ {
runtime.AddCleanup(new(*int), func(_ struct{}) {}, struct{}{})
runtime.SetFinalizer(new(*int), func(_ **int) {})
}
for i := 0; i < len(readMetricsSink); i++ {
readMetricsSink[i] = make([]byte, 1024)
select {
case <-done:
return
default:
}
}
runtime.GC()
}
}()
sum := func(us []uint64) uint64 {
total := uint64(0)
for _, u := range us {
total += u
}
return total
}
// Populate the first generation.
metrics.Read(samples[0])
// Check to make sure that these metrics only grow monotonically.
for gen := 1; gen < 10; gen++ {
metrics.Read(samples[gen%2])
for i := range samples[gen%2] {
name := samples[gen%2][i].Name
vNew, vOld := samples[gen%2][i].Value, samples[1-(gen%2)][i].Value
switch vNew.Kind() {
case metrics.KindUint64:
new := vNew.Uint64()
old := vOld.Uint64()
if new < old {
t.Errorf("%s decreased: %d < %d", name, new, old)
}
case metrics.KindFloat64:
new := vNew.Float64()
old := vOld.Float64()
if new < old {
t.Errorf("%s decreased: %f < %f", name, new, old)
}
case metrics.KindFloat64Histogram:
new := sum(vNew.Float64Histogram().Counts)
old := sum(vOld.Float64Histogram().Counts)
if new < old {
t.Errorf("%s counts decreased: %d < %d", name, new, old)
}
}
}
}
close(done)
wg.Wait()
}
func withinEpsilon(v1, v2, e float64) bool {
return v2-v2*e <= v1 && v1 <= v2+v2*e
}
func TestMutexWaitTimeMetric(t *testing.T) {
var sample [1]metrics.Sample
sample[0].Name = "/sync/mutex/wait/total:seconds"
locks := []locker2{
new(mutex),
new(rwmutexWrite),
new(rwmutexReadWrite),
new(rwmutexWriteRead),
}
for _, lock := range locks {
t.Run(reflect.TypeOf(lock).Elem().Name(), func(t *testing.T) {
metrics.Read(sample[:])
before := time.Duration(sample[0].Value.Float64() * 1e9)
minMutexWaitTime := generateMutexWaitTime(lock)
metrics.Read(sample[:])
after := time.Duration(sample[0].Value.Float64() * 1e9)
if wt := after - before; wt < minMutexWaitTime {
t.Errorf("too little mutex wait time: got %s, want %s", wt, minMutexWaitTime)
}
})
}
}
// locker2 represents an API surface of two concurrent goroutines
// locking the same resource, but through different APIs. It's intended
// to abstract over the relationship of two Lock calls or an RLock
// and a Lock call.
type locker2 interface {
Lock1()
Unlock1()
Lock2()
Unlock2()
}
type mutex struct {
mu sync.Mutex
}
func (m *mutex) Lock1() { m.mu.Lock() }
func (m *mutex) Unlock1() { m.mu.Unlock() }
func (m *mutex) Lock2() { m.mu.Lock() }
func (m *mutex) Unlock2() { m.mu.Unlock() }
type rwmutexWrite struct {
mu sync.RWMutex
}
func (m *rwmutexWrite) Lock1() { m.mu.Lock() }
func (m *rwmutexWrite) Unlock1() { m.mu.Unlock() }
func (m *rwmutexWrite) Lock2() { m.mu.Lock() }
func (m *rwmutexWrite) Unlock2() { m.mu.Unlock() }
type rwmutexReadWrite struct {
mu sync.RWMutex
}
func (m *rwmutexReadWrite) Lock1() { m.mu.RLock() }
func (m *rwmutexReadWrite) Unlock1() { m.mu.RUnlock() }
func (m *rwmutexReadWrite) Lock2() { m.mu.Lock() }
func (m *rwmutexReadWrite) Unlock2() { m.mu.Unlock() }
type rwmutexWriteRead struct {
mu sync.RWMutex
}
func (m *rwmutexWriteRead) Lock1() { m.mu.Lock() }
func (m *rwmutexWriteRead) Unlock1() { m.mu.Unlock() }
func (m *rwmutexWriteRead) Lock2() { m.mu.RLock() }
func (m *rwmutexWriteRead) Unlock2() { m.mu.RUnlock() }
// generateMutexWaitTime causes a couple of goroutines
// to block a whole bunch of times on a sync.Mutex, returning
// the minimum amount of time that should be visible in the
// /sync/mutex-wait:seconds metric.
func generateMutexWaitTime(mu locker2) time.Duration {
// Set up the runtime to always track casgstatus transitions for metrics.
*runtime.CasGStatusAlwaysTrack = true
mu.Lock1()
// Start up a goroutine to wait on the lock.
gc := make(chan *runtime.G)
done := make(chan bool)
go func() {
gc <- runtime.Getg()
for {
mu.Lock2()
mu.Unlock2()
if <-done {
return
}
}
}()
gp := <-gc
// Set the block time high enough so that it will always show up, even
// on systems with coarse timer granularity.
const blockTime = 100 * time.Millisecond
// Make sure the goroutine spawned above actually blocks on the lock.
for {
if runtime.GIsWaitingOnMutex(gp) {
break
}
runtime.Gosched()
}
// Let some amount of time pass.
time.Sleep(blockTime)
// Let the other goroutine acquire the lock.
mu.Unlock1()
done <- true
// Reset flag.
*runtime.CasGStatusAlwaysTrack = false
return blockTime
}
// See issue #60276.
func TestCPUMetricsSleep(t *testing.T) {
if runtime.GOOS == "wasip1" {
// Since wasip1 busy-waits in the scheduler, there's no meaningful idle
// time. This is accurately reflected in the metrics, but it means this
// test is basically meaningless on this platform.
t.Skip("wasip1 currently busy-waits in idle time; test not applicable")
}
names := []string{
"/cpu/classes/idle:cpu-seconds",
"/cpu/classes/gc/mark/assist:cpu-seconds",
"/cpu/classes/gc/mark/dedicated:cpu-seconds",
"/cpu/classes/gc/mark/idle:cpu-seconds",
"/cpu/classes/gc/pause:cpu-seconds",
"/cpu/classes/gc/total:cpu-seconds",
"/cpu/classes/scavenge/assist:cpu-seconds",
"/cpu/classes/scavenge/background:cpu-seconds",
"/cpu/classes/scavenge/total:cpu-seconds",
"/cpu/classes/total:cpu-seconds",
"/cpu/classes/user:cpu-seconds",
}
prep := func() []metrics.Sample {
mm := make([]metrics.Sample, len(names))
for i := range names {
mm[i].Name = names[i]
}
return mm
}
m1, m2 := prep(), prep()
const (
// Expected time spent idle.
dur = 100 * time.Millisecond
// maxFailures is the number of consecutive failures requires to cause the test to fail.
maxFailures = 10
)
failureIdleTimes := make([]float64, 0, maxFailures)
// If the bug we expect is happening, then the Sleep CPU time will be accounted for
// as user time rather than idle time. In an ideal world we'd expect the whole application
// to go instantly idle the moment this goroutine goes to sleep, and stay asleep for that
// duration. However, the Go runtime can easily eat into idle time while this goroutine is
// blocked in a sleep. For example, slow platforms might spend more time expected in the
// scheduler. Another example is that a Go runtime background goroutine could run while
// everything else is idle. Lastly, if a running goroutine is descheduled by the OS, enough
// time may pass such that the goroutine is ready to wake, even though the runtime couldn't
// observe itself as idle with nanotime.
//
// To deal with all this, we give a half-proc's worth of leniency.
//
// We also retry multiple times to deal with the fact that the OS might deschedule us before
// we yield and go idle. That has a rare enough chance that retries should resolve it.
// If the issue we expect is happening, it should be persistent.
minIdleCPUSeconds := dur.Seconds() * (float64(runtime.GOMAXPROCS(-1)) - 0.5)
// Let's make sure there's no background scavenge work to do.
//
// The runtime.GC calls below ensure the background sweeper
// will not run during the idle period.
debug.FreeOSMemory()
for retries := 0; retries < maxFailures; retries++ {
// Read 1.
runtime.GC() // Update /cpu/classes metrics.
metrics.Read(m1)
// Sleep.
time.Sleep(dur)
// Read 2.
runtime.GC() // Update /cpu/classes metrics.
metrics.Read(m2)
dt := m2[0].Value.Float64() - m1[0].Value.Float64()
if dt >= minIdleCPUSeconds {
// All is well. Test passed.
return
}
failureIdleTimes = append(failureIdleTimes, dt)
// Try again.
}
// We couldn't observe the expected idle time even once.
for i, dt := range failureIdleTimes {
t.Logf("try %2d: idle time = %.5fs\n", i+1, dt)
}
t.Logf("try %d breakdown:\n", len(failureIdleTimes))
for i := range names {
if m1[i].Value.Kind() == metrics.KindBad {
continue
}
t.Logf("\t%s %0.3f\n", names[i], m2[i].Value.Float64()-m1[i].Value.Float64())
}
t.Errorf(`time.Sleep did not contribute enough to "idle" class: minimum idle time = %.5fs`, minIdleCPUSeconds)
}
// Call f() and verify that the correct STW metrics increment. If isGC is true,
// fn triggers a GC STW. Otherwise, fn triggers an other STW.
func testSchedPauseMetrics(t *testing.T, fn func(t *testing.T), isGC bool) {
m := []metrics.Sample{
{Name: "/sched/pauses/stopping/gc:seconds"},
{Name: "/sched/pauses/stopping/other:seconds"},
{Name: "/sched/pauses/total/gc:seconds"},
{Name: "/sched/pauses/total/other:seconds"},
}
stoppingGC := &m[0]
stoppingOther := &m[1]
totalGC := &m[2]
totalOther := &m[3]
sampleCount := func(s *metrics.Sample) uint64 {
h := s.Value.Float64Histogram()
var n uint64
for _, c := range h.Counts {
n += c
}
return n
}
// Read baseline.
metrics.Read(m)
baselineStartGC := sampleCount(stoppingGC)
baselineStartOther := sampleCount(stoppingOther)
baselineTotalGC := sampleCount(totalGC)
baselineTotalOther := sampleCount(totalOther)
fn(t)
metrics.Read(m)
if isGC {
if got := sampleCount(stoppingGC); got <= baselineStartGC {
t.Errorf("/sched/pauses/stopping/gc:seconds sample count %d did not increase from baseline of %d", got, baselineStartGC)
}
if got := sampleCount(totalGC); got <= baselineTotalGC {
t.Errorf("/sched/pauses/total/gc:seconds sample count %d did not increase from baseline of %d", got, baselineTotalGC)
}
if got := sampleCount(stoppingOther); got != baselineStartOther {
t.Errorf("/sched/pauses/stopping/other:seconds sample count %d changed from baseline of %d", got, baselineStartOther)
}
if got := sampleCount(totalOther); got != baselineTotalOther {
t.Errorf("/sched/pauses/stopping/other:seconds sample count %d changed from baseline of %d", got, baselineTotalOther)
}
} else {
if got := sampleCount(stoppingGC); got != baselineStartGC {
t.Errorf("/sched/pauses/stopping/gc:seconds sample count %d changed from baseline of %d", got, baselineStartGC)
}
if got := sampleCount(totalGC); got != baselineTotalGC {
t.Errorf("/sched/pauses/total/gc:seconds sample count %d changed from baseline of %d", got, baselineTotalGC)
}
if got := sampleCount(stoppingOther); got <= baselineStartOther {
t.Errorf("/sched/pauses/stopping/other:seconds sample count %d did not increase from baseline of %d", got, baselineStartOther)
}
if got := sampleCount(totalOther); got <= baselineTotalOther {
t.Errorf("/sched/pauses/stopping/other:seconds sample count %d did not increase from baseline of %d", got, baselineTotalOther)
}
}
}
func TestSchedPauseMetrics(t *testing.T) {
tests := []struct {
name string
isGC bool
fn func(t *testing.T)
}{
{
name: "runtime.GC",
isGC: true,
fn: func(t *testing.T) {
runtime.GC()
},
},
{
name: "runtime.GOMAXPROCS",
fn: func(t *testing.T) {
if runtime.GOARCH == "wasm" {
t.Skip("GOMAXPROCS >1 not supported on wasm")
}
n := runtime.GOMAXPROCS(0)
defer runtime.GOMAXPROCS(n)
runtime.GOMAXPROCS(n + 1)
},
},
{
name: "runtime.GoroutineProfile",
fn: func(t *testing.T) {
var s [1]runtime.StackRecord
runtime.GoroutineProfile(s[:])
},
},
{
name: "runtime.ReadMemStats",
fn: func(t *testing.T) {
var mstats runtime.MemStats
runtime.ReadMemStats(&mstats)
},
},
{
name: "runtime.Stack",
fn: func(t *testing.T) {
var b [64]byte
runtime.Stack(b[:], true)
},
},
{
name: "runtime/debug.WriteHeapDump",
fn: func(t *testing.T) {
if runtime.GOOS == "js" {
t.Skip("WriteHeapDump not supported on js")
}
f, err := os.CreateTemp(t.TempDir(), "heapdumptest")
if err != nil {
t.Fatalf("os.CreateTemp failed: %v", err)
}
defer os.Remove(f.Name())
defer f.Close()
debug.WriteHeapDump(f.Fd())
},
},
{
name: "runtime/trace.Start",
fn: func(t *testing.T) {
if trace.IsEnabled() {
t.Skip("tracing already enabled")
}
var buf bytes.Buffer
if err := trace.Start(&buf); err != nil {
t.Errorf("trace.Start err got %v want nil", err)
}
trace.Stop()
},
},
}
// These tests count STW pauses, classified based on whether they're related
// to the GC or not. Disable automatic GC cycles during the test so we don't
// have an incidental GC pause when we're trying to observe only
// non-GC-related pauses. This is especially important for the
// runtime/trace.Start test, since (as of this writing) that will block
// until any active GC mark phase completes.
defer debug.SetGCPercent(debug.SetGCPercent(-1))
runtime.GC()
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
testSchedPauseMetrics(t, tc.fn, tc.isGC)
})
}
}
func TestRuntimeLockMetricsAndProfile(t *testing.T) {
old := runtime.SetMutexProfileFraction(0) // enabled during sub-tests
defer runtime.SetMutexProfileFraction(old)
if old != 0 {
t.Fatalf("need MutexProfileRate 0, got %d", old)
}
t.Logf("NumCPU %d", runtime.NumCPU())
t.Logf("GOMAXPROCS %d", runtime.GOMAXPROCS(0))
if minCPU := 2; runtime.NumCPU() < minCPU {
t.Skipf("creating and observing contention on runtime-internal locks requires NumCPU >= %d", minCPU)
}
loadProfile := func(t *testing.T) *profile.Profile {
var w bytes.Buffer
pprof.Lookup("mutex").WriteTo(&w, 0)
p, err := profile.Parse(&w)
if err != nil {
t.Fatalf("failed to parse profile: %v", err)
}
if err := p.CheckValid(); err != nil {
t.Fatalf("invalid profile: %v", err)
}
return p
}
measureDelta := func(t *testing.T, fn func()) (metricGrowth, profileGrowth float64, p *profile.Profile) {
beforeProfile := loadProfile(t)
beforeMetrics := []metrics.Sample{{Name: "/sync/mutex/wait/total:seconds"}}
metrics.Read(beforeMetrics)
fn()
afterProfile := loadProfile(t)
afterMetrics := []metrics.Sample{{Name: "/sync/mutex/wait/total:seconds"}}
metrics.Read(afterMetrics)
sumSamples := func(p *profile.Profile, i int) int64 {
var sum int64
for _, s := range p.Sample {
sum += s.Value[i]
}
return sum
}
metricGrowth = afterMetrics[0].Value.Float64() - beforeMetrics[0].Value.Float64()
profileGrowth = float64(sumSamples(afterProfile, 1)-sumSamples(beforeProfile, 1)) * time.Nanosecond.Seconds()
// The internal/profile package does not support compaction; this delta
// profile will include separate positive and negative entries.
p = afterProfile.Copy()
if len(beforeProfile.Sample) > 0 {
err := p.Merge(beforeProfile, -1)
if err != nil {
t.Fatalf("Merge profiles: %v", err)
}
}
return metricGrowth, profileGrowth, p
}
testcase := func(strictTiming bool, acceptStacks [][]string, workers int, fn func() bool) func(t *testing.T) (metricGrowth float64, profileGrowth []int64, n, value int64, explain func()) {
return func(t *testing.T) (metricGrowth float64, profileGrowth []int64, n, value int64, explain func()) {
metricGrowth, totalProfileGrowth, p := measureDelta(t, func() {
var started, stopped sync.WaitGroup
started.Add(workers)
stopped.Add(workers)
for i := 0; i < workers; i++ {
w := &contentionWorker{
before: func() {
started.Done()
started.Wait()
},
after: func() {
stopped.Done()
},
fn: fn,
}
go w.run()
}
stopped.Wait()
})
if totalProfileGrowth == 0 {
t.Errorf("no increase in mutex profile")
}
if metricGrowth == 0 && strictTiming {
// If the critical section is very short, systems with low timer
// resolution may be unable to measure it via nanotime.
//
// This is sampled at 1 per gTrackingPeriod, but the explicit
// runtime.mutex tests create 200 contention events. Observing
// zero of those has a probability of (7/8)^200 = 2.5e-12 which
// is acceptably low (though the calculation has a tenuous
// dependency on cheaprandn being a good-enough source of
// entropy).
t.Errorf("no increase in /sync/mutex/wait/total:seconds metric")
}
// This comparison is possible because the time measurements in support of
// runtime/pprof and runtime/metrics for runtime-internal locks are so close
// together. It doesn't work as well for user-space contention, where the
// involved goroutines are not _Grunnable the whole time and so need to pass
// through the scheduler.
t.Logf("lock contention growth in runtime/pprof's view (%fs)", totalProfileGrowth)
t.Logf("lock contention growth in runtime/metrics' view (%fs)", metricGrowth)
acceptStacks = append([][]string(nil), acceptStacks...)
for i, stk := range acceptStacks {
if goexperiment.StaticLockRanking {
if !slices.ContainsFunc(stk, func(s string) bool {
return s == "runtime.systemstack" || s == "runtime.mcall" || s == "runtime.mstart"
}) {
// stk is a call stack that is still on the user stack when
// it calls runtime.unlock. Add the extra function that
// we'll see, when the static lock ranking implementation of
// runtime.unlockWithRank switches to the system stack.
stk = append([]string{"runtime.unlockWithRank"}, stk...)
}
}
acceptStacks[i] = stk
}
var stks [][]string
values := make([][2]int64, len(acceptStacks)+1)
for _, s := range p.Sample {
var have []string
for _, loc := range s.Location {
for _, line := range loc.Line {
have = append(have, line.Function.Name)
}
}
stks = append(stks, have)
found := false
for i, stk := range acceptStacks {
if slices.Equal(have, stk) {
values[i][0] += s.Value[0]
values[i][1] += s.Value[1]
found = true
break
}
}
if !found {
values[len(values)-1][0] += s.Value[0]
values[len(values)-1][1] += s.Value[1]
}
}
profileGrowth = make([]int64, len(acceptStacks)+1)
profileGrowth[len(profileGrowth)-1] = values[len(values)-1][1]
for i, stk := range acceptStacks {
n += values[i][0]
value += values[i][1]
profileGrowth[i] = values[i][1]
t.Logf("stack %v has samples totaling n=%d value=%d", stk, values[i][0], values[i][1])
}
if n == 0 && value == 0 {
t.Logf("profile:\n%s", p)
for _, have := range stks {
t.Logf("have stack %v", have)
}
for _, stk := range acceptStacks {
t.Errorf("want stack %v", stk)
}
}
return metricGrowth, profileGrowth, n, value, func() {
t.Logf("profile:\n%s", p)
}
}
}
name := t.Name()
t.Run("runtime.lock", func(t *testing.T) {
// The goroutine that acquires the lock will only proceed when it
// detects that its partner is contended for the lock. That will lead to
// live-lock if anything (such as a STW) prevents the partner goroutine
// from running. Allowing the contention workers to pause and restart
// (to allow a STW to proceed) makes it harder to confirm that we're
// counting the correct number of contention events, since some locks
// will end up contended twice. Instead, disable the GC.
defer debug.SetGCPercent(debug.SetGCPercent(-1))
mus := make([]runtime.Mutex, 200)
var needContention atomic.Int64
baseDelay := 100 * time.Microsecond // large relative to system noise, for comparison between clocks
fastDelayMicros := baseDelay.Microseconds()
slowDelayMicros := baseDelay.Microseconds() * 4
const (
fastRole = iota
slowRole
workerCount
)
if runtime.GOMAXPROCS(0) < workerCount {
t.Skipf("contention on runtime-internal locks requires GOMAXPROCS >= %d", workerCount)
}
minTicks := make([][]int64, workerCount) // lower bound, known-contended time, measured by cputicks
maxTicks := make([][]int64, workerCount) // upper bound, total lock() duration, measured by cputicks
for i := range minTicks {
minTicks[i] = make([]int64, len(mus))
maxTicks[i] = make([]int64, len(mus))
}
var id atomic.Int32
fn := func() bool {
n := int(needContention.Load())
if n < 0 {
return false
}
mu := &mus[n]
// Each worker has a role: to have a fast or slow critical section.
// Rotate the role assignments as we step through the mutex slice so
// we don't end up with one M always claiming the same kind of work.
id := int(id.Add(1))
role := (id + n) % workerCount
marker, delayMicros := fastMarkerFrame, fastDelayMicros
if role == slowRole {
marker, delayMicros = slowMarkerFrame, slowDelayMicros
}
// Each lock is used by two different critical sections, one fast
// and one slow, identified in profiles by their different "marker"
// functions. We expect the profile to blame each for the amount of
// delay it inflicts on other users of the lock. We run one worker
// of each kind, so any contention in one would be due to the other.
//
// We measure how long our runtime.lock call takes, which sets an
// upper bound on how much blame to expect for the other worker type
// in the profile. And if we acquire the lock first, we wait for the
// other worker to announce its contention. We measure the
// known-contended time, to use as a lower bound on how much blame
// we expect of ourselves in the profile. Then we stall for a little
// while (different amounts for "fast" versus "slow") before
// unlocking the mutex.
marker(func() {
t0 := runtime.Cputicks()
runtime.Lock(mu)
maxTicks[role][n] = runtime.Cputicks() - t0
minTicks[role][n] = 0
for int(needContention.Load()) == n {
if runtime.MutexContended(mu) {
t1 := runtime.Cputicks()
// make them wait a little while
for start := runtime.Nanotime(); (runtime.Nanotime()-start)/1000 < delayMicros; {
runtime.Usleep(uint32(1 + delayMicros/8))
}
minTicks[role][n] = runtime.Cputicks() - t1
break
}
}
runtime.Unlock(mu)
needContention.Store(int64(n - 1))
})
return true
}
stks := make([][]string, 2)
for i := range stks {
marker := "runtime_test.fastMarkerFrame"
if i == slowRole {
marker = "runtime_test.slowMarkerFrame"
}
stks[i] = []string{
"runtime.unlock",
"runtime_test." + name + ".func4.1.1",
marker,
"runtime_test." + name + ".func4.1",
"runtime_test.(*contentionWorker).run",
}
}
t.Run("sample-1", func(t *testing.T) {
old := runtime.SetMutexProfileFraction(1)
defer runtime.SetMutexProfileFraction(old)
needContention.Store(int64(len(mus) - 1))
metricGrowth, profileGrowth, n, _, explain := testcase(true, stks, workerCount, fn)(t)
defer func() {
if t.Failed() {
explain()
}
}()
t.Run("metric", func(t *testing.T) {
// The runtime/metrics view may be sampled at 1 per
// gTrackingPeriod, so we don't have a hard lower bound here.
testenv.SkipFlaky(t, 64253)
if have, want := metricGrowth, baseDelay.Seconds()*float64(len(mus)); have < want {
// The test imposes a delay with usleep, verified with calls to
// nanotime. Compare against the runtime/metrics package's view
// (based on nanotime) rather than runtime/pprof's view (based
// on cputicks).
t.Errorf("runtime/metrics reported less than the known minimum contention duration (%fs < %fs)", have, want)
}
})
if have, want := n, int64(len(mus)); have != want {
t.Errorf("mutex profile reported contention count different from the known true count (%d != %d)", have, want)
}
var slowMinTicks, fastMinTicks int64
for role, ticks := range minTicks {
for _, delta := range ticks {
if role == slowRole {
slowMinTicks += delta
} else {
fastMinTicks += delta
}
}
}
var slowMaxTicks, fastMaxTicks int64
for role, ticks := range maxTicks {
for _, delta := range ticks {
if role == slowRole {
slowMaxTicks += delta
} else {
fastMaxTicks += delta
}
}
}
cpuGHz := float64(runtime.CyclesPerSecond()) / 1e9
for _, set := range []struct {
name string
profTime int64
minTime int64
maxTime int64
}{
{
name: "slow",
profTime: profileGrowth[slowRole],
minTime: int64(float64(slowMinTicks) / cpuGHz),
maxTime: int64(float64(fastMaxTicks) / cpuGHz),
},
{
name: "fast",
profTime: profileGrowth[fastRole],
minTime: int64(float64(fastMinTicks) / cpuGHz),
maxTime: int64(float64(slowMaxTicks) / cpuGHz),
},
} {
t.Logf("profile's view of delays due to %q critical section: %dns", set.name, set.profTime)
t.Logf("test's view of known-contended time within %q critical section: %dns", set.name, set.minTime)
t.Logf("test's view of lock duration before critical sections other than %q: %dns", set.name, set.maxTime)
if set.profTime < set.minTime {
t.Errorf("profile undercounted %q critical section", set.name)
}
if set.profTime > set.maxTime {
t.Errorf("profile overcounted %q critical section", set.name)
}
}
var totalProfileGrowth float64
for _, growth := range profileGrowth {
totalProfileGrowth += float64(growth) * time.Nanosecond.Seconds()
}
const slop = 1.5 // account for nanotime vs cputicks
t.Run("compare timers", func(t *testing.T) {
testenv.SkipFlaky(t, 64253)
if totalProfileGrowth > slop*metricGrowth || metricGrowth > slop*totalProfileGrowth {
t.Errorf("views differ by more than %fx", slop)
}
})
})
t.Run("sample-2", func(t *testing.T) {
testenv.SkipFlaky(t, 64253)
old := runtime.SetMutexProfileFraction(2)
defer runtime.SetMutexProfileFraction(old)
needContention.Store(int64(len(mus) - 1))
metricGrowth, profileGrowth, n, _, explain := testcase(true, stks, workerCount, fn)(t)
defer func() {
if t.Failed() {
explain()
}
}()
// With 100 trials and profile fraction of 2, we expect to capture
// 50 samples. Allow the test to pass if we get at least 20 samples;
// the CDF of the binomial distribution says there's less than a
// 1e-9 chance of that, which is an acceptably low flakiness rate.
const samplingSlop = 2.5
if have, want := metricGrowth, baseDelay.Seconds()*float64(len(mus)); samplingSlop*have < want {
// The test imposes a delay with usleep, verified with calls to
// nanotime. Compare against the runtime/metrics package's view
// (based on nanotime) rather than runtime/pprof's view (based
// on cputicks).
t.Errorf("runtime/metrics reported less than the known minimum contention duration (%f * %fs < %fs)", samplingSlop, have, want)
}
if have, want := n, int64(len(mus)); float64(have) > float64(want)*samplingSlop || float64(want) > float64(have)*samplingSlop {
t.Errorf("mutex profile reported contention count too different from the expected count (%d far from %d)", have, want)
}
var totalProfileGrowth float64
for _, growth := range profileGrowth {
totalProfileGrowth += float64(growth) * time.Nanosecond.Seconds()
}
const timerSlop = 1.5 * samplingSlop // account for nanotime vs cputicks, plus the two views' independent sampling
if totalProfileGrowth > timerSlop*metricGrowth || metricGrowth > timerSlop*totalProfileGrowth {
t.Errorf("views differ by more than %fx", timerSlop)
}
})
})
t.Run("runtime.semrelease", func(t *testing.T) {
testenv.SkipFlaky(t, 64253)
old := runtime.SetMutexProfileFraction(1)
defer runtime.SetMutexProfileFraction(old)
const workers = 3
if runtime.GOMAXPROCS(0) < workers {
t.Skipf("creating and observing contention on runtime-internal semaphores requires GOMAXPROCS >= %d", workers)
}
var sem uint32 = 1
var tries atomic.Int32
tries.Store(10_000_000) // prefer controlled failure to timeout
var sawContention atomic.Int32
var need int32 = 1
fn := func() bool {
if sawContention.Load() >= need {
return false
}
if tries.Add(-1) < 0 {
return false
}
runtime.Semacquire(&sem)
runtime.Semrelease1(&sem, false, 0)
if runtime.MutexContended(runtime.SemRootLock(&sem)) {
sawContention.Add(1)
}
return true
}
stks := [][]string{
{
"runtime.unlock",
"runtime.semrelease1",
"runtime_test.TestRuntimeLockMetricsAndProfile.func5.1",
"runtime_test.(*contentionWorker).run",
},
{
"runtime.unlock",
"runtime.semacquire1",
"runtime.semacquire",
"runtime_test.TestRuntimeLockMetricsAndProfile.func5.1",
"runtime_test.(*contentionWorker).run",
},
}
// Verify that we get call stack we expect, with anything more than zero
// cycles / zero samples. The duration of each contention event is too
// small relative to the expected overhead for us to verify its value
// more directly. Leave that to the explicit lock/unlock test.
testcase(false, stks, workers, fn)(t)
if remaining := tries.Load(); remaining >= 0 {
t.Logf("finished test early (%d tries remaining)", remaining)
}
})
}
func slowMarkerFrame(fn func()) { fn() }
func fastMarkerFrame(fn func()) { fn() }
// contentionWorker provides cleaner call stacks for lock contention profile tests
type contentionWorker struct {
before func()
fn func() bool
after func()
}
func (w *contentionWorker) run() {
defer w.after()
w.before()
for w.fn() {
}
}
func TestCPUStats(t *testing.T) {
// Run a few GC cycles to get some of the stats to be non-zero.
runtime.GC()
runtime.GC()
runtime.GC()
// Set GOMAXPROCS high then sleep briefly to ensure we generate
// some idle time.
oldmaxprocs := runtime.GOMAXPROCS(10)
time.Sleep(time.Millisecond)
runtime.GOMAXPROCS(oldmaxprocs)
stats := runtime.ReadCPUStats()
gcTotal := stats.GCAssistTime + stats.GCDedicatedTime + stats.GCIdleTime + stats.GCPauseTime
if gcTotal != stats.GCTotalTime {
t.Errorf("manually computed total does not match GCTotalTime: %d cpu-ns vs. %d cpu-ns", gcTotal, stats.GCTotalTime)
}
scavTotal := stats.ScavengeAssistTime + stats.ScavengeBgTime
if scavTotal != stats.ScavengeTotalTime {
t.Errorf("manually computed total does not match ScavengeTotalTime: %d cpu-ns vs. %d cpu-ns", scavTotal, stats.ScavengeTotalTime)
}
total := gcTotal + scavTotal + stats.IdleTime + stats.UserTime
if total != stats.TotalTime {
t.Errorf("manually computed overall total does not match TotalTime: %d cpu-ns vs. %d cpu-ns", total, stats.TotalTime)
}
if total == 0 {
t.Error("total time is zero")
}
if gcTotal == 0 {
t.Error("GC total time is zero")
}
if stats.IdleTime == 0 {
t.Error("idle time is zero")
}
}
func TestMetricHeapUnusedLargeObjectOverflow(t *testing.T) {
// This test makes sure /memory/classes/heap/unused:bytes
// doesn't overflow when allocating and deallocating large
// objects. It is a regression test for #67019.
done := make(chan struct{})
var wg sync.WaitGroup
wg.Add(1)
go func() {
defer wg.Done()
for {
for range 10 {
abi.Escape(make([]byte, 1<<20))
}
runtime.GC()
select {
case <-done:
return
default:
}
}
}()
s := []metrics.Sample{
{Name: "/memory/classes/heap/unused:bytes"},
}
for range 1000 {
metrics.Read(s)
if s[0].Value.Uint64() > 1<<40 {
t.Errorf("overflow")
break
}
}
done <- struct{}{}
wg.Wait()
}
func TestReadMetricsCleanups(t *testing.T) {
runtime.GC() // End any in-progress GC.
runtime.BlockUntilEmptyCleanupQueue(int64(1 * time.Second)) // Flush any queued cleanups.
var before [2]metrics.Sample
before[0].Name = "/gc/cleanups/queued:cleanups"
before[1].Name = "/gc/cleanups/executed:cleanups"
after := before
metrics.Read(before[:])
const N = 10
for i := 0; i < N; i++ {
runtime.AddCleanup(new(*int), func(_ struct{}) {}, struct{}{})
}
runtime.GC()
runtime.BlockUntilEmptyCleanupQueue(int64(1 * time.Second))
metrics.Read(after[:])
if v0, v1 := before[0].Value.Uint64(), after[0].Value.Uint64(); v0+N != v1 {
t.Errorf("expected %s difference to be exactly %d, got %d -> %d", before[0].Name, N, v0, v1)
}
if v0, v1 := before[1].Value.Uint64(), after[1].Value.Uint64(); v0+N != v1 {
t.Errorf("expected %s difference to be exactly %d, got %d -> %d", before[1].Name, N, v0, v1)
}
}
func TestReadMetricsFinalizers(t *testing.T) {
runtime.GC() // End any in-progress GC.
runtime.BlockUntilEmptyFinalizerQueue(int64(1 * time.Second)) // Flush any queued finalizers.
var before [2]metrics.Sample
before[0].Name = "/gc/finalizers/queued:finalizers"
before[1].Name = "/gc/finalizers/executed:finalizers"
after := before
metrics.Read(before[:])
const N = 10
for i := 0; i < N; i++ {
runtime.SetFinalizer(new(*int), func(_ **int) {})
}
runtime.GC()
runtime.GC()
runtime.BlockUntilEmptyFinalizerQueue(int64(1 * time.Second))
metrics.Read(after[:])
if v0, v1 := before[0].Value.Uint64(), after[0].Value.Uint64(); v0+N != v1 {
t.Errorf("expected %s difference to be exactly %d, got %d -> %d", before[0].Name, N, v0, v1)
}
if v0, v1 := before[1].Value.Uint64(), after[1].Value.Uint64(); v0+N != v1 {
t.Errorf("expected %s difference to be exactly %d, got %d -> %d", before[1].Name, N, v0, v1)
}
}
func TestReadMetricsSched(t *testing.T) {
const (
notInGo = iota
runnable
running
waiting
created
threads
numSamples
)
var s [numSamples]metrics.Sample
s[notInGo].Name = "/sched/goroutines/not-in-go:goroutines"
s[runnable].Name = "/sched/goroutines/runnable:goroutines"
s[running].Name = "/sched/goroutines/running:goroutines"
s[waiting].Name = "/sched/goroutines/waiting:goroutines"
s[created].Name = "/sched/goroutines-created:goroutines"
s[threads].Name = "/sched/threads/total:threads"
logMetrics := func(t *testing.T, s []metrics.Sample) {
for i := range s {
t.Logf("%s: %d", s[i].Name, s[i].Value.Uint64())
}
}
// generalSlack is the amount of goroutines we allow ourselves to be
// off by in any given category, either due to background system
// goroutines or testing package goroutines.
const generalSlack = 4
// waitingSlack is the max number of blocked goroutines left
// from other tests, the testing package, or system
// goroutines.
const waitingSlack = 100
// threadsSlack is the maximum number of threads left over
// from other tests and the runtime (sysmon, the template thread, etc.)
const threadsSlack = 20
// Make sure GC isn't running, since GC workers interfere with
// expected counts.
defer debug.SetGCPercent(debug.SetGCPercent(-1))
runtime.GC()
check := func(t *testing.T, s *metrics.Sample, min, max uint64) {
val := s.Value.Uint64()
if val < min {
t.Errorf("%s too low; %d < %d", s.Name, val, min)
}
if val > max {
t.Errorf("%s too high; %d > %d", s.Name, val, max)
}
}
checkEq := func(t *testing.T, s *metrics.Sample, value uint64) {
check(t, s, value, value)
}
spinUntil := func(f func() bool) bool {
for {
if f() {
return true
}
time.Sleep(50 * time.Millisecond)
}
}
// Check base values.
t.Run("base", func(t *testing.T) {
defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(1))
metrics.Read(s[:])
logMetrics(t, s[:])
check(t, &s[notInGo], 0, generalSlack)
check(t, &s[runnable], 0, generalSlack)
checkEq(t, &s[running], 1)
check(t, &s[waiting], 0, waitingSlack)
})
metrics.Read(s[:])
createdAfterBase := s[created].Value.Uint64()
// Force Running count to be high. We'll use these goroutines
// for Runnable, too.
const count = 10
var ready, exit atomic.Uint32
for i := 0; i < count-1; i++ {
go func() {
ready.Add(1)
for exit.Load() == 0 {
// Spin to get us and keep us running, but check
// the exit condition so we exit out early if we're
// done.
start := time.Now()
for time.Since(start) < 10*time.Millisecond && exit.Load() == 0 {
}
runtime.Gosched()
}
}()
}
for ready.Load() < count-1 {
runtime.Gosched()
}
// Be careful. We've entered a dangerous state for platforms
// that do not return back to the underlying system unless all
// goroutines are blocked, like js/wasm, since we have a bunch
// of runnable goroutines all spinning. We cannot write anything
// out.
if testenv.HasParallelism() {
t.Run("created", func(t *testing.T) {
metrics.Read(s[:])
logMetrics(t, s[:])
checkEq(t, &s[created], createdAfterBase+count)
})
t.Run("running", func(t *testing.T) {
defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(count + 4))
// It can take a little bit for the scheduler to
// distribute the goroutines to Ps, so retry until
// we see the count we expect or the test times out.
spinUntil(func() bool {
metrics.Read(s[:])
return s[running].Value.Uint64() >= count
})
logMetrics(t, s[:])
check(t, &s[running], count, count+4)
check(t, &s[threads], count, count+4+threadsSlack)
})
// Force runnable count to be high.
t.Run("runnable", func(t *testing.T) {
defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(1))
metrics.Read(s[:])
logMetrics(t, s[:])
checkEq(t, &s[running], 1)
check(t, &s[runnable], count-1, count+generalSlack)
})
// Done with the running/runnable goroutines.
exit.Store(1)
} else {
// Read metrics and then exit all the other goroutines,
// so that system calls may proceed.
metrics.Read(s[:])
// Done with the running/runnable goroutines.
exit.Store(1)
// Now we can check our invariants.
t.Run("created", func(t *testing.T) {
// Look for count-1 goroutines because we read metrics
// *before* t.Run goroutine was created for this sub-test.
checkEq(t, &s[created], createdAfterBase+count-1)
})
t.Run("running", func(t *testing.T) {
logMetrics(t, s[:])
checkEq(t, &s[running], 1)
checkEq(t, &s[threads], 1)
})
t.Run("runnable", func(t *testing.T) {
logMetrics(t, s[:])
check(t, &s[runnable], count-1, count+generalSlack)
})
}
// Force not-in-go count to be high. This is a little tricky since
// we try really hard not to let things block in system calls.
// We have to drop to the syscall package to do this reliably.
t.Run("not-in-go", func(t *testing.T) {
// Block a bunch of goroutines on an OS pipe.
pr, pw, err := pipe()
if err != nil {
switch runtime.GOOS {
case "js", "wasip1":
t.Skip("creating pipe:", err)
}
t.Fatal("creating pipe:", err)
}
for i := 0; i < count; i++ {
go syscall.Read(pr, make([]byte, 1))
}
// Let the goroutines block.
spinUntil(func() bool {
metrics.Read(s[:])
return s[notInGo].Value.Uint64() >= count
})
logMetrics(t, s[:])
check(t, &s[notInGo], count, count+generalSlack)
syscall.Close(pw)
syscall.Close(pr)
})
t.Run("waiting", func(t *testing.T) {
// Force waiting count to be high.
const waitingCount = 1000
stop := make(chan bool)
for i := 0; i < waitingCount; i++ {
go func() { <-stop }()
}
// Let the goroutines block.
spinUntil(func() bool {
metrics.Read(s[:])
return s[waiting].Value.Uint64() >= waitingCount
})
logMetrics(t, s[:])
check(t, &s[waiting], waitingCount, waitingCount+waitingSlack)
close(stop)
})
}