go/src/runtime/metrics_test.go

// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package runtime_test

import (
	"bytes"
	"internal/abi"
	"internal/goexperiment"
	"internal/profile"
	"internal/testenv"
	"os"
	"reflect"
	"runtime"
	"runtime/debug"
	"runtime/metrics"
	"runtime/pprof"
	"runtime/trace"
	"slices"
	"sort"
	"strings"
	"sync"
	"sync/atomic"
	"syscall"
	"testing"
	"time"
	"unsafe"
)

func prepareAllMetricsSamples() (map[string]metrics.Description, []metrics.Sample) {
	all := metrics.All()
	samples := make([]metrics.Sample, len(all))
	descs := make(map[string]metrics.Description)
	for i := range all {
		samples[i].Name = all[i].Name
		descs[all[i].Name] = all[i]
	}
	return descs, samples
}

func TestReadMetrics(t *testing.T) {
	// Run a GC cycle to get some of the stats to be non-zero.
	runtime.GC()

	// Set an arbitrary memory limit to check the metric for it
	limit := int64(512 * 1024 * 1024)
	oldLimit := debug.SetMemoryLimit(limit)
	defer debug.SetMemoryLimit(oldLimit)

	// Set a GC percent to check the metric for it
	gcPercent := 99
	oldGCPercent := debug.SetGCPercent(gcPercent)
	defer debug.SetGCPercent(oldGCPercent)

	// Tests whether readMetrics produces values aligning
	// with ReadMemStats while the world is stopped.
	var mstats runtime.MemStats
	_, samples := prepareAllMetricsSamples()
	runtime.ReadMetricsSlow(&mstats, unsafe.Pointer(&samples[0]), len(samples), cap(samples))

	checkUint64 := func(t *testing.T, m string, got, want uint64) {
		t.Helper()
		if got != want {
			t.Errorf("metric %q: got %d, want %d", m, got, want)
		}
	}

	// Check to make sure the values we read line up with other values we read.
	var allocsBySize, gcPauses, schedPausesTotalGC *metrics.Float64Histogram
	var tinyAllocs uint64
	var mallocs, frees uint64
	for i := range samples {
		switch name := samples[i].Name; name {
		case "/cgo/go-to-c-calls:calls":
			checkUint64(t, name, samples[i].Value.Uint64(), uint64(runtime.NumCgoCall()))
		case "/memory/classes/heap/free:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapIdle-mstats.HeapReleased)
		case "/memory/classes/heap/released:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapReleased)
		case "/memory/classes/heap/objects:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapAlloc)
		case "/memory/classes/heap/unused:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapInuse-mstats.HeapAlloc)
		case "/memory/classes/heap/stacks:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.StackInuse)
		case "/memory/classes/metadata/mcache/free:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.MCacheSys-mstats.MCacheInuse)
		case "/memory/classes/metadata/mcache/inuse:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.MCacheInuse)
		case "/memory/classes/metadata/mspan/free:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.MSpanSys-mstats.MSpanInuse)
		case "/memory/classes/metadata/mspan/inuse:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.MSpanInuse)
		case "/memory/classes/metadata/other:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.GCSys)
		case "/memory/classes/os-stacks:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.StackSys-mstats.StackInuse)
		case "/memory/classes/other:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.OtherSys)
		case "/memory/classes/profiling/buckets:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.BuckHashSys)
		case "/memory/classes/total:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.Sys)
		case "/gc/heap/allocs-by-size:bytes":
			hist := samples[i].Value.Float64Histogram()
			// Skip size class 0 in BySize, because it's always empty and not represented
			// in the histogram.
			for i, sc := range mstats.BySize[1:] {
				if b, s := hist.Buckets[i+1], float64(sc.Size+1); b != s {
					t.Errorf("bucket does not match size class: got %f, want %f", b, s)
					// The rest of the checks aren't expected to work anyway.
					continue
				}
				if c, m := hist.Counts[i], sc.Mallocs; c != m {
					t.Errorf("histogram counts do not much BySize for class %d: got %d, want %d", i, c, m)
				}
			}
			allocsBySize = hist
		case "/gc/heap/allocs:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.TotalAlloc)
		case "/gc/heap/frees-by-size:bytes":
			hist := samples[i].Value.Float64Histogram()
			// Skip size class 0 in BySize, because it's always empty and not represented
			// in the histogram.
			for i, sc := range mstats.BySize[1:] {
				if b, s := hist.Buckets[i+1], float64(sc.Size+1); b != s {
					t.Errorf("bucket does not match size class: got %f, want %f", b, s)
					// The rest of the checks aren't expected to work anyway.
					continue
				}
				if c, f := hist.Counts[i], sc.Frees; c != f {
					t.Errorf("histogram counts do not match BySize for class %d: got %d, want %d", i, c, f)
				}
			}
		case "/gc/heap/frees:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.TotalAlloc-mstats.HeapAlloc)
		case "/gc/heap/tiny/allocs:objects":
			// Currently, MemStats adds tiny alloc count to both Mallocs AND Frees.
			// The reason for this is because MemStats couldn't be extended at the time
			// but there was a desire to have Mallocs at least be a little more representative,
			// while having Mallocs - Frees still represent a live object count.
			// Unfortunately, MemStats doesn't actually export a large allocation count,
			// so it's impossible to pull this number out directly.
			//
			// Check tiny allocation count outside of this loop, by using the allocs-by-size
			// histogram in order to figure out how many large objects there are.
			tinyAllocs = samples[i].Value.Uint64()
			// Because the next two metrics tests are checking against Mallocs and Frees,
			// we can't check them directly for the same reason: we need to account for tiny
			// allocations included in Mallocs and Frees.
		case "/gc/heap/allocs:objects":
			mallocs = samples[i].Value.Uint64()
		case "/gc/heap/frees:objects":
			frees = samples[i].Value.Uint64()
		case "/gc/heap/live:bytes":
			// Check for "obviously wrong" values. We can't check a stronger invariant,
			// such as live <= HeapAlloc, because live is not 100% accurate. It's computed
			// under racy conditions, and some objects may be double-counted (this is
			// intentional and necessary for GC performance).
			//
			// Instead, check against a much more reasonable upper-bound: the amount of
			// mapped heap memory. We can't possibly overcount to the point of exceeding
			// total mapped heap memory, except if there's an accounting bug.
			if live := samples[i].Value.Uint64(); live > mstats.HeapSys {
				t.Errorf("live bytes: %d > heap sys: %d", live, mstats.HeapSys)
			} else if live == 0 {
				// Might happen if we don't call runtime.GC() above.
				t.Error("live bytes is 0")
			}
		case "/gc/gomemlimit:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), uint64(limit))
		case "/gc/heap/objects:objects":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapObjects)
		case "/gc/heap/goal:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.NextGC)
		case "/gc/gogc:percent":
			checkUint64(t, name, samples[i].Value.Uint64(), uint64(gcPercent))
		case "/gc/cycles/automatic:gc-cycles":
			checkUint64(t, name, samples[i].Value.Uint64(), uint64(mstats.NumGC-mstats.NumForcedGC))
		case "/gc/cycles/forced:gc-cycles":
			checkUint64(t, name, samples[i].Value.Uint64(), uint64(mstats.NumForcedGC))
		case "/gc/cycles/total:gc-cycles":
			checkUint64(t, name, samples[i].Value.Uint64(), uint64(mstats.NumGC))
		case "/gc/pauses:seconds":
			gcPauses = samples[i].Value.Float64Histogram()
		case "/sched/pauses/total/gc:seconds":
			schedPausesTotalGC = samples[i].Value.Float64Histogram()
		}
	}

	// Check tinyAllocs.
	nonTinyAllocs := uint64(0)
	for _, c := range allocsBySize.Counts {
		nonTinyAllocs += c
	}
	checkUint64(t, "/gc/heap/tiny/allocs:objects", tinyAllocs, mstats.Mallocs-nonTinyAllocs)

	// Check allocation and free counts.
	checkUint64(t, "/gc/heap/allocs:objects", mallocs, mstats.Mallocs-tinyAllocs)
	checkUint64(t, "/gc/heap/frees:objects", frees, mstats.Frees-tinyAllocs)

	// Verify that /gc/pauses:seconds is a copy of /sched/pauses/total/gc:seconds
	if !slices.Equal(gcPauses.Buckets, schedPausesTotalGC.Buckets) {
		t.Errorf("/gc/pauses:seconds buckets %v do not match /sched/pauses/total/gc:seconds buckets %v", gcPauses.Buckets, schedPausesTotalGC.Counts)
	}
	if !slices.Equal(gcPauses.Counts, schedPausesTotalGC.Counts) {
		t.Errorf("/gc/pauses:seconds counts %v do not match /sched/pauses/total/gc:seconds counts %v", gcPauses.Counts, schedPausesTotalGC.Counts)
	}
}

func TestReadMetricsConsistency(t *testing.T) {
	// Tests whether readMetrics produces consistent, sensible values.
	// The values are read concurrently with the runtime doing other
	// things (e.g. allocating) so what we read can't reasonably compared
	// to other runtime values (e.g. MemStats).

	// Run a few GC cycles to get some of the stats to be non-zero.
	runtime.GC()
	runtime.GC()
	runtime.GC()

	// Set GOMAXPROCS high then sleep briefly to ensure we generate
	// some idle time.
	oldmaxprocs := runtime.GOMAXPROCS(10)
	time.Sleep(time.Millisecond)
	runtime.GOMAXPROCS(oldmaxprocs)

	// Read all the supported metrics through the metrics package.
	descs, samples := prepareAllMetricsSamples()
	metrics.Read(samples)

	// Check to make sure the values we read make sense.
	var totalVirtual struct {
		got, want uint64
	}
	var objects struct {
		alloc, free             *metrics.Float64Histogram
		allocs, frees           uint64
		allocdBytes, freedBytes uint64
		total, totalBytes       uint64
	}
	var gc struct {
		numGC  uint64
		pauses uint64
	}
	var totalScan struct {
		got, want uint64
	}
	var cpu struct {
		gcAssist    float64
		gcDedicated float64
		gcIdle      float64
		gcPause     float64
		gcTotal     float64

		idle float64
		user float64

		scavengeAssist float64
		scavengeBg     float64
		scavengeTotal  float64

		total float64
	}
	for i := range samples {
		kind := samples[i].Value.Kind()
		if want := descs[samples[i].Name].Kind; kind != want {
			t.Errorf("supported metric %q has unexpected kind: got %d, want %d", samples[i].Name, kind, want)
			continue
		}
		if samples[i].Name != "/memory/classes/total:bytes" && strings.HasPrefix(samples[i].Name, "/memory/classes") {
			v := samples[i].Value.Uint64()
			totalVirtual.want += v

			// None of these stats should ever get this big.
			// If they do, there's probably overflow involved,
			// usually due to bad accounting.
			if int64(v) < 0 {
				t.Errorf("%q has high/negative value: %d", samples[i].Name, v)
			}
		}
		switch samples[i].Name {
		case "/cpu/classes/gc/mark/assist:cpu-seconds":
			cpu.gcAssist = samples[i].Value.Float64()
		case "/cpu/classes/gc/mark/dedicated:cpu-seconds":
			cpu.gcDedicated = samples[i].Value.Float64()
		case "/cpu/classes/gc/mark/idle:cpu-seconds":
			cpu.gcIdle = samples[i].Value.Float64()
		case "/cpu/classes/gc/pause:cpu-seconds":
			cpu.gcPause = samples[i].Value.Float64()
		case "/cpu/classes/gc/total:cpu-seconds":
			cpu.gcTotal = samples[i].Value.Float64()
		case "/cpu/classes/idle:cpu-seconds":
			cpu.idle = samples[i].Value.Float64()
		case "/cpu/classes/scavenge/assist:cpu-seconds":
			cpu.scavengeAssist = samples[i].Value.Float64()
		case "/cpu/classes/scavenge/background:cpu-seconds":
			cpu.scavengeBg = samples[i].Value.Float64()
		case "/cpu/classes/scavenge/total:cpu-seconds":
			cpu.scavengeTotal = samples[i].Value.Float64()
		case "/cpu/classes/total:cpu-seconds":
			cpu.total = samples[i].Value.Float64()
		case "/cpu/classes/user:cpu-seconds":
			cpu.user = samples[i].Value.Float64()
		case "/memory/classes/total:bytes":
			totalVirtual.got = samples[i].Value.Uint64()
		case "/memory/classes/heap/objects:bytes":
			objects.totalBytes = samples[i].Value.Uint64()
		case "/gc/heap/objects:objects":
			objects.total = samples[i].Value.Uint64()
		case "/gc/heap/allocs:bytes":
			objects.allocdBytes = samples[i].Value.Uint64()
		case "/gc/heap/allocs:objects":
			objects.allocs = samples[i].Value.Uint64()
		case "/gc/heap/allocs-by-size:bytes":
			objects.alloc = samples[i].Value.Float64Histogram()
		case "/gc/heap/frees:bytes":
			objects.freedBytes = samples[i].Value.Uint64()
		case "/gc/heap/frees:objects":
			objects.frees = samples[i].Value.Uint64()
		case "/gc/heap/frees-by-size:bytes":
			objects.free = samples[i].Value.Float64Histogram()
		case "/gc/cycles:gc-cycles":
			gc.numGC = samples[i].Value.Uint64()
		case "/gc/pauses:seconds":
			h := samples[i].Value.Float64Histogram()
			gc.pauses = 0
			for i := range h.Counts {
				gc.pauses += h.Counts[i]
			}
		case "/gc/scan/heap:bytes":
			totalScan.want += samples[i].Value.Uint64()
		case "/gc/scan/globals:bytes":
			totalScan.want += samples[i].Value.Uint64()
		case "/gc/scan/stack:bytes":
			totalScan.want += samples[i].Value.Uint64()
		case "/gc/scan/total:bytes":
			totalScan.got = samples[i].Value.Uint64()
		case "/sched/gomaxprocs:threads":
			if got, want := samples[i].Value.Uint64(), uint64(runtime.GOMAXPROCS(-1)); got != want {
				t.Errorf("gomaxprocs doesn't match runtime.GOMAXPROCS: got %d, want %d", got, want)
			}
		case "/sched/goroutines:goroutines":
			if samples[i].Value.Uint64() < 1 {
				t.Error("number of goroutines is less than one")
			}
		}
	}
	// Only check this on Linux where we can be reasonably sure we have a high-resolution timer.
	if runtime.GOOS == "linux" {
		if cpu.gcDedicated <= 0 && cpu.gcAssist <= 0 && cpu.gcIdle <= 0 {
			t.Errorf("found no time spent on GC work: %#v", cpu)
		}
		if cpu.gcPause <= 0 {
			t.Errorf("found no GC pauses: %f", cpu.gcPause)
		}
		if cpu.idle <= 0 {
			t.Errorf("found no idle time: %f", cpu.idle)
		}
		if total := cpu.gcDedicated + cpu.gcAssist + cpu.gcIdle + cpu.gcPause; !withinEpsilon(cpu.gcTotal, total, 0.001) {
			t.Errorf("calculated total GC CPU time not within %%0.1 of total: %f vs. %f", total, cpu.gcTotal)
		}
		if total := cpu.scavengeAssist + cpu.scavengeBg; !withinEpsilon(cpu.scavengeTotal, total, 0.001) {
			t.Errorf("calculated total scavenge CPU not within %%0.1 of total: %f vs. %f", total, cpu.scavengeTotal)
		}
		if cpu.total <= 0 {
			t.Errorf("found no total CPU time passed")
		}
		if cpu.user <= 0 {
			t.Errorf("found no user time passed")
		}
		if total := cpu.gcTotal + cpu.scavengeTotal + cpu.user + cpu.idle; !withinEpsilon(cpu.total, total, 0.001) {
			t.Errorf("calculated total CPU not within %%0.1 of total: %f vs. %f", total, cpu.total)
		}
	}
	if totalVirtual.got != totalVirtual.want {
		t.Errorf(`"/memory/classes/total:bytes" does not match sum of /memory/classes/**: got %d, want %d`, totalVirtual.got, totalVirtual.want)
	}
	if got, want := objects.allocs-objects.frees, objects.total; got != want {
		t.Errorf("mismatch between object alloc/free tallies and total: got %d, want %d", got, want)
	}
	if got, want := objects.allocdBytes-objects.freedBytes, objects.totalBytes; got != want {
		t.Errorf("mismatch between object alloc/free tallies and total: got %d, want %d", got, want)
	}
	if b, c := len(objects.alloc.Buckets), len(objects.alloc.Counts); b != c+1 {
		t.Errorf("allocs-by-size has wrong bucket or counts length: %d buckets, %d counts", b, c)
	}
	if b, c := len(objects.free.Buckets), len(objects.free.Counts); b != c+1 {
		t.Errorf("frees-by-size has wrong bucket or counts length: %d buckets, %d counts", b, c)
	}
	if len(objects.alloc.Buckets) != len(objects.free.Buckets) {
		t.Error("allocs-by-size and frees-by-size buckets don't match in length")
	} else if len(objects.alloc.Counts) != len(objects.free.Counts) {
		t.Error("allocs-by-size and frees-by-size counts don't match in length")
	} else {
		for i := range objects.alloc.Buckets {
			ba := objects.alloc.Buckets[i]
			bf := objects.free.Buckets[i]
			if ba != bf {
				t.Errorf("bucket %d is different for alloc and free hists: %f != %f", i, ba, bf)
			}
		}
		if !t.Failed() {
			var gotAlloc, gotFree uint64
			want := objects.total
			for i := range objects.alloc.Counts {
				if objects.alloc.Counts[i] < objects.free.Counts[i] {
					t.Errorf("found more allocs than frees in object dist bucket %d", i)
					continue
				}
				gotAlloc += objects.alloc.Counts[i]
				gotFree += objects.free.Counts[i]
			}
			if got := gotAlloc - gotFree; got != want {
				t.Errorf("object distribution counts don't match count of live objects: got %d, want %d", got, want)
			}
			if gotAlloc != objects.allocs {
				t.Errorf("object distribution counts don't match total allocs: got %d, want %d", gotAlloc, objects.allocs)
			}
			if gotFree != objects.frees {
				t.Errorf("object distribution counts don't match total allocs: got %d, want %d", gotFree, objects.frees)
			}
		}
	}
	// The current GC has at least 2 pauses per GC.
	// Check to see if that value makes sense.
	if gc.pauses < gc.numGC*2 {
		t.Errorf("fewer pauses than expected: got %d, want at least %d", gc.pauses, gc.numGC*2)
	}
	if totalScan.got <= 0 {
		t.Errorf("scannable GC space is empty: %d", totalScan.got)
	}
	if totalScan.got != totalScan.want {
		t.Errorf("/gc/scan/total:bytes doesn't line up with sum of /gc/scan*: total %d vs. sum %d", totalScan.got, totalScan.want)
	}
}

func BenchmarkReadMetricsLatency(b *testing.B) {
	stop := applyGCLoad(b)

	// Spend this much time measuring latencies.
	latencies := make([]time.Duration, 0, 1024)
	_, samples := prepareAllMetricsSamples()

	// Hit metrics.Read continuously and measure.
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		start := time.Now()
		metrics.Read(samples)
		latencies = append(latencies, time.Since(start))
	}
	// Make sure to stop the timer before we wait! The load created above
	// is very heavy-weight and not easy to stop, so we could end up
	// confusing the benchmarking framework for small b.N.
	b.StopTimer()
	stop()

	// Disable the default */op metrics.
	// ns/op doesn't mean anything because it's an average, but we
	// have a sleep in our b.N loop above which skews this significantly.
	b.ReportMetric(0, "ns/op")
	b.ReportMetric(0, "B/op")
	b.ReportMetric(0, "allocs/op")

	// Sort latencies then report percentiles.
	sort.Slice(latencies, func(i, j int) bool {
		return latencies[i] < latencies[j]
	})
	b.ReportMetric(float64(latencies[len(latencies)*50/100]), "p50-ns")
	b.ReportMetric(float64(latencies[len(latencies)*90/100]), "p90-ns")
	b.ReportMetric(float64(latencies[len(latencies)*99/100]), "p99-ns")
}

var readMetricsSink [1024]interface{}

func TestReadMetricsCumulative(t *testing.T) {
	// Set up the set of metrics marked cumulative.
	descs := metrics.All()
	var samples [2][]metrics.Sample
	samples[0] = make([]metrics.Sample, len(descs))
	samples[1] = make([]metrics.Sample, len(descs))
	total := 0
	for i := range samples[0] {
		if !descs[i].Cumulative {
			continue
		}
		samples[0][total].Name = descs[i].Name
		total++
	}
	samples[0] = samples[0][:total]
	samples[1] = samples[1][:total]
	copy(samples[1], samples[0])

	// Start some noise in the background.
	var wg sync.WaitGroup
	wg.Add(1)
	done := make(chan struct{})
	go func() {
		defer wg.Done()
		for {
			// Add more things here that could influence metrics.
			for i := 0; i < 10; i++ {
				runtime.AddCleanup(new(*int), func(_ struct{}) {}, struct{}{})
				runtime.SetFinalizer(new(*int), func(_ **int) {})
			}
			for i := 0; i < len(readMetricsSink); i++ {
				readMetricsSink[i] = make([]byte, 1024)
				select {
				case <-done:
					return
				default:
				}
			}
			runtime.GC()
		}
	}()

	sum := func(us []uint64) uint64 {
		total := uint64(0)
		for _, u := range us {
			total += u
		}
		return total
	}

	// Populate the first generation.
	metrics.Read(samples[0])

	// Check to make sure that these metrics only grow monotonically.
	for gen := 1; gen < 10; gen++ {
		metrics.Read(samples[gen%2])
		for i := range samples[gen%2] {
			name := samples[gen%2][i].Name
			vNew, vOld := samples[gen%2][i].Value, samples[1-(gen%2)][i].Value

			switch vNew.Kind() {
			case metrics.KindUint64:
				new := vNew.Uint64()
				old := vOld.Uint64()
				if new < old {
					t.Errorf("%s decreased: %d < %d", name, new, old)
				}
			case metrics.KindFloat64:
				new := vNew.Float64()
				old := vOld.Float64()
				if new < old {
					t.Errorf("%s decreased: %f < %f", name, new, old)
				}
			case metrics.KindFloat64Histogram:
				new := sum(vNew.Float64Histogram().Counts)
				old := sum(vOld.Float64Histogram().Counts)
				if new < old {
					t.Errorf("%s counts decreased: %d < %d", name, new, old)
				}
			}
		}
	}
	close(done)

	wg.Wait()
}

func withinEpsilon(v1, v2, e float64) bool {
	return v2-v2*e <= v1 && v1 <= v2+v2*e
}

func TestMutexWaitTimeMetric(t *testing.T) {
	var sample [1]metrics.Sample
	sample[0].Name = "/sync/mutex/wait/total:seconds"

	locks := []locker2{
		new(mutex),
		new(rwmutexWrite),
		new(rwmutexReadWrite),
		new(rwmutexWriteRead),
	}
	for _, lock := range locks {
		t.Run(reflect.TypeOf(lock).Elem().Name(), func(t *testing.T) {
			metrics.Read(sample[:])
			before := time.Duration(sample[0].Value.Float64() * 1e9)

			minMutexWaitTime := generateMutexWaitTime(lock)

			metrics.Read(sample[:])
			after := time.Duration(sample[0].Value.Float64() * 1e9)

			if wt := after - before; wt < minMutexWaitTime {
				t.Errorf("too little mutex wait time: got %s, want %s", wt, minMutexWaitTime)
			}
		})
	}
}

// locker2 represents an API surface of two concurrent goroutines
// locking the same resource, but through different APIs. It's intended
// to abstract over the relationship of two Lock calls or an RLock
// and a Lock call.
type locker2 interface {
	Lock1()
	Unlock1()
	Lock2()
	Unlock2()
}

type mutex struct {
	mu sync.Mutex
}

func (m *mutex) Lock1()   { m.mu.Lock() }
func (m *mutex) Unlock1() { m.mu.Unlock() }
func (m *mutex) Lock2()   { m.mu.Lock() }
func (m *mutex) Unlock2() { m.mu.Unlock() }

type rwmutexWrite struct {
	mu sync.RWMutex
}

func (m *rwmutexWrite) Lock1()   { m.mu.Lock() }
func (m *rwmutexWrite) Unlock1() { m.mu.Unlock() }
func (m *rwmutexWrite) Lock2()   { m.mu.Lock() }
func (m *rwmutexWrite) Unlock2() { m.mu.Unlock() }

type rwmutexReadWrite struct {
	mu sync.RWMutex
}

func (m *rwmutexReadWrite) Lock1()   { m.mu.RLock() }
func (m *rwmutexReadWrite) Unlock1() { m.mu.RUnlock() }
func (m *rwmutexReadWrite) Lock2()   { m.mu.Lock() }
func (m *rwmutexReadWrite) Unlock2() { m.mu.Unlock() }

type rwmutexWriteRead struct {
	mu sync.RWMutex
}

func (m *rwmutexWriteRead) Lock1()   { m.mu.Lock() }
func (m *rwmutexWriteRead) Unlock1() { m.mu.Unlock() }
func (m *rwmutexWriteRead) Lock2()   { m.mu.RLock() }
func (m *rwmutexWriteRead) Unlock2() { m.mu.RUnlock() }

// generateMutexWaitTime causes a couple of goroutines
// to block a whole bunch of times on a sync.Mutex, returning
// the minimum amount of time that should be visible in the
// /sync/mutex-wait:seconds metric.
func generateMutexWaitTime(mu locker2) time.Duration {
	// Set up the runtime to always track casgstatus transitions for metrics.
	*runtime.CasGStatusAlwaysTrack = true

	mu.Lock1()

	// Start up a goroutine to wait on the lock.
	gc := make(chan *runtime.G)
	done := make(chan bool)
	go func() {
		gc <- runtime.Getg()

		for {
			mu.Lock2()
			mu.Unlock2()
			if <-done {
				return
			}
		}
	}()
	gp := <-gc

	// Set the block time high enough so that it will always show up, even
	// on systems with coarse timer granularity.
	const blockTime = 100 * time.Millisecond

	// Make sure the goroutine spawned above actually blocks on the lock.
	for {
		if runtime.GIsWaitingOnMutex(gp) {
			break
		}
		runtime.Gosched()
	}

	// Let some amount of time pass.
	time.Sleep(blockTime)

	// Let the other goroutine acquire the lock.
	mu.Unlock1()
	done <- true

	// Reset flag.
	*runtime.CasGStatusAlwaysTrack = false
	return blockTime
}

// See issue #60276.
func TestCPUMetricsSleep(t *testing.T) {
	if runtime.GOOS == "wasip1" {
		// Since wasip1 busy-waits in the scheduler, there's no meaningful idle
		// time. This is accurately reflected in the metrics, but it means this
		// test is basically meaningless on this platform.
		t.Skip("wasip1 currently busy-waits in idle time; test not applicable")
	}

	names := []string{
		"/cpu/classes/idle:cpu-seconds",

		"/cpu/classes/gc/mark/assist:cpu-seconds",
		"/cpu/classes/gc/mark/dedicated:cpu-seconds",
		"/cpu/classes/gc/mark/idle:cpu-seconds",
		"/cpu/classes/gc/pause:cpu-seconds",
		"/cpu/classes/gc/total:cpu-seconds",
		"/cpu/classes/scavenge/assist:cpu-seconds",
		"/cpu/classes/scavenge/background:cpu-seconds",
		"/cpu/classes/scavenge/total:cpu-seconds",
		"/cpu/classes/total:cpu-seconds",
		"/cpu/classes/user:cpu-seconds",
	}
	prep := func() []metrics.Sample {
		mm := make([]metrics.Sample, len(names))
		for i := range names {
			mm[i].Name = names[i]
		}
		return mm
	}
	m1, m2 := prep(), prep()

	const (
		// Expected time spent idle.
		dur = 100 * time.Millisecond

		// maxFailures is the number of consecutive failures requires to cause the test to fail.
		maxFailures = 10
	)

	failureIdleTimes := make([]float64, 0, maxFailures)

	// If the bug we expect is happening, then the Sleep CPU time will be accounted for
	// as user time rather than idle time. In an ideal world we'd expect the whole application
	// to go instantly idle the moment this goroutine goes to sleep, and stay asleep for that
	// duration. However, the Go runtime can easily eat into idle time while this goroutine is
	// blocked in a sleep. For example, slow platforms might spend more time expected in the
	// scheduler. Another example is that a Go runtime background goroutine could run while
	// everything else is idle. Lastly, if a running goroutine is descheduled by the OS, enough
	// time may pass such that the goroutine is ready to wake, even though the runtime couldn't
	// observe itself as idle with nanotime.
	//
	// To deal with all this, we give a half-proc's worth of leniency.
	//
	// We also retry multiple times to deal with the fact that the OS might deschedule us before
	// we yield and go idle. That has a rare enough chance that retries should resolve it.
	// If the issue we expect is happening, it should be persistent.
	minIdleCPUSeconds := dur.Seconds() * (float64(runtime.GOMAXPROCS(-1)) - 0.5)

	// Let's make sure there's no background scavenge work to do.
	//
	// The runtime.GC calls below ensure the background sweeper
	// will not run during the idle period.
	debug.FreeOSMemory()

	for retries := 0; retries < maxFailures; retries++ {
		// Read 1.
		runtime.GC() // Update /cpu/classes metrics.
		metrics.Read(m1)

		// Sleep.
		time.Sleep(dur)

		// Read 2.
		runtime.GC() // Update /cpu/classes metrics.
		metrics.Read(m2)

		dt := m2[0].Value.Float64() - m1[0].Value.Float64()
		if dt >= minIdleCPUSeconds {
			// All is well. Test passed.
			return
		}
		failureIdleTimes = append(failureIdleTimes, dt)
		// Try again.
	}

	// We couldn't observe the expected idle time even once.
	for i, dt := range failureIdleTimes {
		t.Logf("try %2d: idle time = %.5fs\n", i+1, dt)
	}
	t.Logf("try %d breakdown:\n", len(failureIdleTimes))
	for i := range names {
		if m1[i].Value.Kind() == metrics.KindBad {
			continue
		}
		t.Logf("\t%s %0.3f\n", names[i], m2[i].Value.Float64()-m1[i].Value.Float64())
	}
	t.Errorf(`time.Sleep did not contribute enough to "idle" class: minimum idle time = %.5fs`, minIdleCPUSeconds)
}

// Call f() and verify that the correct STW metrics increment. If isGC is true,
// fn triggers a GC STW. Otherwise, fn triggers an other STW.
func testSchedPauseMetrics(t *testing.T, fn func(t *testing.T), isGC bool) {
	m := []metrics.Sample{
		{Name: "/sched/pauses/stopping/gc:seconds"},
		{Name: "/sched/pauses/stopping/other:seconds"},
		{Name: "/sched/pauses/total/gc:seconds"},
		{Name: "/sched/pauses/total/other:seconds"},
	}

	stoppingGC := &m[0]
	stoppingOther := &m[1]
	totalGC := &m[2]
	totalOther := &m[3]

	sampleCount := func(s *metrics.Sample) uint64 {
		h := s.Value.Float64Histogram()

		var n uint64
		for _, c := range h.Counts {
			n += c
		}
		return n
	}

	// Read baseline.
	metrics.Read(m)

	baselineStartGC := sampleCount(stoppingGC)
	baselineStartOther := sampleCount(stoppingOther)
	baselineTotalGC := sampleCount(totalGC)
	baselineTotalOther := sampleCount(totalOther)

	fn(t)

	metrics.Read(m)

	if isGC {
		if got := sampleCount(stoppingGC); got <= baselineStartGC {
			t.Errorf("/sched/pauses/stopping/gc:seconds sample count %d did not increase from baseline of %d", got, baselineStartGC)
		}
		if got := sampleCount(totalGC); got <= baselineTotalGC {
			t.Errorf("/sched/pauses/total/gc:seconds sample count %d did not increase from baseline of %d", got, baselineTotalGC)
		}

		if got := sampleCount(stoppingOther); got != baselineStartOther {
			t.Errorf("/sched/pauses/stopping/other:seconds sample count %d changed from baseline of %d", got, baselineStartOther)
		}
		if got := sampleCount(totalOther); got != baselineTotalOther {
			t.Errorf("/sched/pauses/stopping/other:seconds sample count %d changed from baseline of %d", got, baselineTotalOther)
		}
	} else {
		if got := sampleCount(stoppingGC); got != baselineStartGC {
			t.Errorf("/sched/pauses/stopping/gc:seconds sample count %d changed from baseline of %d", got, baselineStartGC)
		}
		if got := sampleCount(totalGC); got != baselineTotalGC {
			t.Errorf("/sched/pauses/total/gc:seconds sample count %d changed from baseline of %d", got, baselineTotalGC)
		}

		if got := sampleCount(stoppingOther); got <= baselineStartOther {
			t.Errorf("/sched/pauses/stopping/other:seconds sample count %d did not increase from baseline of %d", got, baselineStartOther)
		}
		if got := sampleCount(totalOther); got <= baselineTotalOther {
			t.Errorf("/sched/pauses/stopping/other:seconds sample count %d did not increase from baseline of %d", got, baselineTotalOther)
		}
	}
}

func TestSchedPauseMetrics(t *testing.T) {
	tests := []struct {
		name string
		isGC bool
		fn   func(t *testing.T)
	}{
		{
			name: "runtime.GC",
			isGC: true,
			fn: func(t *testing.T) {
				runtime.GC()
			},
		},
		{
			name: "runtime.GOMAXPROCS",
			fn: func(t *testing.T) {
				if runtime.GOARCH == "wasm" {
					t.Skip("GOMAXPROCS >1 not supported on wasm")
				}

				n := runtime.GOMAXPROCS(0)
				defer runtime.GOMAXPROCS(n)

				runtime.GOMAXPROCS(n + 1)
			},
		},
		{
			name: "runtime.GoroutineProfile",
			fn: func(t *testing.T) {
				var s [1]runtime.StackRecord
				runtime.GoroutineProfile(s[:])
			},
		},
		{
			name: "runtime.ReadMemStats",
			fn: func(t *testing.T) {
				var mstats runtime.MemStats
				runtime.ReadMemStats(&mstats)
			},
		},
		{
			name: "runtime.Stack",
			fn: func(t *testing.T) {
				var b [64]byte
				runtime.Stack(b[:], true)
			},
		},
		{
			name: "runtime/debug.WriteHeapDump",
			fn: func(t *testing.T) {
				if runtime.GOOS == "js" {
					t.Skip("WriteHeapDump not supported on js")
				}

				f, err := os.CreateTemp(t.TempDir(), "heapdumptest")
				if err != nil {
					t.Fatalf("os.CreateTemp failed: %v", err)
				}
				defer os.Remove(f.Name())
				defer f.Close()
				debug.WriteHeapDump(f.Fd())
			},
		},
		{
			name: "runtime/trace.Start",
			fn: func(t *testing.T) {
				if trace.IsEnabled() {
					t.Skip("tracing already enabled")
				}

				var buf bytes.Buffer
				if err := trace.Start(&buf); err != nil {
					t.Errorf("trace.Start err got %v want nil", err)
				}
				trace.Stop()
			},
		},
	}

	// These tests count STW pauses, classified based on whether they're related
	// to the GC or not. Disable automatic GC cycles during the test so we don't
	// have an incidental GC pause when we're trying to observe only
	// non-GC-related pauses. This is especially important for the
	// runtime/trace.Start test, since (as of this writing) that will block
	// until any active GC mark phase completes.
	defer debug.SetGCPercent(debug.SetGCPercent(-1))
	runtime.GC()

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			testSchedPauseMetrics(t, tc.fn, tc.isGC)
		})
	}
}

func TestRuntimeLockMetricsAndProfile(t *testing.T) {
	old := runtime.SetMutexProfileFraction(0) // enabled during sub-tests
	defer runtime.SetMutexProfileFraction(old)
	if old != 0 {
		t.Fatalf("need MutexProfileRate 0, got %d", old)
	}

	t.Logf("NumCPU %d", runtime.NumCPU())
	t.Logf("GOMAXPROCS %d", runtime.GOMAXPROCS(0))
	if minCPU := 2; runtime.NumCPU() < minCPU {
		t.Skipf("creating and observing contention on runtime-internal locks requires NumCPU >= %d", minCPU)
	}

	loadProfile := func(t *testing.T) *profile.Profile {
		var w bytes.Buffer
		pprof.Lookup("mutex").WriteTo(&w, 0)
		p, err := profile.Parse(&w)
		if err != nil {
			t.Fatalf("failed to parse profile: %v", err)
		}
		if err := p.CheckValid(); err != nil {
			t.Fatalf("invalid profile: %v", err)
		}
		return p
	}

	measureDelta := func(t *testing.T, fn func()) (metricGrowth, profileGrowth float64, p *profile.Profile) {
		beforeProfile := loadProfile(t)
		beforeMetrics := []metrics.Sample{{Name: "/sync/mutex/wait/total:seconds"}}
		metrics.Read(beforeMetrics)

		fn()

		afterProfile := loadProfile(t)
		afterMetrics := []metrics.Sample{{Name: "/sync/mutex/wait/total:seconds"}}
		metrics.Read(afterMetrics)

		sumSamples := func(p *profile.Profile, i int) int64 {
			var sum int64
			for _, s := range p.Sample {
				sum += s.Value[i]
			}
			return sum
		}

		metricGrowth = afterMetrics[0].Value.Float64() - beforeMetrics[0].Value.Float64()
		profileGrowth = float64(sumSamples(afterProfile, 1)-sumSamples(beforeProfile, 1)) * time.Nanosecond.Seconds()

		// The internal/profile package does not support compaction; this delta
		// profile will include separate positive and negative entries.
		p = afterProfile.Copy()
		if len(beforeProfile.Sample) > 0 {
			err := p.Merge(beforeProfile, -1)
			if err != nil {
				t.Fatalf("Merge profiles: %v", err)
			}
		}

		return metricGrowth, profileGrowth, p
	}

	testcase := func(strictTiming bool, acceptStacks [][]string, workers int, fn func() bool) func(t *testing.T) (metricGrowth float64, profileGrowth []int64, n, value int64, explain func()) {
		return func(t *testing.T) (metricGrowth float64, profileGrowth []int64, n, value int64, explain func()) {
			metricGrowth, totalProfileGrowth, p := measureDelta(t, func() {
				var started, stopped sync.WaitGroup
				started.Add(workers)
				stopped.Add(workers)
				for i := 0; i < workers; i++ {
					w := &contentionWorker{
						before: func() {
							started.Done()
							started.Wait()
						},
						after: func() {
							stopped.Done()
						},
						fn: fn,
					}
					go w.run()
				}
				stopped.Wait()
			})

			if totalProfileGrowth == 0 {
				t.Errorf("no increase in mutex profile")
			}
			if metricGrowth == 0 && strictTiming {
				// If the critical section is very short, systems with low timer
				// resolution may be unable to measure it via nanotime.
				//
				// This is sampled at 1 per gTrackingPeriod, but the explicit
				// runtime.mutex tests create 200 contention events. Observing
				// zero of those has a probability of (7/8)^200 = 2.5e-12 which
				// is acceptably low (though the calculation has a tenuous
				// dependency on cheaprandn being a good-enough source of
				// entropy).
				t.Errorf("no increase in /sync/mutex/wait/total:seconds metric")
			}
			// This comparison is possible because the time measurements in support of
			// runtime/pprof and runtime/metrics for runtime-internal locks are so close
			// together. It doesn't work as well for user-space contention, where the
			// involved goroutines are not _Grunnable the whole time and so need to pass
			// through the scheduler.
			t.Logf("lock contention growth in runtime/pprof's view  (%fs)", totalProfileGrowth)
			t.Logf("lock contention growth in runtime/metrics' view (%fs)", metricGrowth)

			acceptStacks = append([][]string(nil), acceptStacks...)
			for i, stk := range acceptStacks {
				if goexperiment.StaticLockRanking {
					if !slices.ContainsFunc(stk, func(s string) bool {
						return s == "runtime.systemstack" || s == "runtime.mcall" || s == "runtime.mstart"
					}) {
						// stk is a call stack that is still on the user stack when
						// it calls runtime.unlock. Add the extra function that
						// we'll see, when the static lock ranking implementation of
						// runtime.unlockWithRank switches to the system stack.
						stk = append([]string{"runtime.unlockWithRank"}, stk...)
					}
				}
				acceptStacks[i] = stk
			}

			var stks [][]string
			values := make([][2]int64, len(acceptStacks)+1)
			for _, s := range p.Sample {
				var have []string
				for _, loc := range s.Location {
					for _, line := range loc.Line {
						have = append(have, line.Function.Name)
					}
				}
				stks = append(stks, have)
				found := false
				for i, stk := range acceptStacks {
					if slices.Equal(have, stk) {
						values[i][0] += s.Value[0]
						values[i][1] += s.Value[1]
						found = true
						break
					}
				}
				if !found {
					values[len(values)-1][0] += s.Value[0]
					values[len(values)-1][1] += s.Value[1]
				}
			}
			profileGrowth = make([]int64, len(acceptStacks)+1)
			profileGrowth[len(profileGrowth)-1] = values[len(values)-1][1]
			for i, stk := range acceptStacks {
				n += values[i][0]
				value += values[i][1]
				profileGrowth[i] = values[i][1]
				t.Logf("stack %v has samples totaling n=%d value=%d", stk, values[i][0], values[i][1])
			}
			if n == 0 && value == 0 {
				t.Logf("profile:\n%s", p)
				for _, have := range stks {
					t.Logf("have stack %v", have)
				}
				for _, stk := range acceptStacks {
					t.Errorf("want stack %v", stk)
				}
			}

			return metricGrowth, profileGrowth, n, value, func() {
				t.Logf("profile:\n%s", p)
			}
		}
	}

	name := t.Name()

	t.Run("runtime.lock", func(t *testing.T) {
		// The goroutine that acquires the lock will only proceed when it
		// detects that its partner is contended for the lock. That will lead to
		// live-lock if anything (such as a STW) prevents the partner goroutine
		// from running. Allowing the contention workers to pause and restart
		// (to allow a STW to proceed) makes it harder to confirm that we're
		// counting the correct number of contention events, since some locks
		// will end up contended twice. Instead, disable the GC.
		defer debug.SetGCPercent(debug.SetGCPercent(-1))

		mus := make([]runtime.Mutex, 200)
		var needContention atomic.Int64

		baseDelay := 100 * time.Microsecond // large relative to system noise, for comparison between clocks
		fastDelayMicros := baseDelay.Microseconds()
		slowDelayMicros := baseDelay.Microseconds() * 4

		const (
			fastRole = iota
			slowRole
			workerCount
		)
		if runtime.GOMAXPROCS(0) < workerCount {
			t.Skipf("contention on runtime-internal locks requires GOMAXPROCS >= %d", workerCount)
		}

		minTicks := make([][]int64, workerCount) // lower bound, known-contended time, measured by cputicks
		maxTicks := make([][]int64, workerCount) // upper bound, total lock() duration, measured by cputicks
		for i := range minTicks {
			minTicks[i] = make([]int64, len(mus))
			maxTicks[i] = make([]int64, len(mus))
		}
		var id atomic.Int32
		fn := func() bool {
			n := int(needContention.Load())
			if n < 0 {
				return false
			}
			mu := &mus[n]

			// Each worker has a role: to have a fast or slow critical section.
			// Rotate the role assignments as we step through the mutex slice so
			// we don't end up with one M always claiming the same kind of work.
			id := int(id.Add(1))
			role := (id + n) % workerCount

			marker, delayMicros := fastMarkerFrame, fastDelayMicros
			if role == slowRole {
				marker, delayMicros = slowMarkerFrame, slowDelayMicros
			}

			// Each lock is used by two different critical sections, one fast
			// and one slow, identified in profiles by their different "marker"
			// functions. We expect the profile to blame each for the amount of
			// delay it inflicts on other users of the lock. We run one worker
			// of each kind, so any contention in one would be due to the other.
			//
			// We measure how long our runtime.lock call takes, which sets an
			// upper bound on how much blame to expect for the other worker type
			// in the profile. And if we acquire the lock first, we wait for the
			// other worker to announce its contention. We measure the
			// known-contended time, to use as a lower bound on how much blame
			// we expect of ourselves in the profile. Then we stall for a little
			// while (different amounts for "fast" versus "slow") before
			// unlocking the mutex.

			marker(func() {
				t0 := runtime.Cputicks()
				runtime.Lock(mu)
				maxTicks[role][n] = runtime.Cputicks() - t0
				minTicks[role][n] = 0
				for int(needContention.Load()) == n {
					if runtime.MutexContended(mu) {
						t1 := runtime.Cputicks()
						// make them wait a little while
						for start := runtime.Nanotime(); (runtime.Nanotime()-start)/1000 < delayMicros; {
							runtime.Usleep(uint32(1 + delayMicros/8))
						}
						minTicks[role][n] = runtime.Cputicks() - t1
						break
					}
				}
				runtime.Unlock(mu)
				needContention.Store(int64(n - 1))
			})

			return true
		}

		stks := make([][]string, 2)
		for i := range stks {
			marker := "runtime_test.fastMarkerFrame"
			if i == slowRole {
				marker = "runtime_test.slowMarkerFrame"
			}

			stks[i] = []string{
				"runtime.unlock",
				"runtime_test." + name + ".func4.1.1",
				marker,
				"runtime_test." + name + ".func4.1",
				"runtime_test.(*contentionWorker).run",
			}
		}

		t.Run("sample-1", func(t *testing.T) {
			old := runtime.SetMutexProfileFraction(1)
			defer runtime.SetMutexProfileFraction(old)

			needContention.Store(int64(len(mus) - 1))
			metricGrowth, profileGrowth, n, _, explain := testcase(true, stks, workerCount, fn)(t)
			defer func() {
				if t.Failed() {
					explain()
				}
			}()

			t.Run("metric", func(t *testing.T) {
				// The runtime/metrics view may be sampled at 1 per
				// gTrackingPeriod, so we don't have a hard lower bound here.
				testenv.SkipFlaky(t, 64253)

				if have, want := metricGrowth, baseDelay.Seconds()*float64(len(mus)); have < want {
					// The test imposes a delay with usleep, verified with calls to
					// nanotime. Compare against the runtime/metrics package's view
					// (based on nanotime) rather than runtime/pprof's view (based
					// on cputicks).
					t.Errorf("runtime/metrics reported less than the known minimum contention duration (%fs < %fs)", have, want)
				}
			})
			if have, want := n, int64(len(mus)); have != want {
				t.Errorf("mutex profile reported contention count different from the known true count (%d != %d)", have, want)
			}

			var slowMinTicks, fastMinTicks int64
			for role, ticks := range minTicks {
				for _, delta := range ticks {
					if role == slowRole {
						slowMinTicks += delta
					} else {
						fastMinTicks += delta
					}
				}
			}
			var slowMaxTicks, fastMaxTicks int64
			for role, ticks := range maxTicks {
				for _, delta := range ticks {
					if role == slowRole {
						slowMaxTicks += delta
					} else {
						fastMaxTicks += delta
					}
				}
			}

			cpuGHz := float64(runtime.CyclesPerSecond()) / 1e9
			for _, set := range []struct {
				name     string
				profTime int64
				minTime  int64
				maxTime  int64
			}{
				{
					name:     "slow",
					profTime: profileGrowth[slowRole],
					minTime:  int64(float64(slowMinTicks) / cpuGHz),
					maxTime:  int64(float64(fastMaxTicks) / cpuGHz),
				},
				{
					name:     "fast",
					profTime: profileGrowth[fastRole],
					minTime:  int64(float64(fastMinTicks) / cpuGHz),
					maxTime:  int64(float64(slowMaxTicks) / cpuGHz),
				},
			} {
				t.Logf("profile's view of delays due to %q critical section:                 %dns", set.name, set.profTime)
				t.Logf("test's view of known-contended time within %q critical section:      %dns", set.name, set.minTime)
				t.Logf("test's view of lock duration before critical sections other than %q: %dns", set.name, set.maxTime)

				if set.profTime < set.minTime {
					t.Errorf("profile undercounted %q critical section", set.name)
				}
				if set.profTime > set.maxTime {
					t.Errorf("profile overcounted %q critical section", set.name)
				}
			}

			var totalProfileGrowth float64
			for _, growth := range profileGrowth {
				totalProfileGrowth += float64(growth) * time.Nanosecond.Seconds()
			}

			const slop = 1.5 // account for nanotime vs cputicks
			t.Run("compare timers", func(t *testing.T) {
				testenv.SkipFlaky(t, 64253)
				if totalProfileGrowth > slop*metricGrowth || metricGrowth > slop*totalProfileGrowth {
					t.Errorf("views differ by more than %fx", slop)
				}
			})
		})

		t.Run("sample-2", func(t *testing.T) {
			testenv.SkipFlaky(t, 64253)

			old := runtime.SetMutexProfileFraction(2)
			defer runtime.SetMutexProfileFraction(old)

			needContention.Store(int64(len(mus) - 1))
			metricGrowth, profileGrowth, n, _, explain := testcase(true, stks, workerCount, fn)(t)
			defer func() {
				if t.Failed() {
					explain()
				}
			}()

			// With 100 trials and profile fraction of 2, we expect to capture
			// 50 samples. Allow the test to pass if we get at least 20 samples;
			// the CDF of the binomial distribution says there's less than a
			// 1e-9 chance of that, which is an acceptably low flakiness rate.
			const samplingSlop = 2.5

			if have, want := metricGrowth, baseDelay.Seconds()*float64(len(mus)); samplingSlop*have < want {
				// The test imposes a delay with usleep, verified with calls to
				// nanotime. Compare against the runtime/metrics package's view
				// (based on nanotime) rather than runtime/pprof's view (based
				// on cputicks).
				t.Errorf("runtime/metrics reported less than the known minimum contention duration (%f * %fs < %fs)", samplingSlop, have, want)
			}
			if have, want := n, int64(len(mus)); float64(have) > float64(want)*samplingSlop || float64(want) > float64(have)*samplingSlop {
				t.Errorf("mutex profile reported contention count too different from the expected count (%d far from %d)", have, want)
			}

			var totalProfileGrowth float64
			for _, growth := range profileGrowth {
				totalProfileGrowth += float64(growth) * time.Nanosecond.Seconds()
			}

			const timerSlop = 1.5 * samplingSlop // account for nanotime vs cputicks, plus the two views' independent sampling
			if totalProfileGrowth > timerSlop*metricGrowth || metricGrowth > timerSlop*totalProfileGrowth {
				t.Errorf("views differ by more than %fx", timerSlop)
			}
		})
	})

	t.Run("runtime.semrelease", func(t *testing.T) {
		testenv.SkipFlaky(t, 64253)

		old := runtime.SetMutexProfileFraction(1)
		defer runtime.SetMutexProfileFraction(old)

		const workers = 3
		if runtime.GOMAXPROCS(0) < workers {
			t.Skipf("creating and observing contention on runtime-internal semaphores requires GOMAXPROCS >= %d", workers)
		}

		var sem uint32 = 1
		var tries atomic.Int32
		tries.Store(10_000_000) // prefer controlled failure to timeout
		var sawContention atomic.Int32
		var need int32 = 1
		fn := func() bool {
			if sawContention.Load() >= need {
				return false
			}
			if tries.Add(-1) < 0 {
				return false
			}

			runtime.Semacquire(&sem)
			runtime.Semrelease1(&sem, false, 0)
			if runtime.MutexContended(runtime.SemRootLock(&sem)) {
				sawContention.Add(1)
			}
			return true
		}

		stks := [][]string{
			{
				"runtime.unlock",
				"runtime.semrelease1",
				"runtime_test.TestRuntimeLockMetricsAndProfile.func5.1",
				"runtime_test.(*contentionWorker).run",
			},
			{
				"runtime.unlock",
				"runtime.semacquire1",
				"runtime.semacquire",
				"runtime_test.TestRuntimeLockMetricsAndProfile.func5.1",
				"runtime_test.(*contentionWorker).run",
			},
		}

		// Verify that we get call stack we expect, with anything more than zero
		// cycles / zero samples. The duration of each contention event is too
		// small relative to the expected overhead for us to verify its value
		// more directly. Leave that to the explicit lock/unlock test.

		testcase(false, stks, workers, fn)(t)

		if remaining := tries.Load(); remaining >= 0 {
			t.Logf("finished test early (%d tries remaining)", remaining)
		}
	})
}

func slowMarkerFrame(fn func()) { fn() }
func fastMarkerFrame(fn func()) { fn() }

// contentionWorker provides cleaner call stacks for lock contention profile tests
type contentionWorker struct {
	before func()
	fn     func() bool
	after  func()
}

func (w *contentionWorker) run() {
	defer w.after()
	w.before()

	for w.fn() {
	}
}

func TestCPUStats(t *testing.T) {
	// Run a few GC cycles to get some of the stats to be non-zero.
	runtime.GC()
	runtime.GC()
	runtime.GC()

	// Set GOMAXPROCS high then sleep briefly to ensure we generate
	// some idle time.
	oldmaxprocs := runtime.GOMAXPROCS(10)
	time.Sleep(time.Millisecond)
	runtime.GOMAXPROCS(oldmaxprocs)

	stats := runtime.ReadCPUStats()
	gcTotal := stats.GCAssistTime + stats.GCDedicatedTime + stats.GCIdleTime + stats.GCPauseTime
	if gcTotal != stats.GCTotalTime {
		t.Errorf("manually computed total does not match GCTotalTime: %d cpu-ns vs. %d cpu-ns", gcTotal, stats.GCTotalTime)
	}
	scavTotal := stats.ScavengeAssistTime + stats.ScavengeBgTime
	if scavTotal != stats.ScavengeTotalTime {
		t.Errorf("manually computed total does not match ScavengeTotalTime: %d cpu-ns vs. %d cpu-ns", scavTotal, stats.ScavengeTotalTime)
	}
	total := gcTotal + scavTotal + stats.IdleTime + stats.UserTime
	if total != stats.TotalTime {
		t.Errorf("manually computed overall total does not match TotalTime: %d cpu-ns vs. %d cpu-ns", total, stats.TotalTime)
	}
	if total == 0 {
		t.Error("total time is zero")
	}
	if gcTotal == 0 {
		t.Error("GC total time is zero")
	}
	if stats.IdleTime == 0 {
		t.Error("idle time is zero")
	}
}

func TestMetricHeapUnusedLargeObjectOverflow(t *testing.T) {
	// This test makes sure /memory/classes/heap/unused:bytes
	// doesn't overflow when allocating and deallocating large
	// objects. It is a regression test for #67019.
	done := make(chan struct{})
	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		defer wg.Done()
		for {
			for range 10 {
				abi.Escape(make([]byte, 1<<20))
			}
			runtime.GC()
			select {
			case <-done:
				return
			default:
			}
		}
	}()
	s := []metrics.Sample{
		{Name: "/memory/classes/heap/unused:bytes"},
	}
	for range 1000 {
		metrics.Read(s)
		if s[0].Value.Uint64() > 1<<40 {
			t.Errorf("overflow")
			break
		}
	}
	done <- struct{}{}
	wg.Wait()
}

func TestReadMetricsCleanups(t *testing.T) {
	runtime.GC()                                                // End any in-progress GC.
	runtime.BlockUntilEmptyCleanupQueue(int64(1 * time.Second)) // Flush any queued cleanups.

	var before [2]metrics.Sample
	before[0].Name = "/gc/cleanups/queued:cleanups"
	before[1].Name = "/gc/cleanups/executed:cleanups"
	after := before

	metrics.Read(before[:])

	const N = 10
	for i := 0; i < N; i++ {
		runtime.AddCleanup(new(*int), func(_ struct{}) {}, struct{}{})
	}

	runtime.GC()
	runtime.BlockUntilEmptyCleanupQueue(int64(1 * time.Second))

	metrics.Read(after[:])

	if v0, v1 := before[0].Value.Uint64(), after[0].Value.Uint64(); v0+N != v1 {
		t.Errorf("expected %s difference to be exactly %d, got %d -> %d", before[0].Name, N, v0, v1)
	}
	if v0, v1 := before[1].Value.Uint64(), after[1].Value.Uint64(); v0+N != v1 {
		t.Errorf("expected %s difference to be exactly %d, got %d -> %d", before[1].Name, N, v0, v1)
	}
}

func TestReadMetricsFinalizers(t *testing.T) {
	runtime.GC()                                                  // End any in-progress GC.
	runtime.BlockUntilEmptyFinalizerQueue(int64(1 * time.Second)) // Flush any queued finalizers.

	var before [2]metrics.Sample
	before[0].Name = "/gc/finalizers/queued:finalizers"
	before[1].Name = "/gc/finalizers/executed:finalizers"
	after := before

	metrics.Read(before[:])

	const N = 10
	for i := 0; i < N; i++ {
		runtime.SetFinalizer(new(*int), func(_ **int) {})
	}

	runtime.GC()
	runtime.GC()
	runtime.BlockUntilEmptyFinalizerQueue(int64(1 * time.Second))

	metrics.Read(after[:])

	if v0, v1 := before[0].Value.Uint64(), after[0].Value.Uint64(); v0+N != v1 {
		t.Errorf("expected %s difference to be exactly %d, got %d -> %d", before[0].Name, N, v0, v1)
	}
	if v0, v1 := before[1].Value.Uint64(), after[1].Value.Uint64(); v0+N != v1 {
		t.Errorf("expected %s difference to be exactly %d, got %d -> %d", before[1].Name, N, v0, v1)
	}
}

func TestReadMetricsSched(t *testing.T) {
	const (
		notInGo = iota
		runnable
		running
		waiting
		created
		threads
		numSamples
	)
	var s [numSamples]metrics.Sample
	s[notInGo].Name = "/sched/goroutines/not-in-go:goroutines"
	s[runnable].Name = "/sched/goroutines/runnable:goroutines"
	s[running].Name = "/sched/goroutines/running:goroutines"
	s[waiting].Name = "/sched/goroutines/waiting:goroutines"
	s[created].Name = "/sched/goroutines-created:goroutines"
	s[threads].Name = "/sched/threads/total:threads"

	logMetrics := func(t *testing.T, s []metrics.Sample) {
		for i := range s {
			t.Logf("%s: %d", s[i].Name, s[i].Value.Uint64())
		}
	}

	// generalSlack is the amount of goroutines we allow ourselves to be
	// off by in any given category, either due to background system
	// goroutines or testing package goroutines.
	const generalSlack = 4

	// waitingSlack is the max number of blocked goroutines left
	// from other tests, the testing package, or system
	// goroutines.
	const waitingSlack = 100

	// threadsSlack is the maximum number of threads left over
	// from other tests and the runtime (sysmon, the template thread, etc.)
	const threadsSlack = 20

	// Make sure GC isn't running, since GC workers interfere with
	// expected counts.
	defer debug.SetGCPercent(debug.SetGCPercent(-1))
	runtime.GC()

	check := func(t *testing.T, s *metrics.Sample, min, max uint64) {
		val := s.Value.Uint64()
		if val < min {
			t.Errorf("%s too low; %d < %d", s.Name, val, min)
		}
		if val > max {
			t.Errorf("%s too high; %d > %d", s.Name, val, max)
		}
	}
	checkEq := func(t *testing.T, s *metrics.Sample, value uint64) {
		check(t, s, value, value)
	}
	spinUntil := func(f func() bool) bool {
		for {
			if f() {
				return true
			}
			time.Sleep(50 * time.Millisecond)
		}
	}

	// Check base values.
	t.Run("base", func(t *testing.T) {
		defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(1))
		metrics.Read(s[:])
		logMetrics(t, s[:])
		check(t, &s[notInGo], 0, generalSlack)
		check(t, &s[runnable], 0, generalSlack)
		checkEq(t, &s[running], 1)
		check(t, &s[waiting], 0, waitingSlack)
	})

	metrics.Read(s[:])
	createdAfterBase := s[created].Value.Uint64()

	// Force Running count to be high. We'll use these goroutines
	// for Runnable, too.
	const count = 10
	var ready, exit atomic.Uint32
	for i := 0; i < count-1; i++ {
		go func() {
			ready.Add(1)
			for exit.Load() == 0 {
				// Spin to get us and keep us running, but check
				// the exit condition so we exit out early if we're
				// done.
				start := time.Now()
				for time.Since(start) < 10*time.Millisecond && exit.Load() == 0 {
				}
				runtime.Gosched()
			}
		}()
	}
	for ready.Load() < count-1 {
		runtime.Gosched()
	}

	// Be careful. We've entered a dangerous state for platforms
	// that do not return back to the underlying system unless all
	// goroutines are blocked, like js/wasm, since we have a bunch
	// of runnable goroutines all spinning. We cannot write anything
	// out.
	if testenv.HasParallelism() {
		t.Run("created", func(t *testing.T) {
			metrics.Read(s[:])
			logMetrics(t, s[:])
			checkEq(t, &s[created], createdAfterBase+count)
		})
		t.Run("running", func(t *testing.T) {
			defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(count + 4))
			// It can take a little bit for the scheduler to
			// distribute the goroutines to Ps, so retry until
			// we see the count we expect or the test times out.
			spinUntil(func() bool {
				metrics.Read(s[:])
				return s[running].Value.Uint64() >= count
			})
			logMetrics(t, s[:])
			check(t, &s[running], count, count+4)
			check(t, &s[threads], count, count+4+threadsSlack)
		})

		// Force runnable count to be high.
		t.Run("runnable", func(t *testing.T) {
			defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(1))
			metrics.Read(s[:])
			logMetrics(t, s[:])
			checkEq(t, &s[running], 1)
			check(t, &s[runnable], count-1, count+generalSlack)
		})

		// Done with the running/runnable goroutines.
		exit.Store(1)
	} else {
		// Read metrics and then exit all the other goroutines,
		// so that system calls may proceed.
		metrics.Read(s[:])

		// Done with the running/runnable goroutines.
		exit.Store(1)

		// Now we can check our invariants.
		t.Run("created", func(t *testing.T) {
			// Look for count-1 goroutines because we read metrics
			// *before* t.Run goroutine was created for this sub-test.
			checkEq(t, &s[created], createdAfterBase+count-1)
		})
		t.Run("running", func(t *testing.T) {
			logMetrics(t, s[:])
			checkEq(t, &s[running], 1)
			checkEq(t, &s[threads], 1)
		})
		t.Run("runnable", func(t *testing.T) {
			logMetrics(t, s[:])
			check(t, &s[runnable], count-1, count+generalSlack)
		})
	}

	// Force not-in-go count to be high. This is a little tricky since
	// we try really hard not to let things block in system calls.
	// We have to drop to the syscall package to do this reliably.
	t.Run("not-in-go", func(t *testing.T) {
		// Block a bunch of goroutines on an OS pipe.
		pr, pw, err := pipe()
		if err != nil {
			switch runtime.GOOS {
			case "js", "wasip1":
				t.Skip("creating pipe:", err)
			}
			t.Fatal("creating pipe:", err)
		}
		for i := 0; i < count; i++ {
			go syscall.Read(pr, make([]byte, 1))
		}

		// Let the goroutines block.
		spinUntil(func() bool {
			metrics.Read(s[:])
			return s[notInGo].Value.Uint64() >= count
		})
		logMetrics(t, s[:])
		check(t, &s[notInGo], count, count+generalSlack)

		syscall.Close(pw)
		syscall.Close(pr)
	})

	t.Run("waiting", func(t *testing.T) {
		// Force waiting count to be high.
		const waitingCount = 1000
		stop := make(chan bool)
		for i := 0; i < waitingCount; i++ {
			go func() { <-stop }()
		}

		// Let the goroutines block.
		spinUntil(func() bool {
			metrics.Read(s[:])
			return s[waiting].Value.Uint64() >= waitingCount
		})
		logMetrics(t, s[:])
		check(t, &s[waiting], waitingCount, waitingCount+waitingSlack)

		close(stop)
	})
}