diff --git a/src/runtime/metrics_test.go b/src/runtime/metrics_test.go
index af042f44456..b67424301b4 100644
--- a/src/runtime/metrics_test.go
+++ b/src/runtime/metrics_test.go
@@ -22,7 +22,6 @@ import (
 	"strings"
 	"sync"
 	"sync/atomic"
-	"syscall"
 	"testing"
 	"time"
 	"unsafe"
@@ -1578,211 +1577,10 @@ func TestReadMetricsFinalizers(t *testing.T) {
 }
 
 func TestReadMetricsSched(t *testing.T) {
-	const (
-		notInGo = iota
-		runnable
-		running
-		waiting
-		created
-		threads
-		numSamples
-	)
-	var s [numSamples]metrics.Sample
-	s[notInGo].Name = "/sched/goroutines/not-in-go:goroutines"
-	s[runnable].Name = "/sched/goroutines/runnable:goroutines"
-	s[running].Name = "/sched/goroutines/running:goroutines"
-	s[waiting].Name = "/sched/goroutines/waiting:goroutines"
-	s[created].Name = "/sched/goroutines-created:goroutines"
-	s[threads].Name = "/sched/threads/total:threads"
-
-	logMetrics := func(t *testing.T, s []metrics.Sample) {
-		for i := range s {
-			t.Logf("%s: %d", s[i].Name, s[i].Value.Uint64())
-		}
-	}
+	// This test is run in a subprocess to prevent other tests from polluting the metrics.
+	output := runTestProg(t, "testprog", "SchedMetrics")
+	want := "OK\n"
+	if output != want {
+		t.Fatalf("output:\n%s\n\nwanted:\n%s", output, want)
+	}
-
-	// generalSlack is the amount of goroutines we allow ourselves to be
-	// off by in any given category, either due to background system
-	// goroutines or testing package goroutines.
-	const generalSlack = 4
-
-	// waitingSlack is the max number of blocked goroutines left
-	// from other tests, the testing package, or system
-	// goroutines.
-	const waitingSlack = 100
-
-	// threadsSlack is the maximum number of threads left over
-	// from other tests and the runtime (sysmon, the template thread, etc.)
-	const threadsSlack = 20
-
-	// Make sure GC isn't running, since GC workers interfere with
-	// expected counts.
-	defer debug.SetGCPercent(debug.SetGCPercent(-1))
-	runtime.GC()
-
-	check := func(t *testing.T, s *metrics.Sample, min, max uint64) {
-		val := s.Value.Uint64()
-		if val < min {
-			t.Errorf("%s too low; %d < %d", s.Name, val, min)
-		}
-		if val > max {
-			t.Errorf("%s too high; %d > %d", s.Name, val, max)
-		}
-	}
-	checkEq := func(t *testing.T, s *metrics.Sample, value uint64) {
-		check(t, s, value, value)
-	}
-	spinUntil := func(f func() bool) bool {
-		for {
-			if f() {
-				return true
-			}
-			time.Sleep(50 * time.Millisecond)
-		}
-	}
-
-	// Check base values.
-	t.Run("base", func(t *testing.T) {
-		defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(1))
-		metrics.Read(s[:])
-		logMetrics(t, s[:])
-		check(t, &s[notInGo], 0, generalSlack)
-		check(t, &s[runnable], 0, generalSlack)
-		checkEq(t, &s[running], 1)
-		check(t, &s[waiting], 0, waitingSlack)
-	})
-
-	metrics.Read(s[:])
-	createdAfterBase := s[created].Value.Uint64()
-
-	// Force Running count to be high. We'll use these goroutines
-	// for Runnable, too.
-	const count = 10
-	var ready, exit atomic.Uint32
-	for i := 0; i < count-1; i++ {
-		go func() {
-			ready.Add(1)
-			for exit.Load() == 0 {
-				// Spin to get us and keep us running, but check
-				// the exit condition so we exit out early if we're
-				// done.
-				start := time.Now()
-				for time.Since(start) < 10*time.Millisecond && exit.Load() == 0 {
-				}
-				runtime.Gosched()
-			}
-		}()
-	}
-	for ready.Load() < count-1 {
-		runtime.Gosched()
-	}
-
-	// Be careful. We've entered a dangerous state for platforms
-	// that do not return back to the underlying system unless all
-	// goroutines are blocked, like js/wasm, since we have a bunch
-	// of runnable goroutines all spinning. We cannot write anything
-	// out.
-	if testenv.HasParallelism() {
-		t.Run("created", func(t *testing.T) {
-			metrics.Read(s[:])
-			logMetrics(t, s[:])
-			checkEq(t, &s[created], createdAfterBase+count)
-		})
-		t.Run("running", func(t *testing.T) {
-			defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(count + 4))
-			// It can take a little bit for the scheduler to
-			// distribute the goroutines to Ps, so retry until
-			// we see the count we expect or the test times out.
-			spinUntil(func() bool {
-				metrics.Read(s[:])
-				return s[running].Value.Uint64() >= count
-			})
-			logMetrics(t, s[:])
-			check(t, &s[running], count, count+4)
-			check(t, &s[threads], count, count+4+threadsSlack)
-		})
-
-		// Force runnable count to be high.
-		t.Run("runnable", func(t *testing.T) {
-			defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(1))
-			metrics.Read(s[:])
-			logMetrics(t, s[:])
-			checkEq(t, &s[running], 1)
-			check(t, &s[runnable], count-1, count+generalSlack)
-		})
-
-		// Done with the running/runnable goroutines.
-		exit.Store(1)
-	} else {
-		// Read metrics and then exit all the other goroutines,
-		// so that system calls may proceed.
-		metrics.Read(s[:])
-
-		// Done with the running/runnable goroutines.
-		exit.Store(1)
-
-		// Now we can check our invariants.
-		t.Run("created", func(t *testing.T) {
-			// Look for count-1 goroutines because we read metrics
-			// *before* t.Run goroutine was created for this sub-test.
-			checkEq(t, &s[created], createdAfterBase+count-1)
-		})
-		t.Run("running", func(t *testing.T) {
-			logMetrics(t, s[:])
-			checkEq(t, &s[running], 1)
-			checkEq(t, &s[threads], 1)
-		})
-		t.Run("runnable", func(t *testing.T) {
-			logMetrics(t, s[:])
-			check(t, &s[runnable], count-1, count+generalSlack)
-		})
-	}
-
-	// Force not-in-go count to be high. This is a little tricky since
-	// we try really hard not to let things block in system calls.
-	// We have to drop to the syscall package to do this reliably.
-	t.Run("not-in-go", func(t *testing.T) {
-		// Block a bunch of goroutines on an OS pipe.
-		pr, pw, err := pipe()
-		if err != nil {
-			switch runtime.GOOS {
-			case "js", "wasip1":
-				t.Skip("creating pipe:", err)
-			}
-			t.Fatal("creating pipe:", err)
-		}
-		for i := 0; i < count; i++ {
-			go syscall.Read(pr, make([]byte, 1))
-		}
-
-		// Let the goroutines block.
-		spinUntil(func() bool {
-			metrics.Read(s[:])
-			return s[notInGo].Value.Uint64() >= count
-		})
-		logMetrics(t, s[:])
-		check(t, &s[notInGo], count, count+generalSlack)
-
-		syscall.Close(pw)
-		syscall.Close(pr)
-	})
-
-	t.Run("waiting", func(t *testing.T) {
-		// Force waiting count to be high.
-		const waitingCount = 1000
-		stop := make(chan bool)
-		for i := 0; i < waitingCount; i++ {
-			go func() { <-stop }()
-		}
-
-		// Let the goroutines block.
-		spinUntil(func() bool {
-			metrics.Read(s[:])
-			return s[waiting].Value.Uint64() >= waitingCount
-		})
-		logMetrics(t, s[:])
-		check(t, &s[waiting], waitingCount, waitingCount+waitingSlack)
-
-		close(stop)
-	})
 }
diff --git a/src/runtime/pipe_unix_test.go b/src/runtime/testdata/testprog/pipe_unix.go
similarity index 93%
rename from src/runtime/pipe_unix_test.go
rename to src/runtime/testdata/testprog/pipe_unix.go
index 82a49df3399..cee4da65f6e 100644
--- a/src/runtime/pipe_unix_test.go
+++ b/src/runtime/testdata/testprog/pipe_unix.go
@@ -4,7 +4,7 @@
 
 //go:build !windows
 
-package runtime_test
+package main
 
 import "syscall"
 
diff --git a/src/runtime/pipe_windows_test.go b/src/runtime/testdata/testprog/pipe_windows.go
similarity index 93%
rename from src/runtime/pipe_windows_test.go
rename to src/runtime/testdata/testprog/pipe_windows.go
index ad84ec918ae..597601a1790 100644
--- a/src/runtime/pipe_windows_test.go
+++ b/src/runtime/testdata/testprog/pipe_windows.go
@@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-package runtime_test
+package main
 
 import "syscall"
 
diff --git a/src/runtime/testdata/testprog/schedmetrics.go b/src/runtime/testdata/testprog/schedmetrics.go
new file mode 100644
index 00000000000..6d3f68a848e
--- /dev/null
+++ b/src/runtime/testdata/testprog/schedmetrics.go
@@ -0,0 +1,267 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+	"bytes"
+	"fmt"
+	"internal/testenv"
+	"log"
+	"os"
+	"runtime"
+	"runtime/debug"
+	"runtime/metrics"
+	"strings"
+	"sync/atomic"
+	"syscall"
+	"time"
+)
+
+func init() {
+	register("SchedMetrics", SchedMetrics)
+}
+
+// SchedMetrics tests runtime/metrics.Read for various scheduler metrics.
+//
+// Implemented in testprog to prevent other tests from polluting
+// the metrics.
+func SchedMetrics() {
+	const (
+		notInGo = iota
+		runnable
+		running
+		waiting
+		created
+		threads
+		numSamples
+	)
+	var s [numSamples]metrics.Sample
+	s[notInGo].Name = "/sched/goroutines/not-in-go:goroutines"
+	s[runnable].Name = "/sched/goroutines/runnable:goroutines"
+	s[running].Name = "/sched/goroutines/running:goroutines"
+	s[waiting].Name = "/sched/goroutines/waiting:goroutines"
+	s[created].Name = "/sched/goroutines-created:goroutines"
+	s[threads].Name = "/sched/threads/total:threads"
+
+	var failed bool
+	var out bytes.Buffer
+	logger := log.New(&out, "", 0)
+	indent := 0
+	logf := func(s string, a ...any) {
+		var prefix strings.Builder
+		for range indent {
+			prefix.WriteString("\t")
+		}
+		logger.Printf(prefix.String()+s, a...)
+	}
+	errorf := func(s string, a ...any) {
+		logf(s, a...)
+		failed = true
+	}
+	run := func(name string, f func()) {
+		logf("=== Checking %q", name)
+		indent++
+		f()
+		indent--
+	}
+	logMetrics := func(s []metrics.Sample) {
+		for i := range s {
+			logf("%s: %d", s[i].Name, s[i].Value.Uint64())
+		}
+	}
+
+	// generalSlack is the number of goroutines we allow ourselves to be
+	// off by in any given category due to background system
+	// goroutines. This excludes GC goroutines.
+	generalSlack := uint64(4)
+
+	// waitingSlack is the max number of blocked goroutines controlled
+	// by the runtime that we'll allow for. This includes GC goroutines
+	// as well as finalizer and cleanup goroutines.
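+	// (Assumed rationale: per-P runtime goroutines, such as dedicated
+	// GC mark workers, can number up to one per P, hence the
+	// 2*GOMAXPROCS term below.)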
+	waitingSlack := generalSlack + uint64(2*runtime.GOMAXPROCS(-1))
+
+	// threadsSlack is the maximum number of threads left over
+	// from the runtime (sysmon, the template thread, etc.)
+	const threadsSlack = 4
+
+	// Make sure GC isn't running, since GC workers interfere with
+	// expected counts.
+	defer debug.SetGCPercent(debug.SetGCPercent(-1))
+	runtime.GC()
+
+	check := func(s *metrics.Sample, min, max uint64) {
+		val := s.Value.Uint64()
+		if val < min {
+			errorf("%s too low; %d < %d", s.Name, val, min)
+		}
+		if val > max {
+			errorf("%s too high; %d > %d", s.Name, val, max)
+		}
+	}
+	checkEq := func(s *metrics.Sample, value uint64) {
+		check(s, value, value)
+	}
+	spinUntil := func(f func() bool) bool {
+		for {
+			if f() {
+				return true
+			}
+			time.Sleep(50 * time.Millisecond)
+		}
+	}
+
+	// Check base values.
+	run("base", func() {
+		defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(1))
+		metrics.Read(s[:])
+		logMetrics(s[:])
+		check(&s[notInGo], 0, generalSlack)
+		check(&s[runnable], 0, generalSlack)
+		checkEq(&s[running], 1)
+		check(&s[waiting], 0, waitingSlack)
+	})
+
+	metrics.Read(s[:])
+	createdAfterBase := s[created].Value.Uint64()
+
+	// Force Running count to be high. We'll use these goroutines
+	// for Runnable, too.
+	const count = 10
+	var ready, exit atomic.Uint32
+	for range count {
+		go func() {
+			ready.Add(1)
+			for exit.Load() == 0 {
+				// Spin to get us and keep us running, but check
+				// the exit condition so we exit early if we're
+				// done.
+				start := time.Now()
+				for time.Since(start) < 10*time.Millisecond && exit.Load() == 0 {
+				}
+				runtime.Gosched()
+			}
+		}()
+	}
+	for ready.Load() < count {
+		runtime.Gosched()
+	}
+
+	// Be careful. We've entered a dangerous state for platforms
+	// that do not return to the underlying system unless all
+	// goroutines are blocked, like js/wasm, since we have a bunch
+	// of runnable goroutines all spinning. We cannot write anything
+	// out.
+	if testenv.HasParallelism() {
+		run("created", func() {
+			metrics.Read(s[:])
+			logMetrics(s[:])
+			checkEq(&s[created], createdAfterBase+count)
+		})
+		run("running", func() {
+			defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(count + 4))
+			// It can take a little bit for the scheduler to
+			// distribute the goroutines to Ps, so retry until
+			// we see the count we expect or the test times out.
+			spinUntil(func() bool {
+				metrics.Read(s[:])
+				return s[running].Value.Uint64() >= count
+			})
+			logMetrics(s[:])
+			check(&s[running], count, count+4)
+			check(&s[threads], count, count+4+threadsSlack)
+		})
+
+		// Force runnable count to be high.
+		run("runnable", func() {
+			defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(1))
+			metrics.Read(s[:])
+			logMetrics(s[:])
+			checkEq(&s[running], 1)
+			check(&s[runnable], count-1, count+generalSlack)
+		})
+
+		// Done with the running/runnable goroutines.
+		exit.Store(1)
+	} else {
+		// Read metrics and then exit all the other goroutines,
+		// so that system calls may proceed.
+		metrics.Read(s[:])
+
+		// Done with the running/runnable goroutines.
+		exit.Store(1)
+
+		// Now we can check our invariants.
+		run("created", func() {
+			// Expect all count goroutines: unlike t.Run, run does
+			// not create an extra goroutine for this sub-test.
+			checkEq(&s[created], createdAfterBase+count)
+		})
+		run("running", func() {
+			logMetrics(s[:])
+			checkEq(&s[running], 1)
+			checkEq(&s[threads], 1)
+		})
+		run("runnable", func() {
+			logMetrics(s[:])
+			check(&s[runnable], count-1, count+generalSlack)
+		})
+	}
+
+	// Force not-in-go count to be high. This is a little tricky since
+	// we try really hard not to let things block in system calls.
+	// We have to drop to the syscall package to do this reliably.
+	run("not-in-go", func() {
+		// Block a bunch of goroutines on an OS pipe.
+		pr, pw, err := pipe()
+		if err != nil {
+			switch runtime.GOOS {
+			case "js", "wasip1":
+				logf("creating pipe: %v", err)
+				return
+			}
+			panic(fmt.Sprintf("creating pipe: %v", err))
+		}
+		for i := 0; i < count; i++ {
+			go syscall.Read(pr, make([]byte, 1))
+		}
+
+		// Let the goroutines block.
+		spinUntil(func() bool {
+			metrics.Read(s[:])
+			return s[notInGo].Value.Uint64() >= count
+		})
+		logMetrics(s[:])
+		check(&s[notInGo], count, count+generalSlack)
+
+		syscall.Close(pw)
+		syscall.Close(pr)
+	})
+
+	run("waiting", func() {
+		// Force waiting count to be high.
+		const waitingCount = 1000
+		stop := make(chan bool)
+		for i := 0; i < waitingCount; i++ {
+			go func() { <-stop }()
+		}
+
+		// Let the goroutines block.
+		spinUntil(func() bool {
+			metrics.Read(s[:])
+			return s[waiting].Value.Uint64() >= waitingCount
+		})
+		logMetrics(s[:])
+		check(&s[waiting], waitingCount, waitingCount+waitingSlack)
+
+		close(stop)
+	})
+
+	if failed {
+		fmt.Fprintln(os.Stderr, out.String())
+		os.Exit(1)
+	} else {
+		fmt.Fprintln(os.Stderr, "OK")
+	}
+}