Mirror of https://github.com/golang/go.git (synced 2025-12-08 06:10:04 +00:00)
runtime: move TestReadMetricsSched to testprog
There are just too many flakes resulting from background pollution by the
testing package and other tests. Run in a subprocess where at least the
environment can be more tightly controlled.

Fixes #75049.

Change-Id: Iad59edaaf31268f1fcb77273f01317d963708fa6
Reviewed-on: https://go-review.googlesource.com/c/go/+/707155
Reviewed-by: Michael Pratt <mpratt@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Auto-Submit: Michael Knyszek <mknyszek@google.com>
This commit is contained in:
parent 459f3a3adc
commit 16ae11a9e1

4 changed files with 274 additions and 209 deletions
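For readers unfamiliar with the runtime test layout: the fix registers a named entry point in the testprog helper binary and has the test drive it as a child process, so goroutines belonging to the parent test binary never show up in the measured metrics. Below is a minimal, self-contained sketch of that parent/child pattern; the RUN_CHILD environment variable and helper names are illustrative only and not part of the commit, which uses the existing runTestProg and register helpers visible in the diff.

// Sketch only: re-execute ourselves as a child and check its output,
// mirroring the want := "OK\n" check in the new TestReadMetricsSched.
package main

import (
	"fmt"
	"os"
	"os/exec"
)

func child() {
	// In the real commit the child reads runtime/metrics and prints "OK"
	// only if every check passes.
	fmt.Println("OK")
}

func main() {
	if os.Getenv("RUN_CHILD") == "1" {
		child()
		return
	}
	// Parent: run the same binary again with the child marker set.
	cmd := exec.Command(os.Args[0])
	cmd.Env = append(os.Environ(), "RUN_CHILD=1")
	out, err := cmd.Output()
	if err != nil {
		fmt.Fprintln(os.Stderr, "child failed:", err)
		os.Exit(1)
	}
	if string(out) != "OK\n" {
		fmt.Fprintf(os.Stderr, "output:\n%s\n\nwanted:\nOK\n", out)
		os.Exit(1)
	}
}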
@@ -22,7 +22,6 @@ import (
 	"strings"
 	"sync"
 	"sync/atomic"
-	"syscall"
 	"testing"
 	"time"
 	"unsafe"
@@ -1578,211 +1577,10 @@ func TestReadMetricsFinalizers(t *testing.T) {
 }
 
 func TestReadMetricsSched(t *testing.T) {
-	const (
-		notInGo = iota
-		runnable
-		running
-		waiting
-		created
-		threads
-		numSamples
-	)
-	var s [numSamples]metrics.Sample
-	s[notInGo].Name = "/sched/goroutines/not-in-go:goroutines"
-	s[runnable].Name = "/sched/goroutines/runnable:goroutines"
-	s[running].Name = "/sched/goroutines/running:goroutines"
-	s[waiting].Name = "/sched/goroutines/waiting:goroutines"
-	s[created].Name = "/sched/goroutines-created:goroutines"
-	s[threads].Name = "/sched/threads/total:threads"
-
-	logMetrics := func(t *testing.T, s []metrics.Sample) {
-		for i := range s {
-			t.Logf("%s: %d", s[i].Name, s[i].Value.Uint64())
-		}
-	}
-
-	// generalSlack is the amount of goroutines we allow ourselves to be
-	// off by in any given category, either due to background system
-	// goroutines or testing package goroutines.
-	const generalSlack = 4
-
-	// waitingSlack is the max number of blocked goroutines left
-	// from other tests, the testing package, or system
-	// goroutines.
-	const waitingSlack = 100
-
-	// threadsSlack is the maximum number of threads left over
-	// from other tests and the runtime (sysmon, the template thread, etc.)
-	const threadsSlack = 20
-
-	// Make sure GC isn't running, since GC workers interfere with
-	// expected counts.
-	defer debug.SetGCPercent(debug.SetGCPercent(-1))
-	runtime.GC()
-
-	check := func(t *testing.T, s *metrics.Sample, min, max uint64) {
-		val := s.Value.Uint64()
-		if val < min {
-			t.Errorf("%s too low; %d < %d", s.Name, val, min)
-		}
-		if val > max {
-			t.Errorf("%s too high; %d > %d", s.Name, val, max)
-		}
-	}
-	checkEq := func(t *testing.T, s *metrics.Sample, value uint64) {
-		check(t, s, value, value)
-	}
-	spinUntil := func(f func() bool) bool {
-		for {
-			if f() {
-				return true
-			}
-			time.Sleep(50 * time.Millisecond)
-		}
-	}
-
-	// Check base values.
-	t.Run("base", func(t *testing.T) {
-		defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(1))
-		metrics.Read(s[:])
-		logMetrics(t, s[:])
-		check(t, &s[notInGo], 0, generalSlack)
-		check(t, &s[runnable], 0, generalSlack)
-		checkEq(t, &s[running], 1)
-		check(t, &s[waiting], 0, waitingSlack)
-	})
-
-	metrics.Read(s[:])
-	createdAfterBase := s[created].Value.Uint64()
-
-	// Force Running count to be high. We'll use these goroutines
-	// for Runnable, too.
-	const count = 10
-	var ready, exit atomic.Uint32
-	for i := 0; i < count-1; i++ {
-		go func() {
-			ready.Add(1)
-			for exit.Load() == 0 {
-				// Spin to get us and keep us running, but check
-				// the exit condition so we exit out early if we're
-				// done.
-				start := time.Now()
-				for time.Since(start) < 10*time.Millisecond && exit.Load() == 0 {
-				}
-				runtime.Gosched()
-			}
-		}()
-	}
-	for ready.Load() < count-1 {
-		runtime.Gosched()
-	}
-
-	// Be careful. We've entered a dangerous state for platforms
-	// that do not return back to the underlying system unless all
-	// goroutines are blocked, like js/wasm, since we have a bunch
-	// of runnable goroutines all spinning. We cannot write anything
-	// out.
-	if testenv.HasParallelism() {
-		t.Run("created", func(t *testing.T) {
-			metrics.Read(s[:])
-			logMetrics(t, s[:])
-			checkEq(t, &s[created], createdAfterBase+count)
-		})
-		t.Run("running", func(t *testing.T) {
-			defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(count + 4))
-			// It can take a little bit for the scheduler to
-			// distribute the goroutines to Ps, so retry until
-			// we see the count we expect or the test times out.
-			spinUntil(func() bool {
-				metrics.Read(s[:])
-				return s[running].Value.Uint64() >= count
-			})
-			logMetrics(t, s[:])
-			check(t, &s[running], count, count+4)
-			check(t, &s[threads], count, count+4+threadsSlack)
-		})
-
-		// Force runnable count to be high.
-		t.Run("runnable", func(t *testing.T) {
-			defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(1))
-			metrics.Read(s[:])
-			logMetrics(t, s[:])
-			checkEq(t, &s[running], 1)
-			check(t, &s[runnable], count-1, count+generalSlack)
-		})
-
-		// Done with the running/runnable goroutines.
-		exit.Store(1)
-	} else {
-		// Read metrics and then exit all the other goroutines,
-		// so that system calls may proceed.
-		metrics.Read(s[:])
-
-		// Done with the running/runnable goroutines.
-		exit.Store(1)
-
-		// Now we can check our invariants.
-		t.Run("created", func(t *testing.T) {
-			// Look for count-1 goroutines because we read metrics
-			// *before* t.Run goroutine was created for this sub-test.
-			checkEq(t, &s[created], createdAfterBase+count-1)
-		})
-		t.Run("running", func(t *testing.T) {
-			logMetrics(t, s[:])
-			checkEq(t, &s[running], 1)
-			checkEq(t, &s[threads], 1)
-		})
-		t.Run("runnable", func(t *testing.T) {
-			logMetrics(t, s[:])
-			check(t, &s[runnable], count-1, count+generalSlack)
-		})
-	}
-
-	// Force not-in-go count to be high. This is a little tricky since
-	// we try really hard not to let things block in system calls.
-	// We have to drop to the syscall package to do this reliably.
-	t.Run("not-in-go", func(t *testing.T) {
-		// Block a bunch of goroutines on an OS pipe.
-		pr, pw, err := pipe()
-		if err != nil {
-			switch runtime.GOOS {
-			case "js", "wasip1":
-				t.Skip("creating pipe:", err)
-			}
-			t.Fatal("creating pipe:", err)
-		}
-		for i := 0; i < count; i++ {
-			go syscall.Read(pr, make([]byte, 1))
-		}
-
-		// Let the goroutines block.
-		spinUntil(func() bool {
-			metrics.Read(s[:])
-			return s[notInGo].Value.Uint64() >= count
-		})
-		logMetrics(t, s[:])
-		check(t, &s[notInGo], count, count+generalSlack)
-
-		syscall.Close(pw)
-		syscall.Close(pr)
-	})
-
-	t.Run("waiting", func(t *testing.T) {
-		// Force waiting count to be high.
-		const waitingCount = 1000
-		stop := make(chan bool)
-		for i := 0; i < waitingCount; i++ {
-			go func() { <-stop }()
-		}
-
-		// Let the goroutines block.
-		spinUntil(func() bool {
-			metrics.Read(s[:])
-			return s[waiting].Value.Uint64() >= waitingCount
-		})
-		logMetrics(t, s[:])
-		check(t, &s[waiting], waitingCount, waitingCount+waitingSlack)
-
-		close(stop)
-	})
+	// This test is run in a subprocess to prevent other tests from polluting the metrics.
+	output := runTestProg(t, "testprog", "SchedMetrics")
+	want := "OK\n"
+	if output != want {
+		t.Fatalf("output:\n%s\n\nwanted:\n%s", output, want)
+	}
 }
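Both the removed test above and its testprog replacement follow the same runtime/metrics reading pattern: name the samples of interest, call metrics.Read once, then pull typed values out of each Sample. A standalone sketch of that pattern, assuming a toolchain that exposes the /sched/ metrics this commit relies on (the Kind check guards against older toolchains where a name is unknown):

// Sketch only: read a few scheduler metrics by name via runtime/metrics.
package main

import (
	"fmt"
	"runtime/metrics"
)

func main() {
	samples := []metrics.Sample{
		{Name: "/sched/goroutines/running:goroutines"},
		{Name: "/sched/goroutines/waiting:goroutines"},
		{Name: "/sched/threads/total:threads"},
	}
	// One Read call fills in every sample's Value.
	metrics.Read(samples)
	for _, s := range samples {
		if s.Value.Kind() != metrics.KindUint64 {
			// Metric name not supported by this toolchain; skip it.
			fmt.Printf("%s: unsupported\n", s.Name)
			continue
		}
		fmt.Printf("%s = %d\n", s.Name, s.Value.Uint64())
	}
}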
@@ -4,7 +4,7 @@
 
 //go:build !windows
 
-package runtime_test
+package main
 
 import "syscall"
 
@@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-package runtime_test
+package main
 
 import "syscall"
 
src/runtime/testdata/testprog/schedmetrics.go (new file, vendored, 267 additions)
@@ -0,0 +1,267 @@
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package main

import (
	"bytes"
	"fmt"
	"internal/testenv"
	"log"
	"os"
	"runtime"
	"runtime/debug"
	"runtime/metrics"
	"strings"
	"sync/atomic"
	"syscall"
	"time"
)

func init() {
	register("SchedMetrics", SchedMetrics)
}

// Tests runtime/metrics.Read for various scheduler metrics.
//
// Implemented in testprog to prevent other tests from polluting
// the metrics.
func SchedMetrics() {
	const (
		notInGo = iota
		runnable
		running
		waiting
		created
		threads
		numSamples
	)
	var s [numSamples]metrics.Sample
	s[notInGo].Name = "/sched/goroutines/not-in-go:goroutines"
	s[runnable].Name = "/sched/goroutines/runnable:goroutines"
	s[running].Name = "/sched/goroutines/running:goroutines"
	s[waiting].Name = "/sched/goroutines/waiting:goroutines"
	s[created].Name = "/sched/goroutines-created:goroutines"
	s[threads].Name = "/sched/threads/total:threads"

	var failed bool
	var out bytes.Buffer
	logger := log.New(&out, "", 0)
	indent := 0
	logf := func(s string, a ...any) {
		var prefix strings.Builder
		for range indent {
			prefix.WriteString("\t")
		}
		logger.Printf(prefix.String()+s, a...)
	}
	errorf := func(s string, a ...any) {
		logf(s, a...)
		failed = true
	}
	run := func(name string, f func()) {
		logf("=== Checking %q", name)
		indent++
		f()
		indent--
	}
	logMetrics := func(s []metrics.Sample) {
		for i := range s {
			logf("%s: %d", s[i].Name, s[i].Value.Uint64())
		}
	}

	// generalSlack is the amount of goroutines we allow ourselves to be
	// off by in any given category, either due to background system
	// goroutines. This excludes GC goroutines.
	generalSlack := uint64(4)

	// waitingSlack is the max number of blocked goroutines controlled
	// by the runtime that we'll allow for. This includes GC goroutines
	// as well as finalizer and cleanup goroutines.
	waitingSlack := generalSlack + uint64(2*runtime.GOMAXPROCS(-1))

	// threadsSlack is the maximum number of threads left over
	// from the runtime (sysmon, the template thread, etc.)
	const threadsSlack = 4

	// Make sure GC isn't running, since GC workers interfere with
	// expected counts.
	defer debug.SetGCPercent(debug.SetGCPercent(-1))
	runtime.GC()

	check := func(s *metrics.Sample, min, max uint64) {
		val := s.Value.Uint64()
		if val < min {
			errorf("%s too low; %d < %d", s.Name, val, min)
		}
		if val > max {
			errorf("%s too high; %d > %d", s.Name, val, max)
		}
	}
	checkEq := func(s *metrics.Sample, value uint64) {
		check(s, value, value)
	}
	spinUntil := func(f func() bool) bool {
		for {
			if f() {
				return true
			}
			time.Sleep(50 * time.Millisecond)
		}
	}

	// Check base values.
	run("base", func() {
		defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(1))
		metrics.Read(s[:])
		logMetrics(s[:])
		check(&s[notInGo], 0, generalSlack)
		check(&s[runnable], 0, generalSlack)
		checkEq(&s[running], 1)
		check(&s[waiting], 0, waitingSlack)
	})

	metrics.Read(s[:])
	createdAfterBase := s[created].Value.Uint64()

	// Force Running count to be high. We'll use these goroutines
	// for Runnable, too.
	const count = 10
	var ready, exit atomic.Uint32
	for range count {
		go func() {
			ready.Add(1)
			for exit.Load() == 0 {
				// Spin to get us and keep us running, but check
				// the exit condition so we exit out early if we're
				// done.
				start := time.Now()
				for time.Since(start) < 10*time.Millisecond && exit.Load() == 0 {
				}
				runtime.Gosched()
			}
		}()
	}
	for ready.Load() < count {
		runtime.Gosched()
	}

	// Be careful. We've entered a dangerous state for platforms
	// that do not return back to the underlying system unless all
	// goroutines are blocked, like js/wasm, since we have a bunch
	// of runnable goroutines all spinning. We cannot write anything
	// out.
	if testenv.HasParallelism() {
		run("created", func() {
			metrics.Read(s[:])
			logMetrics(s[:])
			checkEq(&s[created], createdAfterBase+count)
		})
		run("running", func() {
			defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(count + 4))
			// It can take a little bit for the scheduler to
			// distribute the goroutines to Ps, so retry until
			// we see the count we expect or the test times out.
			spinUntil(func() bool {
				metrics.Read(s[:])
				return s[running].Value.Uint64() >= count
			})
			logMetrics(s[:])
			check(&s[running], count, count+4)
			check(&s[threads], count, count+4+threadsSlack)
		})

		// Force runnable count to be high.
		run("runnable", func() {
			defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(1))
			metrics.Read(s[:])
			logMetrics(s[:])
			checkEq(&s[running], 1)
			check(&s[runnable], count-1, count+generalSlack)
		})

		// Done with the running/runnable goroutines.
		exit.Store(1)
	} else {
		// Read metrics and then exit all the other goroutines,
		// so that system calls may proceed.
		metrics.Read(s[:])

		// Done with the running/runnable goroutines.
		exit.Store(1)

		// Now we can check our invariants.
		run("created", func() {
			// Look for count-1 goroutines because we read metrics
			// *before* run goroutine was created for this sub-test.
			checkEq(&s[created], createdAfterBase+count-1)
		})
		run("running", func() {
			logMetrics(s[:])
			checkEq(&s[running], 1)
			checkEq(&s[threads], 1)
		})
		run("runnable", func() {
			logMetrics(s[:])
			check(&s[runnable], count-1, count+generalSlack)
		})
	}

	// Force not-in-go count to be high. This is a little tricky since
	// we try really hard not to let things block in system calls.
	// We have to drop to the syscall package to do this reliably.
	run("not-in-go", func() {
		// Block a bunch of goroutines on an OS pipe.
		pr, pw, err := pipe()
		if err != nil {
			switch runtime.GOOS {
			case "js", "wasip1":
				logf("creating pipe: %v", err)
				return
			}
			panic(fmt.Sprintf("creating pipe: %v", err))
		}
		for i := 0; i < count; i++ {
			go syscall.Read(pr, make([]byte, 1))
		}

		// Let the goroutines block.
		spinUntil(func() bool {
			metrics.Read(s[:])
			return s[notInGo].Value.Uint64() >= count
		})
		logMetrics(s[:])
		check(&s[notInGo], count, count+generalSlack)

		syscall.Close(pw)
		syscall.Close(pr)
	})

	run("waiting", func() {
		// Force waiting count to be high.
		const waitingCount = 1000
		stop := make(chan bool)
		for i := 0; i < waitingCount; i++ {
			go func() { <-stop }()
		}

		// Let the goroutines block.
		spinUntil(func() bool {
			metrics.Read(s[:])
			return s[waiting].Value.Uint64() >= waitingCount
		})
		logMetrics(s[:])
		check(&s[waiting], waitingCount, waitingCount+waitingSlack)

		close(stop)
	})

	if failed {
		fmt.Fprintln(os.Stderr, out.String())
		os.Exit(1)
	} else {
		fmt.Fprintln(os.Stderr, "OK")
	}
}
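One idiom worth calling out from both the removed test and the new program: debug.SetGCPercent and runtime.GOMAXPROCS each return the previous setting, so nesting the call inside a defer restores the original configuration when the function returns. A standalone sketch of that save-and-restore pattern, not part of the commit:

// Sketch only: temporarily disable GC and pin GOMAXPROCS, restoring both on return.
package main

import (
	"fmt"
	"runtime"
	"runtime/debug"
)

func main() {
	// The inner call runs now (disabling GC, returning the old percent);
	// the deferred outer call restores that old percent on return.
	defer debug.SetGCPercent(debug.SetGCPercent(-1))
	// Same trick for GOMAXPROCS: set to 1 now, restore the old value on return.
	defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(1))

	fmt.Println("GOMAXPROCS is now", runtime.GOMAXPROCS(0))
}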