runtime/pprof: possibly deflake TestGoroutineLeakProfileConcurrency stress tests

This CL attempts to reduce flakes for two goroutine leak profiler subtests:
 - overlapping_profile_requests
 - overlapping_goroutine_and_goroutine_leak_profile_requests

Watchflake issues #76540 and #76749 currently have ~300 comments between
them. A prior round of deflaking I think reduced the frequency, but
watchflakes is reporting maybe a dozen or so a week recently between
them (though I did not analyze prior or current rate too closely).

These are essentially stress tests that look for data corruption
or data races while exercising the leak profiler concurrently
with itself and concurrently with the traditional goroutine
profiler.

The tests expect to find 6 leaked goroutines, but in some cases
seem to report 5. (This might be due to some rare event keeping
something alive such that 5 is the "correct" result from the
current implementation, though it might not be expected from
a user point of view.)

This CL loosens the check to allow 4, 5, or 6 goroutines.

A bad data corruption or data race might be kind enough to do more than
just alter the count by 1-2, and the test also verifies a
key function is in the detected goroutines, so perhaps this is
OK while people are trying to land CLs as the freeze approaches.

I also filed #79452 for tracking down a better root cause and
improving the test or profiler.

I'm optimistically marking the two main watchflakes issues as fixed
(perhaps too optimistically), but watchflakes will re-open if needed.

Fixes #76749
Fixes #76540
Updates #79409
Updates #79452

Change-Id: I14b4178337c0cf2a2bbd4b8c5256090ae202b8d8
Reviewed-on: https://go-review.googlesource.com/c/go/+/778620
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
LUCI-TryBot-Result: golang-scoped@luci-project-accounts.iam.gserviceaccount.com <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Georgian-Vlad Saioc <vsaioc@uber.com>
This commit is contained in:
thepudds 2026-05-16 16:32:50 -04:00 committed by t hepudds
parent 6997bcd820
commit 320e0be23d

View file

@ -1719,6 +1719,14 @@ func TestGoroutineLeakProfileConcurrency(t *testing.T) {
t.Errorf("%s profile does not contain expected leaked goroutine %s: %s", profType, leak, profText)
}
}
// TODO(thepudds,vsaioc): the next two subtests would ideally find totalLeaked goroutines,
// but in rare cases they seem to be 1 short, leading to intermittent flakes. Perhaps this
// is "expected" due to a convervative scan keeping something alive or some other rare event.
// Deflake for now by allowing a small margin of error. #79452 is for finding a true root
// cause, improving this test, or adjusting the leak profiler if warranted.
const minWantLeaks = totalLeaked - 1
t.Run("overlapping profile requests", func(t *testing.T) {
ctx := context.Background()
ctx, cancel := context.WithTimeout(ctx, time.Second)
@ -1733,8 +1741,11 @@ func TestGoroutineLeakProfileConcurrency(t *testing.T) {
for ctx.Err() == nil {
var w strings.Builder
goroutineLeakProf.WriteTo(&w, 1)
if n := countLeaks(t, w.String()); n != totalLeaked {
t.Errorf("expected %d goroutines leaked, got %d: %s", totalLeaked, n, w.String())
got := countLeaks(t, w.String())
// TODO(thepudds,vsaioc): see related comment on minWantLeaks above.
if got < minWantLeaks || got > totalLeaked {
t.Errorf("expected at least %d and at most %d goroutines leaked, got %d: %s",
minWantLeaks, totalLeaked, got, w.String())
}
quickCheckForGoroutine(t, "goroutineleak", "runtime/pprof.goroutineLeakExample", w.String())
}
@ -1760,8 +1771,11 @@ func TestGoroutineLeakProfileConcurrency(t *testing.T) {
for ctx.Err() == nil {
var w strings.Builder
goroutineLeakProf.WriteTo(&w, 1)
if n := countLeaks(t, w.String()); n != totalLeaked {
t.Errorf("expected %d goroutines leaked, got %d: %s", totalLeaked, n, w.String())
got := countLeaks(t, w.String())
// TODO(thepudds,vsaioc): see related comment on minWantLeaks above.
if got < minWantLeaks || got > totalLeaked {
t.Errorf("expected at least %d and at most %d goroutines leaked, got %d: %s",
minWantLeaks, totalLeaked, got, w.String())
}
quickCheckForGoroutine(t, "goroutineleak", "runtime/pprof.goroutineLeakExample", w.String())
}