From d58b733646d2d52638b08d5c44afbc7292aeca4d Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Thu, 20 Nov 2025 16:21:09 -0500 Subject: [PATCH] runtime: track goroutine location until actual STW TestTraceSTW / TestTraceGCSTW currently tracks the location (M/P) of the target goroutines until it reaches the "start" log message, assuming the actual STW comes immediately afterwards. On 386 with TestTraceGCSTW, it actually tends to take >10ms after the start log before the STW actually occurs. This is enough time for sysmon to preempt the target goroutines and migration them to another location. Fix this by continuing tracking all the way until the STW itself occurs. We still keep the start log message so we can ignore any STW (if any) before we expect. Cq-Include-Trybots: luci.golang.try:gotip-linux-386-longtest,gotip-linux-amd64-longtest Change-Id: I6a6a636cf2dcb18d8b33ac4ad88333cabff2eabb Reviewed-on: https://go-review.googlesource.com/c/go/+/722520 Reviewed-by: Michael Knyszek Auto-Submit: Michael Pratt LUCI-TryBot-Result: Go LUCI --- src/runtime/proc_test.go | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/src/runtime/proc_test.go b/src/runtime/proc_test.go index 35a1aeab1f6..cff8775cc97 100644 --- a/src/runtime/proc_test.go +++ b/src/runtime/proc_test.go @@ -1321,11 +1321,13 @@ func runTestTracesSTW(t *testing.T, run int, name, stwType string) (err error) { // // 2. Once found, track which M and P the target goroutines run on until... // - // 3. Look for the "TraceSTW" "start" log message, where we commit the - // target goroutines' "before" M and P. + // 3. Look for the first STW after the "TraceSTW" "start" log message, + // where we commit the target goroutines' "before" M and P. // // N.B. We must do (1) and (2) together because the first target // goroutine may start running before the second is created. + var startLogSeen bool + var stwSeen bool findStart: for { ev, err := br.ReadEvent() @@ -1384,10 +1386,26 @@ findStart: // Found start point, move on to next stage. t.Logf("Found start message") - break findStart + startLogSeen = true + case trace.EventRangeBegin: + if !startLogSeen { + // Ignore spurious STW before we expect. + continue + } + + r := ev.Range() + if r.Name == stwType { + t.Logf("Found STW") + stwSeen = true + break findStart + } } } + if !stwSeen { + t.Fatal("Can't find STW in the test trace") + } + t.Log("Target goroutines:") for _, gs := range targetGoroutines { t.Logf("%+v", gs) @@ -1440,7 +1458,6 @@ findStart: // [1] This is slightly fragile because there is a small window between // the "start" log and actual STW during which the target goroutines // could legitimately migrate. - var stwSeen bool var pRunning []trace.ProcID var gRunning []trace.GoID findEnd: @@ -1543,21 +1560,9 @@ findEnd: // Found end point. t.Logf("Found end message") break findEnd - case trace.EventRangeBegin: - r := ev.Range() - if r.Name == stwType { - // Note when we see the STW begin. This is not - // load bearing; it's purpose is simply to fail - // the test if we accidentally remove the STW. - stwSeen = true - } } } - if !stwSeen { - t.Fatal("No STW in the test trace") - } - return nil }