runtime: track goroutine location until actual STW

TestTraceSTW / TestTraceGCSTW currently tracks the location (M/P) of the
target goroutines until it reaches the "start" log message, assuming the
actual STW comes immediately afterwards.

On 386 with TestTraceGCSTW, it actually tends to take >10ms after the
start log before the STW actually occurs. This is enough time for sysmon
to preempt the target goroutines and migration them to another location.

Fix this by continuing tracking all the way until the STW itself occurs.
We still keep the start log message so we can ignore any STW (if any)
before we expect.

Cq-Include-Trybots: luci.golang.try:gotip-linux-386-longtest,gotip-linux-amd64-longtest
Change-Id: I6a6a636cf2dcb18d8b33ac4ad88333cabff2eabb
Reviewed-on: https://go-review.googlesource.com/c/go/+/722520
Reviewed-by: Michael Knyszek <mknyszek@google.com>
Auto-Submit: Michael Pratt <mpratt@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
Michael Pratt 2025-11-20 16:21:09 -05:00 committed by Gopher Robot
parent 1bc54868d4
commit d58b733646

View file

@ -1321,11 +1321,13 @@ func runTestTracesSTW(t *testing.T, run int, name, stwType string) (err error) {
// //
// 2. Once found, track which M and P the target goroutines run on until... // 2. Once found, track which M and P the target goroutines run on until...
// //
// 3. Look for the "TraceSTW" "start" log message, where we commit the // 3. Look for the first STW after the "TraceSTW" "start" log message,
// target goroutines' "before" M and P. // where we commit the target goroutines' "before" M and P.
// //
// N.B. We must do (1) and (2) together because the first target // N.B. We must do (1) and (2) together because the first target
// goroutine may start running before the second is created. // goroutine may start running before the second is created.
var startLogSeen bool
var stwSeen bool
findStart: findStart:
for { for {
ev, err := br.ReadEvent() ev, err := br.ReadEvent()
@ -1384,9 +1386,25 @@ findStart:
// Found start point, move on to next stage. // Found start point, move on to next stage.
t.Logf("Found start message") t.Logf("Found start message")
startLogSeen = true
case trace.EventRangeBegin:
if !startLogSeen {
// Ignore spurious STW before we expect.
continue
}
r := ev.Range()
if r.Name == stwType {
t.Logf("Found STW")
stwSeen = true
break findStart break findStart
} }
} }
}
if !stwSeen {
t.Fatal("Can't find STW in the test trace")
}
t.Log("Target goroutines:") t.Log("Target goroutines:")
for _, gs := range targetGoroutines { for _, gs := range targetGoroutines {
@ -1440,7 +1458,6 @@ findStart:
// [1] This is slightly fragile because there is a small window between // [1] This is slightly fragile because there is a small window between
// the "start" log and actual STW during which the target goroutines // the "start" log and actual STW during which the target goroutines
// could legitimately migrate. // could legitimately migrate.
var stwSeen bool
var pRunning []trace.ProcID var pRunning []trace.ProcID
var gRunning []trace.GoID var gRunning []trace.GoID
findEnd: findEnd:
@ -1543,20 +1560,8 @@ findEnd:
// Found end point. // Found end point.
t.Logf("Found end message") t.Logf("Found end message")
break findEnd break findEnd
case trace.EventRangeBegin:
r := ev.Range()
if r.Name == stwType {
// Note when we see the STW begin. This is not
// load bearing; it's purpose is simply to fail
// the test if we accidentally remove the STW.
stwSeen = true
} }
} }
}
if !stwSeen {
t.Fatal("No STW in the test trace")
}
return nil return nil
} }