cmd/go: test barrier actions

Add a barrier action between test run action and it's dependencies.
Run will depend on this barrier action, and the barrier action will depend on:
1. The run action's dependencies
2. The previous barrier action

This will force internal/work to schedule test run actions in-order, preventing potential goroutine starvation from
the channel locking mechanism created to force test run actions to start in-order.

Fixes #73106 #61233

Change-Id: I72e9f752f7521093f3c875eef7f2f29b2393fce9
Reviewed-on: https://go-review.googlesource.com/c/go/+/668035
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Michael Matloob <matloob@golang.org>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Michael Matloob <matloob@google.com>
This commit is contained in:
Jon San Miguel 2025-08-13 13:06:31 -07:00 committed by Michael Matloob
parent 052fcde9fd
commit bca3e98b8a
5 changed files with 71 additions and 10 deletions

View file

@ -1044,11 +1044,36 @@ func runTest(ctx context.Context, cmd *base.Command, args []string) {
prints = append(prints, printTest)
}
// Order runs for coordinating start JSON prints.
// Order runs for coordinating start JSON prints via two mechanisms:
// 1. Channel locking forces runTest actions to start in-order.
// 2. Barrier tasks force runTest actions to be scheduled in-order.
// We need both for performant behavior, as channel locking without the barrier tasks starves the worker pool,
// and barrier tasks without channel locking doesn't guarantee start in-order behavior alone.
var prevBarrier *work.Action
ch := make(chan struct{})
close(ch)
for _, a := range runs {
if r, ok := a.Actor.(*runTestActor); ok {
// Inject a barrier task between the run action and its dependencies.
// This barrier task wil also depend on the previous barrier task.
// This prevents the run task from being scheduled until all previous run dependencies have finished.
// The build graph will be augmented to look roughly like this:
// build("a") build("b") build("c")
// | | |
// barrier("a.test") -> barrier("b.test") -> barrier("c.test")
// | | |
// run("a.test") run("b.test") run("c.test")
barrier := &work.Action{
Mode: "test barrier",
Deps: slices.Clip(a.Deps),
}
if prevBarrier != nil {
barrier.Deps = append(barrier.Deps, prevBarrier)
}
a.Deps = []*work.Action{barrier}
prevBarrier = barrier
r.prev = ch
ch = make(chan struct{})
r.next = ch
@ -1400,6 +1425,8 @@ func (lockedStdout) Write(b []byte) (int, error) {
func (r *runTestActor) Act(b *work.Builder, ctx context.Context, a *work.Action) error {
sh := b.Shell(a)
barrierAction := a.Deps[0]
buildAction := barrierAction.Deps[0]
// Wait for previous test to get started and print its first json line.
select {
@ -1530,7 +1557,7 @@ func (r *runTestActor) Act(b *work.Builder, ctx context.Context, a *work.Action)
// we have different link inputs but the same final binary,
// we still reuse the cached test result.
// c.saveOutput will store the result under both IDs.
r.c.tryCacheWithID(b, a, a.Deps[0].BuildContentID())
r.c.tryCacheWithID(b, a, buildAction.BuildContentID())
}
if r.c.buf != nil {
if stdout != &buf {
@ -1581,7 +1608,7 @@ func (r *runTestActor) Act(b *work.Builder, ctx context.Context, a *work.Action)
// fresh copies of tools to test as part of the testing.
addToEnv = "GOCOVERDIR=" + gcd
}
args := str.StringList(execCmd, a.Deps[0].BuiltTarget(), testlogArg, panicArg, fuzzArg, coverdirArg, testArgs)
args := str.StringList(execCmd, buildAction.BuiltTarget(), testlogArg, panicArg, fuzzArg, coverdirArg, testArgs)
if testCoverProfile != "" {
// Write coverage to temporary profile, for merging later.
@ -1741,8 +1768,8 @@ func (r *runTestActor) Act(b *work.Builder, ctx context.Context, a *work.Action)
// tryCache is called just before the link attempt,
// to see if the test result is cached and therefore the link is unneeded.
// It reports whether the result can be satisfied from cache.
func (c *runCache) tryCache(b *work.Builder, a *work.Action) bool {
return c.tryCacheWithID(b, a, a.Deps[0].BuildActionID())
func (c *runCache) tryCache(b *work.Builder, a *work.Action, linkAction *work.Action) bool {
return c.tryCacheWithID(b, a, linkAction.BuildActionID())
}
func (c *runCache) tryCacheWithID(b *work.Builder, a *work.Action, id string) bool {

View file

@ -92,7 +92,7 @@ type Action struct {
buggyInstall bool // is this a buggy install (see -linkshared)?
TryCache func(*Builder, *Action) bool // callback for cache bypass
TryCache func(*Builder, *Action, *Action) bool // callback for cache bypass
CacheExecutable bool // Whether to cache executables produced by link steps

View file

@ -401,6 +401,25 @@ var (
stdlibRecompiledIncOnce = sync.OnceFunc(stdlibRecompiled.Inc)
)
// testRunAction returns the run action for a test given the link action
// for the test binary, if the only (non-test-barrier) action that depend
// on the link action is the run action.
func testRunAction(a *Action) *Action {
if len(a.triggers) != 1 || a.triggers[0].Mode != "test barrier" {
return nil
}
var runAction *Action
for _, t := range a.triggers[0].triggers {
if t.Mode == "test run" {
if runAction != nil {
return nil
}
runAction = t
}
}
return runAction
}
// useCache tries to satisfy the action a, which has action ID actionHash,
// by using a cached result from an earlier build.
// If useCache decides that the cache can be used, it sets a.buildID
@ -526,7 +545,7 @@ func (b *Builder) useCache(a *Action, actionHash cache.ActionID, target string,
// then to avoid the link step, report the link as up-to-date.
// We avoid the nested build ID problem in the previous special case
// by recording the test results in the cache under the action ID half.
if len(a.triggers) == 1 && a.triggers[0].TryCache != nil && a.triggers[0].TryCache(b, a.triggers[0]) {
if ra := testRunAction(a); ra != nil && ra.TryCache != nil && ra.TryCache(b, ra, a) {
// Best effort attempt to display output from the compile and link steps.
// If it doesn't work, it doesn't work: reusing the test result is more
// important than reprinting diagnostic information.

View file

@ -36,8 +36,9 @@ func (b *Builder) CovData(a *Action, cmdargs ...any) ([]byte, error) {
// but will be empty; in this case the return is an empty string.
func BuildActionCoverMetaFile(runAct *Action) (string, error) {
p := runAct.Package
for i := range runAct.Deps {
pred := runAct.Deps[i]
barrierAct := runAct.Deps[0]
for i := range barrierAct.Deps {
pred := barrierAct.Deps[i]
if pred.Mode != "build" || pred.Package == nil {
continue
}

View file

@ -183,8 +183,22 @@ func (b *Builder) Do(ctx context.Context, root *Action) {
for _, a0 := range a.triggers {
if a.Failed != nil {
if a0.Mode == "test barrier" {
// If this action was triggered by a test, there
// will be a test barrier action in between the test
// and the true trigger. But there will be other
// triggers that are other barriers that are waiting
// for this one. Propagate the failure to the true
// trigger, but not to the other barriers.
for _, bt := range a0.triggers {
if bt.Mode != "test barrier" {
bt.Failed = a.Failed
}
}
} else {
a0.Failed = a.Failed
}
}
if a0.pending--; a0.pending == 0 {
b.ready.push(a0)
b.readySema <- true