cmd/go: limit total compile -c backend concurrency using a pool

Previously we limited the value we passed in to compile -c (which set
the number of SSA compile goroutines that run at one time) to 4. This CL
allows the -c value to go up to GOMAXPROCS, while limiting the total
number of backend SSA compile goroutines to still be less than the
previous worst case of 4*GOMAXPROCS (actually four times the value of
the -p flag, but the default is GOMAXPROCS). We do that by keeping a
pool of tokens to represent the total number of SSA compile goroutines
(with some buffer to allow us to run out of tokens and not exceed
4*GOMAXPROCS). Each time a compile requests a -c value, we'll hand out
half of the remaining tokens (with the number handed out capped at
GOMAXPROCS) until we run out of tokens, in which case we'll set -c to
one.

This leads to a speedup of 3-10% on the 16-core Intel perf builder and
5-16% on the 88-core builder on the Sweet go-build benchmark.

Change-Id: Ib1ec843fee57f0fb8d36a507162317276a6a6964
Reviewed-on: https://go-review.googlesource.com/c/go/+/724142
Reviewed-by: Alan Donovan <adonovan@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Michael Matloob <matloob@google.com>
Michael Matloob 2025-11-24 18:33:30 -05:00
parent 3c6bf6fbf3
commit 623ef28135
2 changed files with 57 additions and 21 deletions
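
The token-handout policy described in the message is easy to model outside cmd/go. The following is a minimal, hypothetical sketch of that policy, not the CL's actual code: the names tokenPool, newTokenPool, and acquire are invented for illustration, and the parameters are hardcoded to -p=8 with GOMAXPROCS=8.

package main

import (
	"fmt"
	"sync"
)

// tokenPool hands out a backend-concurrency budget (a -c value) to each compile.
type tokenPool struct {
	mu       sync.Mutex
	tokens   int // tokens still available
	active   int // compiles that have requested a budget so far
	maxC     int // cap on any single -c value (GOMAXPROCS)
	parallel int // number of action goroutines (the -p flag)
}

// newTokenPool sizes the pool so that the worst-case total matches the old
// behavior of capping -c at 4 per compile process.
func newTokenPool(parallel, maxC int) *tokenPool {
	return &tokenPool{tokens: min(4, maxC) * parallel, maxC: maxC, parallel: parallel}
}

// acquire returns a -c value and a function that hands the tokens back
// once the compile finishes.
func (p *tokenPool) acquire() (int, func()) {
	p.mu.Lock()
	defer p.mu.Unlock()
	p.active++
	// Reserve one token for every action goroutine not yet running a compile,
	// then hand out half of what remains, capped at maxC and floored at 1.
	available := p.tokens - (p.parallel - p.active)
	c := max(min(available/2, p.maxC), 1)
	p.tokens -= c
	return c, func() {
		p.mu.Lock()
		defer p.mu.Unlock()
		p.tokens += c
	}
}

func main() {
	pool := newTokenPool(8, 8) // pretend -p=8 and GOMAXPROCS=8
	for i := 1; i <= 8; i++ {
		c, release := pool.acquire()
		fmt.Printf("compile %d gets -c=%d\n", i, c)
		defer release() // in cmd/go the tokens return when the compile action finishes
	}
}

With those parameters the eight requests receive budgets of 8, 8, 5, 3, 2, 2, 1, and 1, so at most 30 tokens are out at once against a pool of min(4, 8) * 8 = 32, matching the old worst case.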

src/cmd/go/internal/work/gc.go

@@ -15,6 +15,7 @@ import (
 	"path/filepath"
 	"runtime"
 	"strings"
+	"sync"
 
 	"cmd/go/internal/base"
 	"cmd/go/internal/cfg"
@@ -127,7 +128,9 @@ func (gcToolchain) gc(b *Builder, a *Action, archive string, importcfg, embedcfg
 		gcflags = append(gcflags, fuzzInstrumentFlags()...)
 	}
 	// Add -c=N to use concurrent backend compilation, if possible.
-	if c := gcBackendConcurrency(gcflags); c > 1 {
+	c, release := compilerConcurrency()
+	defer release()
+	if c > 1 {
 		defaultGcFlags = append(defaultGcFlags, fmt.Sprintf("-c=%d", c))
 	}
@@ -177,8 +180,9 @@ func (gcToolchain) gc(b *Builder, a *Action, archive string, importcfg, embedcfg
 	return ofile, output, err
 }
 
-// gcBackendConcurrency returns the backend compiler concurrency level for a package compilation.
-func gcBackendConcurrency(gcflags []string) int {
+// compilerConcurrency returns the compiler concurrency level for a package compilation.
+// The returned function must be called after the compile finishes.
+func compilerConcurrency() (int, func()) {
 	// First, check whether we can use -c at all for this compilation.
 	canDashC := concurrentGCBackendCompilationEnabledByDefault
@@ -199,7 +203,7 @@ func gcBackendConcurrency(gcflags []string) int {
 	}
 	if !canDashC {
-		return 1
+		return 1, func() {}
 	}
 
 	// Decide how many concurrent backend compilations to allow.
@@ -212,29 +216,60 @@
 	// of the overall compiler execution, so c==1 for much of the build.
 	// So don't worry too much about that interaction for now.
 	//
-	// However, in practice, setting c above 4 tends not to help very much.
-	// See the analysis in CL 41192.
+	// But to keep things reasonable, we maintain a cap on the total number of
+	// concurrent backend compiles. (If we gave each compile action the full GOMAXPROCS, we could
+	// potentially have GOMAXPROCS^2 running compile goroutines) In the past, we'd limit
+	// the number of concurrent backend compiles per process to 4, which would result in a worst-case number
+	// of backend compiles of 4*cfg.BuildP. Because some compile processes benefit from having
+	// a larger number of compiles, especially when the compile action is the only
+	// action running, we'll allow the max value to be larger, but ensure that the
+	// total number of backend compiles never exceeds that previous worst-case number.
+	// This is implemented using a pool of tokens that are given out. We'll set aside enough
+	// tokens to make sure we don't run out, and then give half of the remaining tokens (up to
+	// GOMAXPROCS) to each compile action that requests it.
 	//
-	// TODO(josharian): attempt to detect whether this particular compilation
-	// is likely to be a bottleneck, e.g. when:
-	// - it has no successor packages to compile (usually package main)
-	// - all paths through the build graph pass through it
-	// - critical path scheduling says it is high priority
-	// and in such a case, set c to runtime.GOMAXPROCS(0).
-	// By default this is the same as runtime.NumCPU.
-	// We do this now when p==1.
-	// To limit parallelism, set GOMAXPROCS below numCPU; this may be useful
+	// As a user, to limit parallelism, set GOMAXPROCS below numCPU; this may be useful
 	// on a low-memory builder, or if a deterministic build order is required.
-	c := runtime.GOMAXPROCS(0)
 	if cfg.BuildP == 1 {
 		// No process parallelism, do not cap compiler parallelism.
-		return c
+		return maxCompilerConcurrency, func() {}
 	}
-	// Some process parallelism. Set c to min(4, maxprocs).
-	if c > 4 {
-		c = 4
+
+	// Cap compiler parallelism using the pool.
+	tokensMu.Lock()
+	defer tokensMu.Unlock()
+	concurrentProcesses++
+	// Set aside tokens so that we don't run out if we were running cfg.BuildP concurrent compiles.
+	// We'll set aside one token for each of the action goroutines that aren't currently running a compile.
+	setAside := cfg.BuildP - concurrentProcesses
+	availableTokens := tokens - setAside
+	// Grab half the remaining tokens: but with a floor of at least 1 token, and
+	// a ceiling of the max backend concurrency.
+	c := max(min(availableTokens/2, maxCompilerConcurrency), 1)
+	tokens -= c
+	// Successfully grabbed the tokens.
+	return c, func() {
+		tokensMu.Lock()
+		defer tokensMu.Unlock()
+		tokens += c
 	}
-	return c
 }
+
+var maxCompilerConcurrency = runtime.GOMAXPROCS(0) // max value we will use for -c
+
+var (
+	tokensMu            sync.Mutex
+	tokens              int // number of available tokens
+	concurrentProcesses int // number of currently running compiles
+)
+
+// initCompilerConcurrencyPool sets the number of tokens in the pool. It needs
+// to be run after init, so that it can use the value of cfg.BuildP.
+func initCompilerConcurrencyPool() {
+	// Size the pool so that the worst case total number of compiles is not more
+	// than what it was when we capped the concurrency to 4.
+	oldConcurrencyCap := min(4, maxCompilerConcurrency)
+	tokens = oldConcurrencyCap * cfg.BuildP
+}
 
 // trimpath returns the -trimpath argument to use
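
A worked example of the numbers above: with -p=4 and GOMAXPROCS=16, initCompilerConcurrencyPool sizes the pool at min(4, 16) * 4 = 16 tokens. If all four action goroutines then request a compile back to back, the first sees concurrentProcesses=1, so setAside=3, availableTokens=13, and c = min(13/2, 16) = 6, already more than the old per-process cap of 4. The next three requests receive 4, 2, and 2, so at most 14 tokens are outstanding, which stays under the old worst case of 4*cfg.BuildP = 16.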

src/cmd/go/internal/work/init.go

@@ -60,6 +60,7 @@ func BuildInit(loaderstate *modload.State) {
 	modload.Init(loaderstate)
 	instrumentInit()
 	buildModeInit()
+	initCompilerConcurrencyPool()
 	cfgChangedEnv = makeCfgChangedEnv()
 	if err := fsys.Init(); err != nil {
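
Note the ordering: initCompilerConcurrencyPool is called from BuildInit rather than from a package init function so that cfg.BuildP already reflects the -p flag (whose default is GOMAXPROCS) when the pool is sized. With the defaults on a 16-core machine the pool holds min(4, 16) * 16 = 64 tokens; with -p=1 the pool is never consulted at all, because compilerConcurrency returns the uncapped maxCompilerConcurrency for single-process builds.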