cmd/compile: propagate desired registers through phi nodes

Do the TODO in computeDesired to trace desired-register info through phis.
Propagate the phi's desired registers back to each phi input and
record them against that input's predecessor.

Innermost loop back-edges are preserved as dead ends.
Pulling post-loop desires up into a loop risks distorting hot code
to save a one-time register shuffle at the end.
We could use fancier heuristics for where to place cul-de-sacs,
but some experimentation suggested it wasn't worth more effort or code.
However, if problems arise, we may want to revisit.

Impact:

Compiling std cmd with -gcflags=all=-S and counting generated
register-to-register move instructions:

  amd64: 699,653 -> 686,254 (-1.92%)
  arm64: 865,580 -> 848,076 (-2.02%)

Performance benchmarks are super noisy, but indicate slight (<1%)
geomean improvements.

Change-Id: Ic595c14c3611af33e5a83892ba990f55765f426e
Reviewed-on: https://go-review.googlesource.com/c/go/+/774720
Auto-Submit: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Reviewed-by: Daniel Morsing <daniel.morsing@gmail.com>
LUCI-TryBot-Result: golang-scoped@luci-project-accounts.iam.gserviceaccount.com <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
Josh Bleecher Snyder 2026-04-20 09:41:35 -07:00 committed by Gopher Robot
parent 55089b9e27
commit 5b106947d1

View file

@@ -3244,24 +3244,34 @@ func (s *regAllocState) computeDesired() {
// TODO: Can we speed this up using the liveness information we have already
// from computeLive?
// TODO: Since we don't propagate information through phi nodes, can we do
// this as a single dominator tree walk instead of the iterative solution?
var desired desiredState
f := s.f
po := f.postorder()
maxPreds := 0
for _, b := range f.Blocks {
maxPreds = max(maxPreds, len(b.Preds))
}
// phiPrefs[i] collects desired registers for phi inputs coming from b.Preds[i].
phiPrefs := make([]desiredState, maxPreds)
for {
changed := false
for _, b := range po {
desired.copy(&s.desired[b.ID])
for i := len(b.Values) - 1; i >= 0; i-- {
for i := range b.Preds {
phiPrefs[i].reset()
}
var headerLoop *loop // loop whose header is b, if any
if l := s.loopnest.b2l[b.ID]; l != nil && l.header == b {
headerLoop = l
}
// Process non-phis, then phis.
i := len(b.Values) - 1
for ; i >= 0; i-- {
v := b.Values[i]
prefs := desired.remove(v.ID)
if v.Op == OpPhi {
// TODO: if v is a phi, save desired register for phi inputs.
// For now, we just drop it and don't propagate
// desired registers back though phi nodes.
continue
break
}
prefs := desired.remove(v.ID)
regspec := s.regspec(v)
// Cancel desired registers if they get clobbered.
desired.clobber(regspec.clobbers)
@@ -3286,9 +3296,33 @@ func (s *regAllocState) computeDesired() {
desired.addList(v.Args[0].ID, prefs)
}
}
for _, e := range b.Preds {
for ; i >= 0; i-- {
v := b.Values[i]
prefs := desired.remove(v.ID)
if prefs[0] == noRegister {
continue
}
// Phi desires go to phiPrefs (per-pred), so drop them from desired.avoid.
// The merge below re-adds any bits other entries still need.
for _, r := range prefs {
if r != noRegister {
desired.avoid = desired.avoid.minus(regMaskAt(r))
}
}
// Propagate v's desired registers back to its args.
for pidx, a := range v.Args {
if headerLoop != nil && s.loopnest.b2l[b.Preds[pidx].b.ID] == headerLoop {
// Skip direct back-edges to avoid pessimizing the loop body to skip a single reg-reg move.
// We check only the immediate loop; it is simple and empirically sufficient.
continue
}
phiPrefs[pidx].addList(a.ID, prefs)
}
}
for pidx, e := range b.Preds {
p := e.b
changed = s.desired[p.ID].merge(&desired) || changed
changed = s.desired[p.ID].merge(&phiPrefs[pidx]) || changed
}
}
if !changed || (!s.loopnest.hasIrreducible && len(s.loopnest.loops) == 0) {
@@ -3461,6 +3495,12 @@ func (d *desiredState) clobber(m regMask) {
d.avoid = d.avoid.minus(m)
}
// reset empties d so that it can be filled again from scratch.
// The avoid mask is set to its zero value, and entries is truncated
// to length zero, which keeps its backing array available for reuse.
func (d *desiredState) reset() {
	d.avoid = regMask{}
	d.entries = d.entries[:0]
}
// copy copies a desired state from another desiredState x.
func (d *desiredState) copy(x *desiredState) {
d.entries = append(d.entries[:0], x.entries...)