mirror of
https://github.com/golang/go.git
synced 2026-06-28 03:40:37 +00:00
cmd/compile: propagate desired registers through phi nodes
Do the TODO in computeDesired to trace desired-register info through phis. Propagate the phi's desired registers back to each phi input and record them against that input's predecessor. Innermost loop back-edges are preserved as dead ends: pulling post-loop desires up into a loop risks distorting hot code to save a one-time register shuffle at the end. We could use fancier heuristics for where to place cul-de-sacs, but some experimentation suggested it wasn't worth more effort or code. However, if problems arise, we may want to revisit. Impact: Compiling std cmd with -gcflags=all=-S and counting generated register-to-register move instructions: amd64: 699,653 -> 686,254 (-1.92%); arm64: 865,580 -> 848,076 (-2.02%). Performance benchmarks are super noisy, but indicate slight (<1%) geomean improvements. Change-Id: Ic595c14c3611af33e5a83892ba990f55765f426e Reviewed-on: https://go-review.googlesource.com/c/go/+/774720 Auto-Submit: Keith Randall <khr@golang.org> Reviewed-by: Keith Randall <khr@google.com> Reviewed-by: Keith Randall <khr@golang.org> Reviewed-by: Dmitri Shuralyov <dmitshur@google.com> Reviewed-by: Daniel Morsing <daniel.morsing@gmail.com> LUCI-TryBot-Result: golang-scoped@luci-project-accounts.iam.gserviceaccount.com <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
parent
55089b9e27
commit
5b106947d1
1 changed files with 49 additions and 9 deletions
|
|
@ -3244,24 +3244,34 @@ func (s *regAllocState) computeDesired() {
|
|||
|
||||
// TODO: Can we speed this up using the liveness information we have already
|
||||
// from computeLive?
|
||||
// TODO: Since we don't propagate information through phi nodes, can we do
|
||||
// this as a single dominator tree walk instead of the iterative solution?
|
||||
var desired desiredState
|
||||
f := s.f
|
||||
po := f.postorder()
|
||||
maxPreds := 0
|
||||
for _, b := range f.Blocks {
|
||||
maxPreds = max(maxPreds, len(b.Preds))
|
||||
}
|
||||
// phiPrefs[i] collects desired registers for phi inputs coming from b.Preds[i].
|
||||
phiPrefs := make([]desiredState, maxPreds)
|
||||
for {
|
||||
changed := false
|
||||
for _, b := range po {
|
||||
desired.copy(&s.desired[b.ID])
|
||||
for i := len(b.Values) - 1; i >= 0; i-- {
|
||||
for i := range b.Preds {
|
||||
phiPrefs[i].reset()
|
||||
}
|
||||
var headerLoop *loop // loop whose header is b, if any
|
||||
if l := s.loopnest.b2l[b.ID]; l != nil && l.header == b {
|
||||
headerLoop = l
|
||||
}
|
||||
// Process non-phis, then phis.
|
||||
i := len(b.Values) - 1
|
||||
for ; i >= 0; i-- {
|
||||
v := b.Values[i]
|
||||
prefs := desired.remove(v.ID)
|
||||
if v.Op == OpPhi {
|
||||
// TODO: if v is a phi, save desired register for phi inputs.
|
||||
// For now, we just drop it and don't propagate
|
||||
// desired registers back through phi nodes.
|
||||
continue
|
||||
break
|
||||
}
|
||||
prefs := desired.remove(v.ID)
|
||||
regspec := s.regspec(v)
|
||||
// Cancel desired registers if they get clobbered.
|
||||
desired.clobber(regspec.clobbers)
|
||||
|
|
@ -3286,9 +3296,33 @@ func (s *regAllocState) computeDesired() {
|
|||
desired.addList(v.Args[0].ID, prefs)
|
||||
}
|
||||
}
|
||||
for _, e := range b.Preds {
|
||||
for ; i >= 0; i-- {
|
||||
v := b.Values[i]
|
||||
prefs := desired.remove(v.ID)
|
||||
if prefs[0] == noRegister {
|
||||
continue
|
||||
}
|
||||
// Phi desires go to phiPrefs (per-pred), so drop them from desired.avoid.
|
||||
// The merge below re-adds any bits other entries still need.
|
||||
for _, r := range prefs {
|
||||
if r != noRegister {
|
||||
desired.avoid = desired.avoid.minus(regMaskAt(r))
|
||||
}
|
||||
}
|
||||
// Propagate v's desired registers back to its args.
|
||||
for pidx, a := range v.Args {
|
||||
if headerLoop != nil && s.loopnest.b2l[b.Preds[pidx].b.ID] == headerLoop {
|
||||
// Skip direct back-edges: pessimizing the loop body just to save a single reg-reg move is not worth it.
|
||||
// We check only the immediate loop; it is simple and empirically sufficient.
|
||||
continue
|
||||
}
|
||||
phiPrefs[pidx].addList(a.ID, prefs)
|
||||
}
|
||||
}
|
||||
for pidx, e := range b.Preds {
|
||||
p := e.b
|
||||
changed = s.desired[p.ID].merge(&desired) || changed
|
||||
changed = s.desired[p.ID].merge(&phiPrefs[pidx]) || changed
|
||||
}
|
||||
}
|
||||
if !changed || (!s.loopnest.hasIrreducible && len(s.loopnest.loops) == 0) {
|
||||
|
|
@ -3461,6 +3495,12 @@ func (d *desiredState) clobber(m regMask) {
|
|||
d.avoid = d.avoid.minus(m)
|
||||
}
|
||||
|
||||
// reset prepares d for re-use: it truncates d.entries to zero length by
// re-slicing (the existing slice is kept rather than replaced) and sets
// d.avoid to the zero regMask.
|
||||
func (d *desiredState) reset() {
|
||||
d.entries = d.entries[:0]
|
||||
d.avoid = regMask{}
|
||||
}
|
||||
|
||||
// copy copies a desired state from another desiredState x.
|
||||
func (d *desiredState) copy(x *desiredState) {
|
||||
d.entries = append(d.entries[:0], x.entries...)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue