cmd/compile: regalloc: handle desired registers of 2-output insns

This is particularly important for 2-word load instructions.
The classic example is:

    func f(p *string) string {
        return *p
    }

We want the two loads to put the return values directly into
the two ABI return registers.

At this point in the stack, cmd/go is 1.1% smaller.

Change-Id: I51fd1710238e81d15aab2bfb816d73c8e7c207b1
Reviewed-on: https://go-review.googlesource.com/c/go/+/631137
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Keith Randall <khr@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
Keith Randall 2024-11-23 10:58:47 -08:00
parent 20d7c57422
commit a0029e95e5
2 changed files with 30 additions and 1 deletions

View file

@ -930,6 +930,7 @@ func (s *regAllocState) regalloc(f *Func) {
// Data structure used for computing desired registers.
var desired desiredState
desiredSecondReg := map[ID][4]register{} // desired register allocation for 2nd part of a tuple
// Desired registers for inputs & outputs for each instruction in the block.
type dentry struct {
@ -949,6 +950,7 @@ func (s *regAllocState) regalloc(f *Func) {
s.curBlock = b
s.startRegsMask = 0
s.usedSinceBlockStart = 0
clear(desiredSecondReg)
// Initialize regValLiveSet and uses fields for this block.
// Walk backwards through the block doing liveness analysis.
@ -1346,6 +1348,11 @@ func (s *regAllocState) regalloc(f *Func) {
}
dinfo[i].in[j] = desired.get(a.ID)
}
if v.Op == OpSelect1 && prefs[0] != noRegister {
// Save desired registers of select1 for
// use by the tuple generating instruction.
desiredSecondReg[v.Args[0].ID] = prefs
}
}
// Process all the non-phi values.
@ -1748,6 +1755,17 @@ func (s *regAllocState) regalloc(f *Func) {
}
}
}
if out.idx == 1 {
if prefs, ok := desiredSecondReg[v.ID]; ok {
for _, r := range prefs {
if r != noRegister && (mask&^s.used)>>r&1 != 0 {
// Desired register is allowed and unused.
mask = regMask(1) << r
break
}
}
}
}
// Avoid registers we're saving for other values.
if mask&^desired.avoid&^s.nospill&^s.used != 0 {
mask &^= desired.avoid
@ -2874,7 +2892,8 @@ type desiredStateEntry struct {
// Registers it would like to be in, in priority order.
// Unused slots are filled with noRegister.
// For opcodes that return tuples, we track desired registers only
// for the first element of the tuple.
// for the first element of the tuple (see desiredSecondReg for
// tracking the desired register for second part of a tuple).
regs [4]register
}

View file

@ -1004,6 +1004,16 @@ func dwloadArg(a [2]int64) int64 {
return a[0] + a[1]
}
// dwloadResult1 returns the string that p points to.
// The arm64 check below expects the load of the two-word string value to be
// a single LDP from the address in R0 with destination registers R0 and R1,
// i.e. the loaded words are placed straight into the result registers.
func dwloadResult1(p *string) string {
// arm64:"LDP\t\\(R0\\), \\(R0, R1\\)"
return *p
}
// dwloadResult2 returns the two elements of *p in swapped order: p[1] as the
// first result and p[0] as the second.
// The arm64 check below expects a single LDP from the address in R0 whose
// destination pair is (R1, R0), so the swap is done by the choice of
// destination registers for the load.
func dwloadResult2(p *[2]int64) (int64, int64) {
// arm64:"LDP\t\\(R0\\), \\(R1, R0\\)"
return p[1], p[0]
}
// ---------------------------------- //
// Arm64 double-register stores //
// ---------------------------------- //