cmd/compile: remove rematerializable values from live set across calls

Remove rematerializable values in the live set across call operations,
preventing dead constant rematerialization.

When computing live values across function calls, rematerializable
values were being kept in the live set unnecessarily. This caused
the shuffling to rematerialize dead constants.

This change leads to code size reduction on arm64 linux:
    Executable            Old .text  New .text     Change
    -------------------------------------------------------
    asm                     1969236    1964356     -0.25%
    cgo                     1739588    1734884     -0.27%
    compile                 8950788    8932500     -0.20%
    cover                   1877268    1872916     -0.23%
    link                    2572660    2565076     -0.29%
    preprofile               866772     863828     -0.34%
    vet                     2888628    2881028     -0.26%

There seems also some compile time effect:

                        orig.results             uexp.results
                           sec/op       sec/op     vs base
BleveIndexBatch100-4       7.414 ± 2%    7.352 ± 1%       ~ (p=0.218 n=10)
ESBuildThreeJS-4          777.3m ± 1%   778.1m ± 1%       ~ (p=0.529 n=10)
ESBuildRomeTS-4           197.3m ± 1%   199.0m ± 1%       ~ (p=0.143 n=10)
EtcdPut-4                 64.92m ± 2%   64.95m ± 2%       ~ (p=0.912 n=10)
EtcdSTM-4                 323.9m ± 1%   323.0m ± 1%       ~ (p=0.393 n=10)
GoBuildKubelet-4           160.1 ± 0%    159.4 ± 0%  -0.42% (p=0.001 n=10)
GoBuildKubeletLink-4       12.40 ± 1%    12.27 ± 1%       ~ (p=0.529 n=10)
GoBuildIstioctl-4          125.8 ± 0%    125.2 ± 0%  -0.42% (p=0.000 n=10)
GoBuildIstioctlLink-4      8.679 ± 0%    8.686 ± 1%       ~ (p=0.912 n=10)
GoBuildFrontend-4          49.18 ± 0%    48.73 ± 0%  -0.92% (p=0.000 n=10)
GoBuildFrontendLink-4      2.300 ± 1%    2.292 ± 1%  -0.35% (p=0.043 n=10)
GopherLuaKNucleotide-4     37.77 ± 6%    38.07 ± 2%       ~ (p=0.218 n=10)
MarkdownRenderXHTML-4     274.3m ± 0%   275.2m ± 0%  +0.34% (p=0.003 n=10)
Tile38QueryLoad-4         650.7µ ± 0%   650.2µ ± 0%       ~ (p=0.971 n=10)
geomean                    2.130         2.127       -0.15%

Change-Id: I7a766195ee17bfd9e47d7a940864619f553416ff
Reviewed-on: https://go-review.googlesource.com/c/go/+/707415
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
Auto-Submit: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
Alexander Musman 2025-09-28 18:48:46 +03:00 committed by Gopher Robot
parent 519ae514ab
commit 284379cdfc
2 changed files with 54 additions and 0 deletions

View file

@ -2806,6 +2806,7 @@ func (s *regAllocState) computeLive() {
s.live = make([][]liveInfo, f.NumBlocks())
s.desired = make([]desiredState, f.NumBlocks())
var phis []*Value
rematIDs := make([]ID, 0, 64)
live := f.newSparseMapPos(f.NumValues())
defer f.retSparseMapPos(live)
@ -2858,9 +2859,20 @@ func (s *regAllocState) computeLive() {
continue
}
if opcodeTable[v.Op].call {
rematIDs = rematIDs[:0]
c := live.contents()
for i := range c {
c[i].val += unlikelyDistance
vid := c[i].key
if s.values[vid].rematerializeable {
rematIDs = append(rematIDs, vid)
}
}
// We don't spill rematerializeable values, and assuming they
// are live across a call would only force shuffle to add some
// (dead) constant rematerialization. Remove them.
for _, r := range rematIDs {
live.remove(r)
}
}
for _, a := range v.Args {

View file

@ -265,6 +265,48 @@ func TestClobbersArg1(t *testing.T) {
}
}
func TestNoRematerializeDeadConstant(t *testing.T) {
c := testConfigARM64(t)
f := c.Fun("b1",
Bloc("b1",
Valu("mem", OpInitMem, types.TypeMem, 0, nil),
Valu("addr", OpArg, c.config.Types.Int32.PtrTo(), 0, c.Temp(c.config.Types.Int32.PtrTo())),
Valu("const", OpARM64MOVDconst, c.config.Types.Int32, -1, nil), // Original constant
Valu("cmp", OpARM64CMPconst, types.TypeFlags, 0, nil, "const"),
Goto("b2"),
),
Bloc("b2",
Valu("phi_mem", OpPhi, types.TypeMem, 0, nil, "mem", "callmem"),
Eq("cmp", "b6", "b3"),
),
Bloc("b3",
Valu("call", OpARM64CALLstatic, types.TypeMem, 0, AuxCallLSym("_"), "phi_mem"),
Valu("callmem", OpSelectN, types.TypeMem, 0, nil, "call"),
Eq("cmp", "b5", "b4"),
),
Bloc("b4", // A block where we don't really need to rematerialize the constant -1
Goto("b2"),
),
Bloc("b5",
Valu("user", OpAMD64MOVQstore, types.TypeMem, 0, nil, "addr", "const", "callmem"),
Exit("user"),
),
Bloc("b6",
Exit("phi_mem"),
),
)
regalloc(f.f)
checkFunc(f.f)
// Check that in block b4, there's no dead rematerialization of the constant -1
for _, v := range f.blocks["b4"].Values {
if v.Op == OpARM64MOVDconst && v.AuxInt == -1 {
t.Errorf("constant -1 rematerialized in loop block b4: %s", v.LongString())
}
}
}
func numSpills(b *Block) int {
return numOps(b, OpStoreReg)
}