[dev.simd] cmd/compile: account for rematerializeable ops' output reginfo

This CL implements the check for a rematerializeable value's output
regspec at its rematerialization site. It has some potential problems;
please see the TODO in regalloc.go.

Fixes #70451.

Change-Id: Ib624b967031776851136554719e939e9bf116b7c
Reviewed-on: https://go-review.googlesource.com/c/go/+/695315
Reviewed-by: David Chase <drchase@google.com>
TryBot-Bypass: David Chase <drchase@google.com>
This commit is contained in:
Junyang Shao 2025-08-12 16:53:44 +00:00
parent a4ad41708d
commit 9783f86bc8
4 changed files with 54 additions and 0 deletions

View file

@ -102,6 +102,7 @@ func (c *Config) NewFunc(fe Frontend, cache *Cache) *Func {
NamedValues: make(map[LocalSlot][]*Value), NamedValues: make(map[LocalSlot][]*Value),
CanonicalLocalSlots: make(map[LocalSlot]*LocalSlot), CanonicalLocalSlots: make(map[LocalSlot]*LocalSlot),
CanonicalLocalSplits: make(map[LocalSlotSplitKey]*LocalSlot), CanonicalLocalSplits: make(map[LocalSlotSplitKey]*LocalSlot),
OwnAux: &AuxCall{},
} }
} }

View file

@ -250,6 +250,11 @@ func Exit(arg string) ctrl {
return ctrl{BlockExit, arg, []string{}} return ctrl{BlockExit, arg, []string{}}
} }
// Ret specifies a BlockRet.
// The returned ctrl carries arg in the same slot that Exit and Eq use for
// their argument/condition, and an empty (non-nil) string list in the
// final slot.
func Ret(arg string) ctrl {
	none := []string{}
	return ctrl{BlockRet, arg, none}
}
// Eq specifies a BlockAMD64EQ. // Eq specifies a BlockAMD64EQ.
func Eq(cond, sub, alt string) ctrl { func Eq(cond, sub, alt string) ctrl {
return ctrl{BlockAMD64EQ, cond, []string{sub, alt}} return ctrl{BlockAMD64EQ, cond, []string{sub, alt}}

View file

@ -609,6 +609,29 @@ func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, pos
} else if v.rematerializeable() { } else if v.rematerializeable() {
// Rematerialize instead of loading from the spill location. // Rematerialize instead of loading from the spill location.
c = v.copyIntoWithXPos(s.curBlock, pos) c = v.copyIntoWithXPos(s.curBlock, pos)
// We need to consider its output mask and potentially issue a Copy
// if there are register mask conflicts.
// This currently happens only for the SIMD package, between GP and FP
// registers, because Intel's vector extension can put an integer value
// into an FP register, where it is seen as a vector.
// Example instruction: VPSLL[BWDQ].
// Because GP and FP masks do not overlap, mask&sourceMask == 0
// detects this situation reliably.
sourceMask := s.regspec(c).outputs[0].regs
if mask&sourceMask == 0 && !onWasmStack {
s.setOrig(c, v)
s.assignReg(s.allocReg(sourceMask, v), v, c)
// v.Type for the new OpCopy is likely wrong, and it might delay the problem
// until ssa to asm lowering, which might need the types to generate the right
// assembly for OpCopy. For Intel's GP to FP move, the MOV instruction
// happens to have such a variant, so the result happens to be right.
// But it's unclear for other architectures or situations, and the problem
// might be exposed when the assembler sees illegal instructions.
// For now we still pick v.Type, because at least its size should be correct
// for the rematerialization case the amd64 SIMD package exposed.
// TODO: We might need to figure out a way to find the correct type or make
// the asm lowering use reg info only for OpCopy.
c = s.curBlock.NewValue1(pos, OpCopy, v.Type, c)
}
} else { } else {
// Load v from its spill location. // Load v from its spill location.
spill := s.makeSpill(v, s.curBlock) spill := s.makeSpill(v, s.curBlock)

View file

@ -6,6 +6,7 @@ package ssa
import ( import (
"cmd/compile/internal/types" "cmd/compile/internal/types"
"cmd/internal/obj/x86"
"fmt" "fmt"
"testing" "testing"
) )
@ -279,3 +280,27 @@ func numOps(b *Block, op Op) int {
} }
return n return n
} }
// TestRematerializeableRegCompatible builds a single-block function in which
// an OpAMD64MOVLconst value feeds both arguments of an OpAMD64POR, runs
// regalloc and checkFunc on it, and then requires that the entry block
// contains at least one OpCopy whose assigned register lies in the
// x86.REG_X0..x86.REG_X31 range.
func TestRematerializeableRegCompatible(t *testing.T) {
	c := testConfig(t)
	int32Type := c.config.Types.Int32
	float32Type := c.config.Types.Float32
	resultType := types.NewResults([]*types.Type{float32Type, types.TypeMem})

	fun := c.Fun("entry",
		Bloc("entry",
			Valu("mem", OpInitMem, types.TypeMem, 0, nil),
			Valu("x", OpAMD64MOVLconst, int32Type, 1, nil),
			Valu("a", OpAMD64POR, float32Type, 0, nil, "x", "x"),
			Valu("res", OpMakeResult, resultType, 0, nil, "a", "mem"),
			Ret("res"),
		),
	)
	regalloc(fun.f)
	checkFunc(fun.f)

	// Count the copies that landed in an X register; only OpCopy values
	// have their register queried.
	copies := 0
	for _, v := range fun.f.Blocks[0].Values {
		if v.Op != OpCopy {
			continue
		}
		if r := v.Reg(); r >= x86.REG_X0 && r <= x86.REG_X31 {
			copies++
		}
	}
	if copies == 0 {
		t.Errorf("Expects an Copy to be issued, but got: %+v", fun.f)
	}
}