mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
[dev.simd] cmd/compile: accounts rematerialize ops's output reginfo
This CL implements the check for rematerializeable value's output regspec at its remateralization site. It has some potential problems, please see the TODO in regalloc.go. Fixes #70451. Change-Id: Ib624b967031776851136554719e939e9bf116b7c Reviewed-on: https://go-review.googlesource.com/c/go/+/695315 Reviewed-by: David Chase <drchase@google.com> TryBot-Bypass: David Chase <drchase@google.com>
This commit is contained in:
parent
a4ad41708d
commit
9783f86bc8
4 changed files with 54 additions and 0 deletions
|
|
@ -102,6 +102,7 @@ func (c *Config) NewFunc(fe Frontend, cache *Cache) *Func {
|
|||
NamedValues: make(map[LocalSlot][]*Value),
|
||||
CanonicalLocalSlots: make(map[LocalSlot]*LocalSlot),
|
||||
CanonicalLocalSplits: make(map[LocalSlotSplitKey]*LocalSlot),
|
||||
OwnAux: &AuxCall{},
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -250,6 +250,11 @@ func Exit(arg string) ctrl {
|
|||
return ctrl{BlockExit, arg, []string{}}
|
||||
}
|
||||
|
||||
// Ret specifies a BlockRet.
|
||||
func Ret(arg string) ctrl {
|
||||
return ctrl{BlockRet, arg, []string{}}
|
||||
}
|
||||
|
||||
// Eq specifies a BlockAMD64EQ.
|
||||
func Eq(cond, sub, alt string) ctrl {
|
||||
return ctrl{BlockAMD64EQ, cond, []string{sub, alt}}
|
||||
|
|
|
|||
|
|
@ -609,6 +609,29 @@ func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, pos
|
|||
} else if v.rematerializeable() {
|
||||
// Rematerialize instead of loading from the spill location.
|
||||
c = v.copyIntoWithXPos(s.curBlock, pos)
|
||||
// We need to consider its output mask and potentially issue a Copy
|
||||
// if there are register mask conflicts.
|
||||
// This currently happens for the SIMD package only between GP and FP
|
||||
// register. Because Intel's vector extension can put integer value into
|
||||
// FP, which is seen as a vector. Example instruction: VPSLL[BWDQ]
|
||||
// Because GP and FP masks do not overlap, mask & outputMask == 0
|
||||
// detects this situation thoroughly.
|
||||
sourceMask := s.regspec(c).outputs[0].regs
|
||||
if mask&sourceMask == 0 && !onWasmStack {
|
||||
s.setOrig(c, v)
|
||||
s.assignReg(s.allocReg(sourceMask, v), v, c)
|
||||
// v.Type for the new OpCopy is likely wrong and it might delay the problem
|
||||
// until ssa to asm lowering, which might need the types to generate the right
|
||||
// assembly for OpCopy. For Intel's GP to FP move, it happens to be that
|
||||
// MOV instruction has such a variant so it happens to be right.
|
||||
// But it's unclear for other architectures or situations, and the problem
|
||||
// might be exposed when the assembler sees illegal instructions.
|
||||
// Right now make we still pick v.Type, because at least its size should be correct
|
||||
// for the rematerialization case the amd64 SIMD package exposed.
|
||||
// TODO: We might need to figure out a way to find the correct type or make
|
||||
// the asm lowering use reg info only for OpCopy.
|
||||
c = s.curBlock.NewValue1(pos, OpCopy, v.Type, c)
|
||||
}
|
||||
} else {
|
||||
// Load v from its spill location.
|
||||
spill := s.makeSpill(v, s.curBlock)
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ package ssa
|
|||
|
||||
import (
|
||||
"cmd/compile/internal/types"
|
||||
"cmd/internal/obj/x86"
|
||||
"fmt"
|
||||
"testing"
|
||||
)
|
||||
|
|
@ -279,3 +280,27 @@ func numOps(b *Block, op Op) int {
|
|||
}
|
||||
return n
|
||||
}
|
||||
|
||||
func TestRematerializeableRegCompatible(t *testing.T) {
|
||||
c := testConfig(t)
|
||||
f := c.Fun("entry",
|
||||
Bloc("entry",
|
||||
Valu("mem", OpInitMem, types.TypeMem, 0, nil),
|
||||
Valu("x", OpAMD64MOVLconst, c.config.Types.Int32, 1, nil),
|
||||
Valu("a", OpAMD64POR, c.config.Types.Float32, 0, nil, "x", "x"),
|
||||
Valu("res", OpMakeResult, types.NewResults([]*types.Type{c.config.Types.Float32, types.TypeMem}), 0, nil, "a", "mem"),
|
||||
Ret("res"),
|
||||
),
|
||||
)
|
||||
regalloc(f.f)
|
||||
checkFunc(f.f)
|
||||
moveFound := false
|
||||
for _, v := range f.f.Blocks[0].Values {
|
||||
if v.Op == OpCopy && x86.REG_X0 <= v.Reg() && v.Reg() <= x86.REG_X31 {
|
||||
moveFound = true
|
||||
}
|
||||
}
|
||||
if !moveFound {
|
||||
t.Errorf("Expects an Copy to be issued, but got: %+v", f.f)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue