mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
[dev.simd] cmd/compile: add instructions and rewrites for scalar-> vector moves
This required changes to the assembler so that VMOVSS and VMOVSD could handle FP constants. Change-Id: Iaa2f8df71867a3283bc058b7ec691b56a3e73621 Reviewed-on: https://go-review.googlesource.com/c/go/+/698240 Reviewed-by: Junyang Shao <shaojunyang@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
parent
5ebe2d05d5
commit
6890aa2e20
6 changed files with 275 additions and 2 deletions
|
|
@ -1723,6 +1723,24 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
|||
p.To.Type = obj.TYPE_REG
|
||||
p.To.Reg = simdReg(v)
|
||||
|
||||
case ssa.OpAMD64VMOVQload, ssa.OpAMD64VMOVDload,
|
||||
ssa.OpAMD64VMOVSSload, ssa.OpAMD64VMOVSDload:
|
||||
p := s.Prog(v.Op.Asm())
|
||||
p.From.Type = obj.TYPE_MEM
|
||||
p.From.Reg = v.Args[0].Reg()
|
||||
ssagen.AddAux(&p.From, v)
|
||||
p.To.Type = obj.TYPE_REG
|
||||
p.To.Reg = simdReg(v)
|
||||
|
||||
case ssa.OpAMD64VMOVSSconst, ssa.OpAMD64VMOVSDconst:
|
||||
// for loading constants directly into SIMD registers
|
||||
x := simdReg(v)
|
||||
p := s.Prog(v.Op.Asm())
|
||||
p.From.Type = obj.TYPE_FCONST
|
||||
p.From.Val = math.Float64frombits(uint64(v.AuxInt))
|
||||
p.To.Type = obj.TYPE_REG
|
||||
p.To.Reg = x
|
||||
|
||||
case ssa.OpAMD64VMOVD, ssa.OpAMD64VMOVQ:
|
||||
// These are for initializing the least 32/64 bits of a SIMD register from an "int".
|
||||
p := s.Prog(v.Op.Asm())
|
||||
|
|
|
|||
|
|
@ -1782,3 +1782,12 @@
|
|||
(VPBROADCASTW(128|256|512) x:(VPINSRW128 [0] (Zero128 <t>) y)) && x.Uses == 1 =>
|
||||
(VPBROADCASTW(128|256|512) (VMOVQ <types.TypeVec128> y))
|
||||
|
||||
(VMOVQ x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (VMOVQload <v.Type> [off] {sym} ptr mem)
|
||||
(VMOVD x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (VMOVDload <v.Type> [off] {sym} ptr mem)
|
||||
|
||||
(VMOVSDf2v x:(MOVSDload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (VMOVSDload <v.Type> [off] {sym} ptr mem)
|
||||
(VMOVSSf2v x:(MOVSSload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (VMOVSSload <v.Type> [off] {sym} ptr mem)
|
||||
|
||||
(VMOVSDf2v x:(MOVSDconst [c] )) => (VMOVSDconst [c] )
|
||||
(VMOVSSf2v x:(MOVSSconst [c] )) => (VMOVSSconst [c] )
|
||||
|
||||
|
|
|
|||
|
|
@ -1389,6 +1389,14 @@ func init() {
|
|||
{name: "VMOVQ", argLength: 1, reg: gpv, asm: "VMOVQ"},
|
||||
{name: "VMOVD", argLength: 1, reg: gpv, asm: "VMOVD"},
|
||||
|
||||
{name: "VMOVQload", argLength: 2, reg: fpload, asm: "VMOVQ", aux: "SymOff", typ: "UInt64", faultOnNilArg0: true, symEffect: "Read"},
|
||||
{name: "VMOVDload", argLength: 2, reg: fpload, asm: "VMOVD", aux: "SymOff", typ: "UInt32", faultOnNilArg0: true, symEffect: "Read"},
|
||||
{name: "VMOVSSload", argLength: 2, reg: fpload, asm: "VMOVSS", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"},
|
||||
{name: "VMOVSDload", argLength: 2, reg: fpload, asm: "VMOVSD", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"},
|
||||
|
||||
{name: "VMOVSSconst", reg: fp01, asm: "VMOVSS", aux: "Float32", rematerializeable: true},
|
||||
{name: "VMOVSDconst", reg: fp01, asm: "VMOVSD", aux: "Float64", rematerializeable: true},
|
||||
|
||||
{name: "VZEROUPPER", argLength: 0, asm: "VZEROUPPER"},
|
||||
{name: "VZEROALL", argLength: 0, asm: "VZEROALL"},
|
||||
|
||||
|
|
|
|||
|
|
@ -1218,6 +1218,12 @@ const (
|
|||
OpAMD64VMOVSSf2v
|
||||
OpAMD64VMOVQ
|
||||
OpAMD64VMOVD
|
||||
OpAMD64VMOVQload
|
||||
OpAMD64VMOVDload
|
||||
OpAMD64VMOVSSload
|
||||
OpAMD64VMOVSDload
|
||||
OpAMD64VMOVSSconst
|
||||
OpAMD64VMOVSDconst
|
||||
OpAMD64VZEROUPPER
|
||||
OpAMD64VZEROALL
|
||||
OpAMD64KMOVQload
|
||||
|
|
@ -18925,6 +18931,94 @@ var opcodeTable = [...]opInfo{
|
|||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VMOVQload",
|
||||
auxType: auxSymOff,
|
||||
argLen: 2,
|
||||
faultOnNilArg0: true,
|
||||
symEffect: SymRead,
|
||||
asm: x86.AVMOVQ,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VMOVDload",
|
||||
auxType: auxSymOff,
|
||||
argLen: 2,
|
||||
faultOnNilArg0: true,
|
||||
symEffect: SymRead,
|
||||
asm: x86.AVMOVD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VMOVSSload",
|
||||
auxType: auxSymOff,
|
||||
argLen: 2,
|
||||
faultOnNilArg0: true,
|
||||
symEffect: SymRead,
|
||||
asm: x86.AVMOVSS,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VMOVSDload",
|
||||
auxType: auxSymOff,
|
||||
argLen: 2,
|
||||
faultOnNilArg0: true,
|
||||
symEffect: SymRead,
|
||||
asm: x86.AVMOVSD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VMOVSSconst",
|
||||
auxType: auxFloat32,
|
||||
argLen: 0,
|
||||
rematerializeable: true,
|
||||
asm: x86.AVMOVSS,
|
||||
reg: regInfo{
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VMOVSDconst",
|
||||
auxType: auxFloat64,
|
||||
argLen: 0,
|
||||
rematerializeable: true,
|
||||
asm: x86.AVMOVSD,
|
||||
reg: regInfo{
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VZEROUPPER",
|
||||
argLen: 0,
|
||||
|
|
|
|||
|
|
@ -507,6 +507,8 @@ func rewriteValueAMD64(v *Value) bool {
|
|||
return rewriteValueAMD64_OpAMD64TESTW(v)
|
||||
case OpAMD64TESTWconst:
|
||||
return rewriteValueAMD64_OpAMD64TESTWconst(v)
|
||||
case OpAMD64VMOVD:
|
||||
return rewriteValueAMD64_OpAMD64VMOVD(v)
|
||||
case OpAMD64VMOVDQU16Masked512:
|
||||
return rewriteValueAMD64_OpAMD64VMOVDQU16Masked512(v)
|
||||
case OpAMD64VMOVDQU32Masked512:
|
||||
|
|
@ -515,6 +517,12 @@ func rewriteValueAMD64(v *Value) bool {
|
|||
return rewriteValueAMD64_OpAMD64VMOVDQU64Masked512(v)
|
||||
case OpAMD64VMOVDQU8Masked512:
|
||||
return rewriteValueAMD64_OpAMD64VMOVDQU8Masked512(v)
|
||||
case OpAMD64VMOVQ:
|
||||
return rewriteValueAMD64_OpAMD64VMOVQ(v)
|
||||
case OpAMD64VMOVSDf2v:
|
||||
return rewriteValueAMD64_OpAMD64VMOVSDf2v(v)
|
||||
case OpAMD64VMOVSSf2v:
|
||||
return rewriteValueAMD64_OpAMD64VMOVSSf2v(v)
|
||||
case OpAMD64VPANDQ512:
|
||||
return rewriteValueAMD64_OpAMD64VPANDQ512(v)
|
||||
case OpAMD64VPBROADCASTB128:
|
||||
|
|
@ -26442,6 +26450,34 @@ func rewriteValueAMD64_OpAMD64TESTWconst(v *Value) bool {
|
|||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VMOVD(v *Value) bool {
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (VMOVD x:(MOVLload [off] {sym} ptr mem))
|
||||
// cond: x.Uses == 1 && clobber(x)
|
||||
// result: @x.Block (VMOVDload <v.Type> [off] {sym} ptr mem)
|
||||
for {
|
||||
x := v_0
|
||||
if x.Op != OpAMD64MOVLload {
|
||||
break
|
||||
}
|
||||
off := auxIntToInt32(x.AuxInt)
|
||||
sym := auxToSym(x.Aux)
|
||||
mem := x.Args[1]
|
||||
ptr := x.Args[0]
|
||||
if !(x.Uses == 1 && clobber(x)) {
|
||||
break
|
||||
}
|
||||
b = x.Block
|
||||
v0 := b.NewValue0(x.Pos, OpAMD64VMOVDload, v.Type)
|
||||
v.copyOf(v0)
|
||||
v0.AuxInt = int32ToAuxInt(off)
|
||||
v0.Aux = symToAux(sym)
|
||||
v0.AddArg2(ptr, mem)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VMOVDQU16Masked512(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
|
|
@ -28799,6 +28835,114 @@ func rewriteValueAMD64_OpAMD64VMOVDQU8Masked512(v *Value) bool {
|
|||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VMOVQ(v *Value) bool {
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (VMOVQ x:(MOVQload [off] {sym} ptr mem))
|
||||
// cond: x.Uses == 1 && clobber(x)
|
||||
// result: @x.Block (VMOVQload <v.Type> [off] {sym} ptr mem)
|
||||
for {
|
||||
x := v_0
|
||||
if x.Op != OpAMD64MOVQload {
|
||||
break
|
||||
}
|
||||
off := auxIntToInt32(x.AuxInt)
|
||||
sym := auxToSym(x.Aux)
|
||||
mem := x.Args[1]
|
||||
ptr := x.Args[0]
|
||||
if !(x.Uses == 1 && clobber(x)) {
|
||||
break
|
||||
}
|
||||
b = x.Block
|
||||
v0 := b.NewValue0(x.Pos, OpAMD64VMOVQload, v.Type)
|
||||
v.copyOf(v0)
|
||||
v0.AuxInt = int32ToAuxInt(off)
|
||||
v0.Aux = symToAux(sym)
|
||||
v0.AddArg2(ptr, mem)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VMOVSDf2v(v *Value) bool {
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (VMOVSDf2v x:(MOVSDload [off] {sym} ptr mem))
|
||||
// cond: x.Uses == 1 && clobber(x)
|
||||
// result: @x.Block (VMOVSDload <v.Type> [off] {sym} ptr mem)
|
||||
for {
|
||||
x := v_0
|
||||
if x.Op != OpAMD64MOVSDload {
|
||||
break
|
||||
}
|
||||
off := auxIntToInt32(x.AuxInt)
|
||||
sym := auxToSym(x.Aux)
|
||||
mem := x.Args[1]
|
||||
ptr := x.Args[0]
|
||||
if !(x.Uses == 1 && clobber(x)) {
|
||||
break
|
||||
}
|
||||
b = x.Block
|
||||
v0 := b.NewValue0(x.Pos, OpAMD64VMOVSDload, v.Type)
|
||||
v.copyOf(v0)
|
||||
v0.AuxInt = int32ToAuxInt(off)
|
||||
v0.Aux = symToAux(sym)
|
||||
v0.AddArg2(ptr, mem)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVSDf2v x:(MOVSDconst [c] ))
|
||||
// result: (VMOVSDconst [c] )
|
||||
for {
|
||||
x := v_0
|
||||
if x.Op != OpAMD64MOVSDconst {
|
||||
break
|
||||
}
|
||||
c := auxIntToFloat64(x.AuxInt)
|
||||
v.reset(OpAMD64VMOVSDconst)
|
||||
v.AuxInt = float64ToAuxInt(c)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VMOVSSf2v(v *Value) bool {
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (VMOVSSf2v x:(MOVSSload [off] {sym} ptr mem))
|
||||
// cond: x.Uses == 1 && clobber(x)
|
||||
// result: @x.Block (VMOVSSload <v.Type> [off] {sym} ptr mem)
|
||||
for {
|
||||
x := v_0
|
||||
if x.Op != OpAMD64MOVSSload {
|
||||
break
|
||||
}
|
||||
off := auxIntToInt32(x.AuxInt)
|
||||
sym := auxToSym(x.Aux)
|
||||
mem := x.Args[1]
|
||||
ptr := x.Args[0]
|
||||
if !(x.Uses == 1 && clobber(x)) {
|
||||
break
|
||||
}
|
||||
b = x.Block
|
||||
v0 := b.NewValue0(x.Pos, OpAMD64VMOVSSload, v.Type)
|
||||
v.copyOf(v0)
|
||||
v0.AuxInt = int32ToAuxInt(off)
|
||||
v0.Aux = symToAux(sym)
|
||||
v0.AddArg2(ptr, mem)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVSSf2v x:(MOVSSconst [c] ))
|
||||
// result: (VMOVSSconst [c] )
|
||||
for {
|
||||
x := v_0
|
||||
if x.Op != OpAMD64MOVSSconst {
|
||||
break
|
||||
}
|
||||
c := auxIntToFloat32(x.AuxInt)
|
||||
v.reset(OpAMD64VMOVSSconst)
|
||||
v.AuxInt = float32ToAuxInt(c)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VPANDQ512(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
|
|
|
|||
|
|
@ -236,7 +236,7 @@ func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
|
|||
// Rewrite float constants to values stored in memory.
|
||||
switch p.As {
|
||||
// Convert AMOVSS $(0), Xx to AXORPS Xx, Xx
|
||||
case AMOVSS:
|
||||
case AMOVSS, AVMOVSS:
|
||||
if p.From.Type == obj.TYPE_FCONST {
|
||||
// f == 0 can't be used here due to -0, so use Float64bits
|
||||
if f := p.From.Val.(float64); math.Float64bits(f) == 0 {
|
||||
|
|
@ -272,7 +272,7 @@ func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
|
|||
p.From.Offset = 0
|
||||
}
|
||||
|
||||
case AMOVSD:
|
||||
case AMOVSD, AVMOVSD:
|
||||
// Convert AMOVSD $(0), Xx to AXORPS Xx, Xx
|
||||
if p.From.Type == obj.TYPE_FCONST {
|
||||
// f == 0 can't be used here due to -0, so use Float64bits
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue