Mirror of https://github.com/golang/go.git (synced 2025-12-08 06:10:04 +00:00)
[dev.simd] cmd/compile: adapters for simd
This combines several CLs into a single patch of "glue" for the generated
SIMD extensions. This glue includes GOEXPERIMENT checks that disable the
creation of user-visible "simd" types and that disable the registration of
"simd" intrinsics. The simd type checks were changed to work for either
package "simd" or "internal/simd" so that moving that package won't be
quite so fragile.

The combined CLs:

cmd/compile, internal/simd: glue for adding SIMD extensions to Go
cmd/compile: theft of Cherry's sample SIMD compilation

Change-Id: Id44e2f4bafe74032c26de576a8691b6f7d977e01
Reviewed-on: https://go-review.googlesource.com/c/go/+/675598
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Junyang Shao <shaojunyang@google.com>
This commit is contained in: parent 2ef7106881, commit 04b1030ae4.
26 changed files with 2196 additions and 675 deletions.
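For orientation before the diff, here is a hedged sketch of what the new glue enables at the source level. The vector API names (LoadFloat64x4FromSlice, Mul, Store) are taken from the testdata/sample.go file added by this change; the build tag, the package name, and the demo function are assumptions about how the experiment is meant to be used, not something this commit defines.

//go:build goexperiment.simd

package demo

import "internal/simd" // assumption: slated to move to plain "simd" per the TODOs in this change

// SumOfProducts is a toy four-element inner product, modeled on ip64_1
// in testdata/sample.go; it assumes len(a) and len(b) are at least 4.
func SumOfProducts(a, b []float64) float64 {
	va := simd.LoadFloat64x4FromSlice(a) // bounds-checked 256-bit load
	vb := simd.LoadFloat64x4FromSlice(b)
	var tmp [4]float64
	va.Mul(vb).Store(tmp[:]) // one vector multiply, then a 32-byte store
	return tmp[0] + tmp[1] + tmp[2] + tmp[3]
}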
|
|
@ -150,12 +150,12 @@ func appendParamTypes(rts []*types.Type, t *types.Type) []*types.Type {
|
|||
if w == 0 {
|
||||
return rts
|
||||
}
|
||||
if t.IsScalar() || t.IsPtrShaped() {
|
||||
if t.IsScalar() || t.IsPtrShaped() || t.IsSIMD() {
|
||||
if t.IsComplex() {
|
||||
c := types.FloatForComplex(t)
|
||||
return append(rts, c, c)
|
||||
} else {
|
||||
if int(t.Size()) <= types.RegSize {
|
||||
if int(t.Size()) <= types.RegSize || t.IsSIMD() {
|
||||
return append(rts, t)
|
||||
}
|
||||
// assume 64bit int on 32-bit machine
|
||||
|
|
@ -199,6 +199,9 @@ func appendParamOffsets(offsets []int64, at int64, t *types.Type) ([]int64, int6
|
|||
if w == 0 {
|
||||
return offsets, at
|
||||
}
|
||||
if t.IsSIMD() {
|
||||
return append(offsets, at), at + w
|
||||
}
|
||||
if t.IsScalar() || t.IsPtrShaped() {
|
||||
if t.IsComplex() || int(t.Size()) > types.RegSize { // complex and *int64 on 32-bit
|
||||
s := w / 2
|
||||
|
|
@ -521,11 +524,11 @@ func (state *assignState) allocateRegs(regs []RegIndex, t *types.Type) []RegInde
|
|||
}
|
||||
ri := state.rUsed.intRegs
|
||||
rf := state.rUsed.floatRegs
|
||||
if t.IsScalar() || t.IsPtrShaped() {
|
||||
if t.IsScalar() || t.IsPtrShaped() || t.IsSIMD() {
|
||||
if t.IsComplex() {
|
||||
regs = append(regs, RegIndex(rf+state.rTotal.intRegs), RegIndex(rf+1+state.rTotal.intRegs))
|
||||
rf += 2
|
||||
} else if t.IsFloat() {
|
||||
} else if t.IsFloat() || t.IsSIMD() {
|
||||
regs = append(regs, RegIndex(rf+state.rTotal.intRegs))
|
||||
rf += 1
|
||||
} else {
|
||||
|
|
|
|||
src/cmd/compile/internal/amd64/simdssa.go (new file, 19 lines)
|
|
@ -0,0 +1,19 @@
|
|||
// Copyright 2025 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Placeholder for generated glue to come later
|
||||
package amd64
|
||||
|
||||
import (
|
||||
"cmd/compile/internal/ssa"
|
||||
"cmd/compile/internal/ssagen"
|
||||
)
|
||||
|
||||
func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||
switch v.Op {
|
||||
default:
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
|
@ -67,6 +67,8 @@ func storeByType(t *types.Type) obj.As {
|
|||
case 8:
|
||||
return x86.AMOVSD
|
||||
}
|
||||
} else if t.IsSIMD() {
|
||||
return simdMov(width)
|
||||
} else {
|
||||
switch width {
|
||||
case 1:
|
||||
|
|
@ -92,6 +94,8 @@ func moveByType(t *types.Type) obj.As {
|
|||
// There is no xmm->xmm move with 1 byte opcode,
|
||||
// so use movups, which has 2 byte opcode.
|
||||
return x86.AMOVUPS
|
||||
} else if t.IsSIMD() {
|
||||
return simdMov(t.Size())
|
||||
} else {
|
||||
switch t.Size() {
|
||||
case 1:
|
||||
|
|
@ -1038,6 +1042,10 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
|||
}
|
||||
x := v.Args[0].Reg()
|
||||
y := v.Reg()
|
||||
if v.Type.IsSIMD() {
|
||||
x = simdReg(v.Args[0])
|
||||
y = simdReg(v)
|
||||
}
|
||||
if x != y {
|
||||
opregreg(s, moveByType(v.Type), y, x)
|
||||
}
|
||||
|
|
@ -1049,16 +1057,24 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
|||
p := s.Prog(loadByType(v.Type))
|
||||
ssagen.AddrAuto(&p.From, v.Args[0])
|
||||
p.To.Type = obj.TYPE_REG
|
||||
p.To.Reg = v.Reg()
|
||||
r := v.Reg()
|
||||
if v.Type.IsSIMD() {
|
||||
r = simdReg(v)
|
||||
}
|
||||
p.To.Reg = r
|
||||
|
||||
case ssa.OpStoreReg:
|
||||
if v.Type.IsFlags() {
|
||||
v.Fatalf("store flags not implemented: %v", v.LongString())
|
||||
return
|
||||
}
|
||||
r := v.Args[0].Reg()
|
||||
if v.Type.IsSIMD() {
|
||||
r = simdReg(v.Args[0])
|
||||
}
|
||||
p := s.Prog(storeByType(v.Type))
|
||||
p.From.Type = obj.TYPE_REG
|
||||
p.From.Reg = v.Args[0].Reg()
|
||||
p.From.Reg = r
|
||||
ssagen.AddrAuto(&p.To, v)
|
||||
case ssa.OpAMD64LoweredHasCPUFeature:
|
||||
p := s.Prog(x86.AMOVBLZX)
|
||||
|
|
@ -1426,10 +1442,124 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
|||
p.From.Offset = int64(x)
|
||||
p.To.Type = obj.TYPE_REG
|
||||
p.To.Reg = v.Reg()
|
||||
|
||||
// XXX SIMD
|
||||
// XXX may change depending on how we handle aliased registers
|
||||
case ssa.OpAMD64Zero128, ssa.OpAMD64Zero256, ssa.OpAMD64Zero512:
|
||||
p := s.Prog(v.Op.Asm())
|
||||
p.From.Type = obj.TYPE_REG
|
||||
p.From.Reg = simdReg(v)
|
||||
p.AddRestSourceReg(simdReg(v))
|
||||
p.To.Type = obj.TYPE_REG
|
||||
p.To.Reg = simdReg(v)
|
||||
case ssa.OpAMD64VPADDD4:
|
||||
p := s.Prog(v.Op.Asm())
|
||||
p.From.Type = obj.TYPE_REG
|
||||
p.From.Reg = simdReg(v.Args[0])
|
||||
p.AddRestSourceReg(simdReg(v.Args[1]))
|
||||
p.To.Type = obj.TYPE_REG
|
||||
p.To.Reg = simdReg(v)
|
||||
case ssa.OpAMD64VMOVDQUload128, ssa.OpAMD64VMOVDQUload256, ssa.OpAMD64VMOVDQUload512:
|
||||
p := s.Prog(v.Op.Asm())
|
||||
p.From.Type = obj.TYPE_MEM
|
||||
p.From.Reg = v.Args[0].Reg()
|
||||
ssagen.AddAux(&p.From, v)
|
||||
p.To.Type = obj.TYPE_REG
|
||||
p.To.Reg = simdReg(v)
|
||||
case ssa.OpAMD64VMOVDQUstore128, ssa.OpAMD64VMOVDQUstore256, ssa.OpAMD64VMOVDQUstore512:
|
||||
p := s.Prog(v.Op.Asm())
|
||||
p.From.Type = obj.TYPE_REG
|
||||
p.From.Reg = simdReg(v.Args[1])
|
||||
p.To.Type = obj.TYPE_MEM
|
||||
p.To.Reg = v.Args[0].Reg()
|
||||
ssagen.AddAux(&p.To, v)
|
||||
|
||||
case ssa.OpAMD64VPMOVMToVec8x16,
|
||||
ssa.OpAMD64VPMOVMToVec8x32,
|
||||
ssa.OpAMD64VPMOVMToVec8x64,
|
||||
ssa.OpAMD64VPMOVMToVec16x8,
|
||||
ssa.OpAMD64VPMOVMToVec16x16,
|
||||
ssa.OpAMD64VPMOVMToVec16x32,
|
||||
ssa.OpAMD64VPMOVMToVec32x4,
|
||||
ssa.OpAMD64VPMOVMToVec32x8,
|
||||
ssa.OpAMD64VPMOVMToVec32x16,
|
||||
ssa.OpAMD64VPMOVMToVec64x2,
|
||||
ssa.OpAMD64VPMOVMToVec64x4,
|
||||
ssa.OpAMD64VPMOVMToVec64x8:
|
||||
p := s.Prog(v.Op.Asm())
|
||||
p.From.Type = obj.TYPE_REG
|
||||
p.From.Reg = v.Args[0].Reg()
|
||||
p.To.Type = obj.TYPE_REG
|
||||
p.To.Reg = simdReg(v)
|
||||
|
||||
case ssa.OpAMD64VPMOVVec8x16ToM,
|
||||
ssa.OpAMD64VPMOVVec8x32ToM,
|
||||
ssa.OpAMD64VPMOVVec8x64ToM,
|
||||
ssa.OpAMD64VPMOVVec16x8ToM,
|
||||
ssa.OpAMD64VPMOVVec16x16ToM,
|
||||
ssa.OpAMD64VPMOVVec16x32ToM,
|
||||
ssa.OpAMD64VPMOVVec32x4ToM,
|
||||
ssa.OpAMD64VPMOVVec32x8ToM,
|
||||
ssa.OpAMD64VPMOVVec32x16ToM,
|
||||
ssa.OpAMD64VPMOVVec64x2ToM,
|
||||
ssa.OpAMD64VPMOVVec64x4ToM,
|
||||
ssa.OpAMD64VPMOVVec64x8ToM:
|
||||
p := s.Prog(v.Op.Asm())
|
||||
p.From.Type = obj.TYPE_REG
|
||||
p.From.Reg = simdReg(v.Args[0])
|
||||
p.To.Type = obj.TYPE_REG
|
||||
p.To.Reg = v.Reg()
|
||||
|
||||
default:
|
||||
if !ssaGenSIMDValue(s, v) {
|
||||
v.Fatalf("genValue not implemented: %s", v.LongString())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func simdGenUnary(s *ssagen.State, v *ssa.Value) {
|
||||
p := s.Prog(v.Op.Asm())
|
||||
p.From.Type = obj.TYPE_REG
|
||||
p.From.Reg = simdReg(v.Args[0])
|
||||
p.To.Type = obj.TYPE_REG
|
||||
p.To.Reg = simdReg(v)
|
||||
}
|
||||
|
||||
func simdGenBinary(s *ssagen.State, v *ssa.Value) {
|
||||
p := s.Prog(v.Op.Asm())
|
||||
p.From.Type = obj.TYPE_REG
|
||||
p.From.Reg = simdReg(v.Args[0])
|
||||
p.AddRestSourceReg(simdReg(v.Args[1]))
|
||||
p.To.Type = obj.TYPE_REG
|
||||
p.To.Reg = simdReg(v)
|
||||
}
|
||||
|
||||
func simdGenUnaryImmUint8(s *ssagen.State, v *ssa.Value) {
|
||||
p := s.Prog(v.Op.Asm())
|
||||
imm := v.AuxInt
|
||||
if imm < 0 || imm > 255 {
|
||||
v.Fatalf("Invalid source selection immediate")
|
||||
}
|
||||
p.From.Offset = imm
|
||||
p.From.Type = obj.TYPE_CONST
|
||||
p.AddRestSourceReg(simdReg(v.Args[0]))
|
||||
p.To.Type = obj.TYPE_REG
|
||||
p.To.Reg = simdReg(v)
|
||||
}
|
||||
|
||||
func simdGenBinaryImmUint8(s *ssagen.State, v *ssa.Value) {
|
||||
p := s.Prog(v.Op.Asm())
|
||||
imm := v.AuxInt
|
||||
if imm < 0 || imm > 255 {
|
||||
v.Fatalf("Invalid source selection immediate")
|
||||
}
|
||||
p.From.Offset = imm
|
||||
p.From.Type = obj.TYPE_CONST
|
||||
p.AddRestSourceReg(simdReg(v.Args[0]))
|
||||
p.AddRestSourceReg(simdReg(v.Args[1]))
|
||||
p.To.Type = obj.TYPE_REG
|
||||
p.To.Reg = simdReg(v)
|
||||
}
|
||||
|
||||
var blockJump = [...]struct {
|
||||
asm, invasm obj.As
|
||||
|
|
@ -1532,3 +1662,30 @@ func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg in
|
|||
p.Pos = p.Pos.WithNotStmt()
|
||||
return p
|
||||
}
|
||||
|
||||
// XXX maybe make this part of v.Reg?
|
||||
// On the other hand, it is architecture-specific.
|
||||
func simdReg(v *ssa.Value) int16 {
|
||||
t := v.Type
|
||||
if !t.IsSIMD() {
|
||||
panic("simdReg: not a simd type")
|
||||
}
|
||||
switch t.Size() {
|
||||
case 16:
|
||||
return v.Reg()
|
||||
case 32:
|
||||
return v.Reg() + (x86.REG_Y0 - x86.REG_X0)
|
||||
case 64:
|
||||
return v.Reg() + (x86.REG_Z0 - x86.REG_X0)
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
|
||||
func simdMov(width int64) obj.As {
|
||||
if width >= 64 {
|
||||
return x86.AVMOVDQU64
|
||||
} else if width >= 16 {
|
||||
return x86.AVMOVDQU
|
||||
}
|
||||
return x86.AKMOVQ
|
||||
}
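A note on simdReg and simdMov above: in cmd/internal/obj/x86 the X, Y, and Z register numbers form parallel blocks, so a value that the register allocator assigned to Xn can be renamed to the wider Yn or Zn alias by adding a constant offset. A minimal sketch of that arithmetic follows; widen is a hypothetical helper for illustration only (it builds only inside the Go toolchain module, since cmd/internal packages are not importable elsewhere).

package main

import (
	"fmt"

	"cmd/internal/obj/x86"
)

// widen maps an allocated Xn register number to the Yn or Zn alias that
// covers the value's full width - the same arithmetic simdReg performs.
func widen(xreg int16, sizeBytes int64) int16 {
	switch sizeBytes {
	case 16:
		return xreg
	case 32:
		return xreg + (x86.REG_Y0 - x86.REG_X0)
	case 64:
		return xreg + (x86.REG_Z0 - x86.REG_X0)
	}
	panic("widen: unsupported SIMD size")
}

func main() {
	fmt.Println(widen(x86.REG_X3, 32) == x86.REG_Y3) // true
	fmt.Println(widen(x86.REG_X3, 64) == x86.REG_Z3) // true
}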
|
||||
|
|
|
|||
|
|
@ -1680,3 +1680,36 @@
|
|||
|
||||
// If we don't use the flags any more, just use the standard op.
|
||||
(Select0 a:(ADD(Q|L)constflags [c] x)) && a.Uses == 1 => (ADD(Q|L)const [c] x)
|
||||
|
||||
// XXX SIMD
|
||||
(Load <t> ptr mem) && t.Size() == 16 => (VMOVDQUload128 ptr mem)
|
||||
|
||||
(Store {t} ptr val mem) && t.Size() == 16 => (VMOVDQUstore128 ptr val mem)
|
||||
|
||||
(Load <t> ptr mem) && t.Size() == 32 => (VMOVDQUload256 ptr mem)
|
||||
|
||||
(Store {t} ptr val mem) && t.Size() == 32 => (VMOVDQUstore256 ptr val mem)
|
||||
|
||||
(Load <t> ptr mem) && t.Size() == 64 => (VMOVDQUload512 ptr mem)
|
||||
|
||||
(Store {t} ptr val mem) && t.Size() == 64 => (VMOVDQUstore512 ptr val mem)
|
||||
|
||||
(ZeroSIMD <t>) && t.Size() == 16 => (Zero128 <t>)
|
||||
(ZeroSIMD <t>) && t.Size() == 32 => (Zero256 <t>)
|
||||
(ZeroSIMD <t>) && t.Size() == 64 => (Zero512 <t>)
|
||||
|
||||
(VPMOVVec8x16ToM (VPMOVMToVec8x16 x)) => x
|
||||
(VPMOVVec8x32ToM (VPMOVMToVec8x32 x)) => x
|
||||
(VPMOVVec8x64ToM (VPMOVMToVec8x64 x)) => x
|
||||
|
||||
(VPMOVVec16x8ToM (VPMOVMToVec16x8 x)) => x
|
||||
(VPMOVVec16x16ToM (VPMOVMToVec16x16 x)) => x
|
||||
(VPMOVVec16x32ToM (VPMOVMToVec16x32 x)) => x
|
||||
|
||||
(VPMOVVec32x4ToM (VPMOVMToVec32x4 x)) => x
|
||||
(VPMOVVec32x8ToM (VPMOVMToVec32x8 x)) => x
|
||||
(VPMOVVec32x16ToM (VPMOVMToVec32x16 x)) => x
|
||||
|
||||
(VPMOVVec64x2ToM (VPMOVMToVec64x2 x)) => x
|
||||
(VPMOVVec64x4ToM (VPMOVMToVec64x4 x)) => x
|
||||
(VPMOVVec64x8ToM (VPMOVMToVec64x8 x)) => x
|
||||
|
|
|
|||
|
|
@ -63,6 +63,16 @@ var regNamesAMD64 = []string{
|
|||
"X14",
|
||||
"X15", // constant 0 in ABIInternal
|
||||
|
||||
// TODO: update asyncPreempt for K registers.
|
||||
// asyncPreempt also needs to store Z0-Z15 properly.
|
||||
"K0",
|
||||
"K1",
|
||||
"K2",
|
||||
"K3",
|
||||
"K4",
|
||||
"K5",
|
||||
"K6",
|
||||
"K7",
|
||||
// If you add registers, update asyncPreempt in runtime
|
||||
|
||||
// pseudo-registers
|
||||
|
|
@ -100,6 +110,7 @@ func init() {
|
|||
g = buildReg("g")
|
||||
fp = buildReg("X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14")
|
||||
x15 = buildReg("X15")
|
||||
mask = buildReg("K1 K2 K3 K4 K5 K6 K7")
|
||||
gpsp = gp | buildReg("SP")
|
||||
gpspsb = gpsp | buildReg("SB")
|
||||
gpspsbg = gpspsb | g
|
||||
|
|
@ -109,6 +120,7 @@ func init() {
|
|||
var (
|
||||
gponly = []regMask{gp}
|
||||
fponly = []regMask{fp}
|
||||
maskonly = []regMask{mask}
|
||||
)
|
||||
|
||||
// Common regInfo
|
||||
|
|
@ -170,6 +182,12 @@ func init() {
|
|||
fpstore = regInfo{inputs: []regMask{gpspsb, fp, 0}}
|
||||
fpstoreidx = regInfo{inputs: []regMask{gpspsb, gpsp, fp, 0}}
|
||||
|
||||
fp1m1 = regInfo{inputs: fponly, outputs: maskonly}
|
||||
m1fp1 = regInfo{inputs: maskonly, outputs: fponly}
|
||||
fp2m1 = regInfo{inputs: []regMask{fp, fp}, outputs: maskonly}
|
||||
fp2m1fp1 = regInfo{inputs: []regMask{fp, fp, mask}, outputs: fponly}
|
||||
fp2m1m1 = regInfo{inputs: []regMask{fp, fp, mask}, outputs: maskonly}
|
||||
|
||||
prefreg = regInfo{inputs: []regMask{gpspsbg}}
|
||||
)
|
||||
|
||||
|
|
@ -1199,6 +1217,54 @@ func init() {
|
|||
//
|
||||
// output[i] = (input[i] >> 7) & 1
|
||||
{name: "PMOVMSKB", argLength: 1, reg: fpgp, asm: "PMOVMSKB"},
|
||||
|
||||
// XXX SIMD
|
||||
{name: "VPADDD4", argLength: 2, reg: fp21, asm: "VPADDD", commutative: true}, // arg0 + arg1
|
||||
|
||||
{name: "VMOVDQUload128", argLength: 2, reg: fpload, asm: "VMOVDQU", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0+auxint+aux, arg1 = mem
|
||||
{name: "VMOVDQUstore128", argLength: 3, reg: fpstore, asm: "VMOVDQU", aux: "SymOff", faultOnNilArg0: true, symEffect: "Write"}, // store, *(arg0+auxint+aux) = arg1, arg2 = mem
|
||||
|
||||
{name: "VMOVDQUload256", argLength: 2, reg: fpload, asm: "VMOVDQU", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0+auxint+aux, arg1 = mem
|
||||
{name: "VMOVDQUstore256", argLength: 3, reg: fpstore, asm: "VMOVDQU", aux: "SymOff", faultOnNilArg0: true, symEffect: "Write"}, // store, *(arg0+auxint+aux) = arg1, arg2 = mem
|
||||
|
||||
{name: "VMOVDQUload512", argLength: 2, reg: fpload, asm: "VMOVDQU64", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0+auxint+aux, arg1 = mem
|
||||
{name: "VMOVDQUstore512", argLength: 3, reg: fpstore, asm: "VMOVDQU64", aux: "SymOff", faultOnNilArg0: true, symEffect: "Write"}, // store, *(arg0+auxint+aux) = arg1, arg2 = mem
|
||||
|
||||
{name: "VPMOVMToVec8x16", argLength: 1, reg: m1fp1, asm: "VPMOVM2B"},
|
||||
{name: "VPMOVMToVec8x32", argLength: 1, reg: m1fp1, asm: "VPMOVM2B"},
|
||||
{name: "VPMOVMToVec8x64", argLength: 1, reg: m1fp1, asm: "VPMOVM2B"},
|
||||
|
||||
{name: "VPMOVMToVec16x8", argLength: 1, reg: m1fp1, asm: "VPMOVM2W"},
|
||||
{name: "VPMOVMToVec16x16", argLength: 1, reg: m1fp1, asm: "VPMOVM2W"},
|
||||
{name: "VPMOVMToVec16x32", argLength: 1, reg: m1fp1, asm: "VPMOVM2W"},
|
||||
|
||||
{name: "VPMOVMToVec32x4", argLength: 1, reg: m1fp1, asm: "VPMOVM2D"},
|
||||
{name: "VPMOVMToVec32x8", argLength: 1, reg: m1fp1, asm: "VPMOVM2D"},
|
||||
{name: "VPMOVMToVec32x16", argLength: 1, reg: m1fp1, asm: "VPMOVM2D"},
|
||||
|
||||
{name: "VPMOVMToVec64x2", argLength: 1, reg: m1fp1, asm: "VPMOVM2Q"},
|
||||
{name: "VPMOVMToVec64x4", argLength: 1, reg: m1fp1, asm: "VPMOVM2Q"},
|
||||
{name: "VPMOVMToVec64x8", argLength: 1, reg: m1fp1, asm: "VPMOVM2Q"},
|
||||
|
||||
{name: "VPMOVVec8x16ToM", argLength: 1, reg: fp1m1, asm: "VPMOVB2M"},
|
||||
{name: "VPMOVVec8x32ToM", argLength: 1, reg: fp1m1, asm: "VPMOVB2M"},
|
||||
{name: "VPMOVVec8x64ToM", argLength: 1, reg: fp1m1, asm: "VPMOVB2M"},
|
||||
|
||||
{name: "VPMOVVec16x8ToM", argLength: 1, reg: fp1m1, asm: "VPMOVW2M"},
|
||||
{name: "VPMOVVec16x16ToM", argLength: 1, reg: fp1m1, asm: "VPMOVW2M"},
|
||||
{name: "VPMOVVec16x32ToM", argLength: 1, reg: fp1m1, asm: "VPMOVW2M"},
|
||||
|
||||
{name: "VPMOVVec32x4ToM", argLength: 1, reg: fp1m1, asm: "VPMOVD2M"},
|
||||
{name: "VPMOVVec32x8ToM", argLength: 1, reg: fp1m1, asm: "VPMOVD2M"},
|
||||
{name: "VPMOVVec32x16ToM", argLength: 1, reg: fp1m1, asm: "VPMOVD2M"},
|
||||
|
||||
{name: "VPMOVVec64x2ToM", argLength: 1, reg: fp1m1, asm: "VPMOVQ2M"},
|
||||
{name: "VPMOVVec64x4ToM", argLength: 1, reg: fp1m1, asm: "VPMOVQ2M"},
|
||||
{name: "VPMOVVec64x8ToM", argLength: 1, reg: fp1m1, asm: "VPMOVQ2M"},
|
||||
|
||||
{name: "Zero128", argLength: 0, reg: fp01, asm: "VPXOR"},
|
||||
{name: "Zero256", argLength: 0, reg: fp01, asm: "VPXOR"},
|
||||
{name: "Zero512", argLength: 0, reg: fp01, asm: "VPXORQ"},
|
||||
}
|
||||
|
||||
var AMD64blocks = []blockData{
|
||||
|
|
@ -1230,14 +1296,15 @@ func init() {
|
|||
name: "AMD64",
|
||||
pkg: "cmd/internal/obj/x86",
|
||||
genfile: "../../amd64/ssa.go",
|
||||
ops: AMD64ops,
|
||||
genSIMDfile: "../../amd64/simdssa.go",
|
||||
ops: append(AMD64ops, simdAMD64Ops(fp11, fp21, fp2m1, fp2m1fp1, fp2m1m1)...), // AMD64ops,
|
||||
blocks: AMD64blocks,
|
||||
regnames: regNamesAMD64,
|
||||
ParamIntRegNames: "AX BX CX DI SI R8 R9 R10 R11",
|
||||
ParamFloatRegNames: "X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14",
|
||||
gpregmask: gp,
|
||||
fpregmask: fp,
|
||||
specialregmask: x15,
|
||||
specialregmask: x15 | mask,
|
||||
framepointerreg: int8(num["BP"]),
|
||||
linkreg: -1, // not used
|
||||
})
|
||||
|
|
|
|||
|
|
@ -910,7 +910,7 @@
|
|||
|
||||
// struct operations
|
||||
(StructSelect [i] x:(StructMake ___)) => x.Args[i]
|
||||
(Load <t> _ _) && t.IsStruct() && CanSSA(t) => rewriteStructLoad(v)
|
||||
(Load <t> _ _) && t.IsStruct() && CanSSA(t) && !t.IsSIMD() => rewriteStructLoad(v)
|
||||
(Store _ (StructMake ___) _) => rewriteStructStore(v)
|
||||
|
||||
(StructSelect [i] x:(Load <t> ptr mem)) && !CanSSA(t) =>
|
||||
|
|
|
|||
|
|
@ -662,6 +662,10 @@ var genericOps = []opData{
|
|||
// Prefetch instruction
|
||||
{name: "PrefetchCache", argLength: 2, hasSideEffects: true}, // Do prefetch arg0 to cache. arg0=addr, arg1=memory.
|
||||
{name: "PrefetchCacheStreamed", argLength: 2, hasSideEffects: true}, // Do non-temporal or streamed prefetch arg0 to cache. arg0=addr, arg1=memory.
|
||||
|
||||
// XXX SIMD
|
||||
{name: "Add32x4", argLength: 2}, // arg0 + arg1
|
||||
{name: "ZeroSIMD", argLength: 0},
|
||||
}
|
||||
|
||||
// kind controls successors implicit exit
|
||||
|
|
@ -689,6 +693,7 @@ var genericBlocks = []blockData{
|
|||
}
|
||||
|
||||
func init() {
|
||||
genericOps = append(genericOps, simdGenericOps()...)
|
||||
archs = append(archs, arch{
|
||||
name: "generic",
|
||||
ops: genericOps,
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ type arch struct {
|
|||
name string
|
||||
pkg string // obj package to import for this arch.
|
||||
genfile string // source file containing opcode code generation.
|
||||
genSIMDfile string // source file containing opcode code generation for SIMD.
|
||||
ops []opData
|
||||
blocks []blockData
|
||||
regnames []string
|
||||
|
|
@ -525,6 +526,15 @@ func genOp() {
|
|||
if err != nil {
|
||||
log.Fatalf("can't read %s: %v", a.genfile, err)
|
||||
}
|
||||
// Append the file of simd operations, too
|
||||
if a.genSIMDfile != "" {
|
||||
simdSrc, err := os.ReadFile(a.genSIMDfile)
|
||||
if err != nil {
|
||||
log.Fatalf("can't read %s: %v", a.genSIMDfile, err)
|
||||
}
|
||||
src = append(src, simdSrc...)
|
||||
}
|
||||
|
||||
seen := make(map[string]bool, len(a.ops))
|
||||
for _, m := range rxOp.FindAllSubmatch(src, -1) {
|
||||
seen[string(m[1])] = true
|
||||
|
|
|
|||
|
|
@ -95,6 +95,7 @@ func genLateLowerRules(arch arch) { genRulesSuffix(arch, "latelower") }
|
|||
|
||||
func genRulesSuffix(arch arch, suff string) {
|
||||
// Open input file.
|
||||
var text io.Reader
|
||||
text, err := os.Open(arch.name + suff + ".rules")
|
||||
if err != nil {
|
||||
if suff == "" {
|
||||
|
|
@ -105,6 +106,14 @@ func genRulesSuffix(arch arch, suff string) {
|
|||
return
|
||||
}
|
||||
|
||||
// Check for file of SIMD rules to add
|
||||
if suff == "" {
|
||||
simdtext, err := os.Open("simd" + arch.name + ".rules")
|
||||
if err == nil {
|
||||
text = io.MultiReader(text, simdtext)
|
||||
}
|
||||
}
|
||||
|
||||
// oprules contains a list of rules for each block and opcode
|
||||
blockrules := map[string][]Rule{}
|
||||
oprules := map[string][]Rule{}
|
||||
|
|
|
|||
src/cmd/compile/internal/ssa/_gen/simdAMD64.rules (new file, 4 lines)
|
|
@ -0,0 +1,4 @@
|
|||
// Code generated by internal/simd/_gen using 'go run .'; DO NOT EDIT.
|
||||
|
||||
// (AddInt8x16 ...) => (VPADDB ...)
|
||||
// etc
|
||||
src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go (new file, 10 lines)
|
|
@ -0,0 +1,10 @@
|
|||
// Code generated by internal/simd/_gen using 'go run .'; DO NOT EDIT.
|
||||
|
||||
package main
|
||||
|
||||
func simdAMD64Ops(fp11, fp21, fp2m1, fp2m1fp1, fp2m1m1 regInfo) []opData {
|
||||
return []opData{
|
||||
// {name: "VPADDB", argLength: 2, reg: fp21, asm: "VPADDB", commutative: true},
|
||||
// etc, generated
|
||||
}
|
||||
}
|
||||
src/cmd/compile/internal/ssa/_gen/simdgenericOps.go (new file, 10 lines)
|
|
@ -0,0 +1,10 @@
|
|||
// Code generated by internal/simd/_gen using 'go run .'; DO NOT EDIT.
|
||||
|
||||
package main
|
||||
|
||||
func simdGenericOps() []opData {
|
||||
return []opData{
|
||||
// {name: "AddInt8x16", argLength: 2, commutative: true},
|
||||
// etc
|
||||
}
|
||||
}
|
||||
|
|
@ -89,6 +89,10 @@ type Types struct {
|
|||
Float32Ptr *types.Type
|
||||
Float64Ptr *types.Type
|
||||
BytePtrPtr *types.Type
|
||||
Vec128 *types.Type
|
||||
Vec256 *types.Type
|
||||
Vec512 *types.Type
|
||||
Mask *types.Type
|
||||
}
|
||||
|
||||
// NewTypes creates and populates a Types.
|
||||
|
|
@ -123,6 +127,10 @@ func (t *Types) SetTypPtrs() {
|
|||
t.Float32Ptr = types.NewPtr(types.Types[types.TFLOAT32])
|
||||
t.Float64Ptr = types.NewPtr(types.Types[types.TFLOAT64])
|
||||
t.BytePtrPtr = types.NewPtr(types.NewPtr(types.Types[types.TUINT8]))
|
||||
t.Vec128 = types.TypeVec128
|
||||
t.Vec256 = types.TypeVec256
|
||||
t.Vec512 = types.TypeVec512
|
||||
t.Mask = types.TypeMask
|
||||
}
|
||||
|
||||
type Logger interface {
|
||||
|
|
|
|||
|
|
@ -100,7 +100,7 @@ func decomposeBuiltIn(f *Func) {
|
|||
}
|
||||
case t.IsFloat():
|
||||
// floats are never decomposed, even ones bigger than RegSize
|
||||
case t.Size() > f.Config.RegSize:
|
||||
case t.Size() > f.Config.RegSize && !t.IsSIMD():
|
||||
f.Fatalf("undecomposed named type %s %v", name, t)
|
||||
}
|
||||
}
|
||||
|
|
@ -135,7 +135,7 @@ func decomposeBuiltInPhi(v *Value) {
|
|||
decomposeInterfacePhi(v)
|
||||
case v.Type.IsFloat():
|
||||
// floats are never decomposed, even ones bigger than RegSize
|
||||
case v.Type.Size() > v.Block.Func.Config.RegSize:
|
||||
case v.Type.Size() > v.Block.Func.Config.RegSize && !v.Type.IsSIMD():
|
||||
v.Fatalf("%v undecomposed type %v", v, v.Type)
|
||||
}
|
||||
}
|
||||
|
|
@ -248,7 +248,7 @@ func decomposeUser(f *Func) {
|
|||
for _, name := range f.Names {
|
||||
t := name.Type
|
||||
switch {
|
||||
case t.IsStruct():
|
||||
case isStructNotSIMD(t):
|
||||
newNames = decomposeUserStructInto(f, name, newNames)
|
||||
case t.IsArray():
|
||||
newNames = decomposeUserArrayInto(f, name, newNames)
|
||||
|
|
@ -293,7 +293,7 @@ func decomposeUserArrayInto(f *Func, name *LocalSlot, slots []*LocalSlot) []*Loc
|
|||
|
||||
if t.Elem().IsArray() {
|
||||
return decomposeUserArrayInto(f, elemName, slots)
|
||||
} else if t.Elem().IsStruct() {
|
||||
} else if isStructNotSIMD(t.Elem()) {
|
||||
return decomposeUserStructInto(f, elemName, slots)
|
||||
}
|
||||
|
||||
|
|
@ -313,7 +313,7 @@ func decomposeUserStructInto(f *Func, name *LocalSlot, slots []*LocalSlot) []*Lo
|
|||
fnames = append(fnames, fs)
|
||||
// arrays and structs will be decomposed further, so
|
||||
// there's no need to record a name
|
||||
if !fs.Type.IsArray() && !fs.Type.IsStruct() {
|
||||
if !fs.Type.IsArray() && !isStructNotSIMD(fs.Type) {
|
||||
slots = maybeAppend(f, slots, fs)
|
||||
}
|
||||
}
|
||||
|
|
@ -339,7 +339,7 @@ func decomposeUserStructInto(f *Func, name *LocalSlot, slots []*LocalSlot) []*Lo
|
|||
// now that this f.NamedValues contains values for the struct
|
||||
// fields, recurse into nested structs
|
||||
for i := 0; i < n; i++ {
|
||||
if name.Type.FieldType(i).IsStruct() {
|
||||
if isStructNotSIMD(name.Type.FieldType(i)) {
|
||||
slots = decomposeUserStructInto(f, fnames[i], slots)
|
||||
delete(f.NamedValues, *fnames[i])
|
||||
} else if name.Type.FieldType(i).IsArray() {
|
||||
|
|
@ -351,7 +351,7 @@ func decomposeUserStructInto(f *Func, name *LocalSlot, slots []*LocalSlot) []*Lo
|
|||
}
|
||||
func decomposeUserPhi(v *Value) {
|
||||
switch {
|
||||
case v.Type.IsStruct():
|
||||
case isStructNotSIMD(v.Type):
|
||||
decomposeStructPhi(v)
|
||||
case v.Type.IsArray():
|
||||
decomposeArrayPhi(v)
|
||||
|
|
@ -458,3 +458,7 @@ func deleteNamedVals(f *Func, toDelete []namedVal) {
|
|||
}
|
||||
f.Names = f.Names[:end]
|
||||
}
|
||||
|
||||
func isStructNotSIMD(t *types.Type) bool {
|
||||
return t.IsStruct() && !t.IsSIMD()
|
||||
}
|
||||
|
|
|
|||
|
|
@ -399,6 +399,9 @@ func (x *expandState) decomposeAsNecessary(pos src.XPos, b *Block, a, m0 *Value,
|
|||
return mem
|
||||
|
||||
case types.TSTRUCT:
|
||||
if at.IsSIMD() {
|
||||
break // XXX
|
||||
}
|
||||
for i := 0; i < at.NumFields(); i++ {
|
||||
et := at.Field(i).Type // might need to read offsets from the fields
|
||||
e := b.NewValue1I(pos, OpStructSelect, et, int64(i), a)
|
||||
|
|
@ -547,6 +550,9 @@ func (x *expandState) rewriteSelectOrArg(pos src.XPos, b *Block, container, a, m
|
|||
|
||||
case types.TSTRUCT:
|
||||
// Assume ssagen/ssa.go (in buildssa) spills large aggregates so they won't appear here.
|
||||
if at.IsSIMD() {
|
||||
break // XXX
|
||||
}
|
||||
for i := 0; i < at.NumFields(); i++ {
|
||||
et := at.Field(i).Type
|
||||
e := x.rewriteSelectOrArg(pos, b, container, nil, m0, et, rc.next(et))
|
||||
|
|
@ -713,6 +719,9 @@ func (x *expandState) rewriteWideSelectToStores(pos src.XPos, b *Block, containe
|
|||
|
||||
case types.TSTRUCT:
|
||||
// Assume ssagen/ssa.go (in buildssa) spills large aggregates so they won't appear here.
|
||||
if at.IsSIMD() {
|
||||
break // XXX
|
||||
}
|
||||
for i := 0; i < at.NumFields(); i++ {
|
||||
et := at.Field(i).Type
|
||||
m0 = x.rewriteWideSelectToStores(pos, b, container, m0, et, rc.next(et))
|
||||
|
|
@ -859,7 +868,7 @@ func (c *registerCursor) at(t *types.Type, i int) registerCursor {
|
|||
rc.nextSlice += Abi1RO(i * w)
|
||||
return rc
|
||||
}
|
||||
if t.IsStruct() {
|
||||
if isStructNotSIMD(t) {
|
||||
for j := 0; j < i; j++ {
|
||||
rc.next(t.FieldType(j))
|
||||
}
|
||||
|
|
@ -973,7 +982,7 @@ func (x *expandState) regOffset(t *types.Type, i int) Abi1RO {
|
|||
if t.IsArray() {
|
||||
return Abi1RO(i) * x.regWidth(t.Elem())
|
||||
}
|
||||
if t.IsStruct() {
|
||||
if isStructNotSIMD(t) {
|
||||
k := Abi1RO(0)
|
||||
for j := 0; j < i; j++ {
|
||||
k += x.regWidth(t.FieldType(j))
|
||||
|
|
|
|||
File diff suppressed because it is too large (not shown).
|
|
@ -501,6 +501,30 @@ func rewriteValueAMD64(v *Value) bool {
|
|||
return rewriteValueAMD64_OpAMD64TESTW(v)
|
||||
case OpAMD64TESTWconst:
|
||||
return rewriteValueAMD64_OpAMD64TESTWconst(v)
|
||||
case OpAMD64VPMOVVec16x16ToM:
|
||||
return rewriteValueAMD64_OpAMD64VPMOVVec16x16ToM(v)
|
||||
case OpAMD64VPMOVVec16x32ToM:
|
||||
return rewriteValueAMD64_OpAMD64VPMOVVec16x32ToM(v)
|
||||
case OpAMD64VPMOVVec16x8ToM:
|
||||
return rewriteValueAMD64_OpAMD64VPMOVVec16x8ToM(v)
|
||||
case OpAMD64VPMOVVec32x16ToM:
|
||||
return rewriteValueAMD64_OpAMD64VPMOVVec32x16ToM(v)
|
||||
case OpAMD64VPMOVVec32x4ToM:
|
||||
return rewriteValueAMD64_OpAMD64VPMOVVec32x4ToM(v)
|
||||
case OpAMD64VPMOVVec32x8ToM:
|
||||
return rewriteValueAMD64_OpAMD64VPMOVVec32x8ToM(v)
|
||||
case OpAMD64VPMOVVec64x2ToM:
|
||||
return rewriteValueAMD64_OpAMD64VPMOVVec64x2ToM(v)
|
||||
case OpAMD64VPMOVVec64x4ToM:
|
||||
return rewriteValueAMD64_OpAMD64VPMOVVec64x4ToM(v)
|
||||
case OpAMD64VPMOVVec64x8ToM:
|
||||
return rewriteValueAMD64_OpAMD64VPMOVVec64x8ToM(v)
|
||||
case OpAMD64VPMOVVec8x16ToM:
|
||||
return rewriteValueAMD64_OpAMD64VPMOVVec8x16ToM(v)
|
||||
case OpAMD64VPMOVVec8x32ToM:
|
||||
return rewriteValueAMD64_OpAMD64VPMOVVec8x32ToM(v)
|
||||
case OpAMD64VPMOVVec8x64ToM:
|
||||
return rewriteValueAMD64_OpAMD64VPMOVVec8x64ToM(v)
|
||||
case OpAMD64XADDLlock:
|
||||
return rewriteValueAMD64_OpAMD64XADDLlock(v)
|
||||
case OpAMD64XADDQlock:
|
||||
|
|
@ -1198,6 +1222,8 @@ func rewriteValueAMD64(v *Value) bool {
|
|||
case OpZeroExt8to64:
|
||||
v.Op = OpAMD64MOVBQZX
|
||||
return true
|
||||
case OpZeroSIMD:
|
||||
return rewriteValueAMD64_OpZeroSIMD(v)
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
|
@ -22812,6 +22838,174 @@ func rewriteValueAMD64_OpAMD64TESTWconst(v *Value) bool {
|
|||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VPMOVVec16x16ToM(v *Value) bool {
|
||||
v_0 := v.Args[0]
|
||||
// match: (VPMOVVec16x16ToM (VPMOVMToVec16x16 x))
|
||||
// result: x
|
||||
for {
|
||||
if v_0.Op != OpAMD64VPMOVMToVec16x16 {
|
||||
break
|
||||
}
|
||||
x := v_0.Args[0]
|
||||
v.copyOf(x)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VPMOVVec16x32ToM(v *Value) bool {
|
||||
v_0 := v.Args[0]
|
||||
// match: (VPMOVVec16x32ToM (VPMOVMToVec16x32 x))
|
||||
// result: x
|
||||
for {
|
||||
if v_0.Op != OpAMD64VPMOVMToVec16x32 {
|
||||
break
|
||||
}
|
||||
x := v_0.Args[0]
|
||||
v.copyOf(x)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VPMOVVec16x8ToM(v *Value) bool {
|
||||
v_0 := v.Args[0]
|
||||
// match: (VPMOVVec16x8ToM (VPMOVMToVec16x8 x))
|
||||
// result: x
|
||||
for {
|
||||
if v_0.Op != OpAMD64VPMOVMToVec16x8 {
|
||||
break
|
||||
}
|
||||
x := v_0.Args[0]
|
||||
v.copyOf(x)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VPMOVVec32x16ToM(v *Value) bool {
|
||||
v_0 := v.Args[0]
|
||||
// match: (VPMOVVec32x16ToM (VPMOVMToVec32x16 x))
|
||||
// result: x
|
||||
for {
|
||||
if v_0.Op != OpAMD64VPMOVMToVec32x16 {
|
||||
break
|
||||
}
|
||||
x := v_0.Args[0]
|
||||
v.copyOf(x)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VPMOVVec32x4ToM(v *Value) bool {
|
||||
v_0 := v.Args[0]
|
||||
// match: (VPMOVVec32x4ToM (VPMOVMToVec32x4 x))
|
||||
// result: x
|
||||
for {
|
||||
if v_0.Op != OpAMD64VPMOVMToVec32x4 {
|
||||
break
|
||||
}
|
||||
x := v_0.Args[0]
|
||||
v.copyOf(x)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VPMOVVec32x8ToM(v *Value) bool {
|
||||
v_0 := v.Args[0]
|
||||
// match: (VPMOVVec32x8ToM (VPMOVMToVec32x8 x))
|
||||
// result: x
|
||||
for {
|
||||
if v_0.Op != OpAMD64VPMOVMToVec32x8 {
|
||||
break
|
||||
}
|
||||
x := v_0.Args[0]
|
||||
v.copyOf(x)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VPMOVVec64x2ToM(v *Value) bool {
|
||||
v_0 := v.Args[0]
|
||||
// match: (VPMOVVec64x2ToM (VPMOVMToVec64x2 x))
|
||||
// result: x
|
||||
for {
|
||||
if v_0.Op != OpAMD64VPMOVMToVec64x2 {
|
||||
break
|
||||
}
|
||||
x := v_0.Args[0]
|
||||
v.copyOf(x)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VPMOVVec64x4ToM(v *Value) bool {
|
||||
v_0 := v.Args[0]
|
||||
// match: (VPMOVVec64x4ToM (VPMOVMToVec64x4 x))
|
||||
// result: x
|
||||
for {
|
||||
if v_0.Op != OpAMD64VPMOVMToVec64x4 {
|
||||
break
|
||||
}
|
||||
x := v_0.Args[0]
|
||||
v.copyOf(x)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VPMOVVec64x8ToM(v *Value) bool {
|
||||
v_0 := v.Args[0]
|
||||
// match: (VPMOVVec64x8ToM (VPMOVMToVec64x8 x))
|
||||
// result: x
|
||||
for {
|
||||
if v_0.Op != OpAMD64VPMOVMToVec64x8 {
|
||||
break
|
||||
}
|
||||
x := v_0.Args[0]
|
||||
v.copyOf(x)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VPMOVVec8x16ToM(v *Value) bool {
|
||||
v_0 := v.Args[0]
|
||||
// match: (VPMOVVec8x16ToM (VPMOVMToVec8x16 x))
|
||||
// result: x
|
||||
for {
|
||||
if v_0.Op != OpAMD64VPMOVMToVec8x16 {
|
||||
break
|
||||
}
|
||||
x := v_0.Args[0]
|
||||
v.copyOf(x)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VPMOVVec8x32ToM(v *Value) bool {
|
||||
v_0 := v.Args[0]
|
||||
// match: (VPMOVVec8x32ToM (VPMOVMToVec8x32 x))
|
||||
// result: x
|
||||
for {
|
||||
if v_0.Op != OpAMD64VPMOVMToVec8x32 {
|
||||
break
|
||||
}
|
||||
x := v_0.Args[0]
|
||||
v.copyOf(x)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VPMOVVec8x64ToM(v *Value) bool {
|
||||
v_0 := v.Args[0]
|
||||
// match: (VPMOVVec8x64ToM (VPMOVMToVec8x64 x))
|
||||
// result: x
|
||||
for {
|
||||
if v_0.Op != OpAMD64VPMOVMToVec8x64 {
|
||||
break
|
||||
}
|
||||
x := v_0.Args[0]
|
||||
v.copyOf(x)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64XADDLlock(v *Value) bool {
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
|
|
@ -26215,6 +26409,48 @@ func rewriteValueAMD64_OpLoad(v *Value) bool {
|
|||
v.AddArg2(ptr, mem)
|
||||
return true
|
||||
}
|
||||
// match: (Load <t> ptr mem)
|
||||
// cond: t.Size() == 16
|
||||
// result: (VMOVDQUload128 ptr mem)
|
||||
for {
|
||||
t := v.Type
|
||||
ptr := v_0
|
||||
mem := v_1
|
||||
if !(t.Size() == 16) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64VMOVDQUload128)
|
||||
v.AddArg2(ptr, mem)
|
||||
return true
|
||||
}
|
||||
// match: (Load <t> ptr mem)
|
||||
// cond: t.Size() == 32
|
||||
// result: (VMOVDQUload256 ptr mem)
|
||||
for {
|
||||
t := v.Type
|
||||
ptr := v_0
|
||||
mem := v_1
|
||||
if !(t.Size() == 32) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64VMOVDQUload256)
|
||||
v.AddArg2(ptr, mem)
|
||||
return true
|
||||
}
|
||||
// match: (Load <t> ptr mem)
|
||||
// cond: t.Size() == 64
|
||||
// result: (VMOVDQUload512 ptr mem)
|
||||
for {
|
||||
t := v.Type
|
||||
ptr := v_0
|
||||
mem := v_1
|
||||
if !(t.Size() == 64) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64VMOVDQUload512)
|
||||
v.AddArg2(ptr, mem)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpLocalAddr(v *Value) bool {
|
||||
|
|
@ -29764,6 +30000,51 @@ func rewriteValueAMD64_OpStore(v *Value) bool {
|
|||
v.AddArg3(ptr, val, mem)
|
||||
return true
|
||||
}
|
||||
// match: (Store {t} ptr val mem)
|
||||
// cond: t.Size() == 16
|
||||
// result: (VMOVDQUstore128 ptr val mem)
|
||||
for {
|
||||
t := auxToType(v.Aux)
|
||||
ptr := v_0
|
||||
val := v_1
|
||||
mem := v_2
|
||||
if !(t.Size() == 16) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64VMOVDQUstore128)
|
||||
v.AddArg3(ptr, val, mem)
|
||||
return true
|
||||
}
|
||||
// match: (Store {t} ptr val mem)
|
||||
// cond: t.Size() == 32
|
||||
// result: (VMOVDQUstore256 ptr val mem)
|
||||
for {
|
||||
t := auxToType(v.Aux)
|
||||
ptr := v_0
|
||||
val := v_1
|
||||
mem := v_2
|
||||
if !(t.Size() == 32) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64VMOVDQUstore256)
|
||||
v.AddArg3(ptr, val, mem)
|
||||
return true
|
||||
}
|
||||
// match: (Store {t} ptr val mem)
|
||||
// cond: t.Size() == 64
|
||||
// result: (VMOVDQUstore512 ptr val mem)
|
||||
for {
|
||||
t := auxToType(v.Aux)
|
||||
ptr := v_0
|
||||
val := v_1
|
||||
mem := v_2
|
||||
if !(t.Size() == 64) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64VMOVDQUstore512)
|
||||
v.AddArg3(ptr, val, mem)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpTrunc(v *Value) bool {
|
||||
|
|
@ -30117,6 +30398,45 @@ func rewriteValueAMD64_OpZero(v *Value) bool {
|
|||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpZeroSIMD(v *Value) bool {
|
||||
// match: (ZeroSIMD <t>)
|
||||
// cond: t.Size() == 16
|
||||
// result: (Zero128 <t>)
|
||||
for {
|
||||
t := v.Type
|
||||
if !(t.Size() == 16) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64Zero128)
|
||||
v.Type = t
|
||||
return true
|
||||
}
|
||||
// match: (ZeroSIMD <t>)
|
||||
// cond: t.Size() == 32
|
||||
// result: (Zero256 <t>)
|
||||
for {
|
||||
t := v.Type
|
||||
if !(t.Size() == 32) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64Zero256)
|
||||
v.Type = t
|
||||
return true
|
||||
}
|
||||
// match: (ZeroSIMD <t>)
|
||||
// cond: t.Size() == 64
|
||||
// result: (Zero512 <t>)
|
||||
for {
|
||||
t := v.Type
|
||||
if !(t.Size() == 64) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64Zero512)
|
||||
v.Type = t
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteBlockAMD64(b *Block) bool {
|
||||
typ := &b.Func.Config.Types
|
||||
switch b.Kind {
|
||||
|
|
|
|||
|
|
@ -14149,11 +14149,11 @@ func rewriteValuegeneric_OpLoad(v *Value) bool {
|
|||
return true
|
||||
}
|
||||
// match: (Load <t> _ _)
|
||||
// cond: t.IsStruct() && CanSSA(t)
|
||||
// cond: t.IsStruct() && CanSSA(t) && !t.IsSIMD()
|
||||
// result: rewriteStructLoad(v)
|
||||
for {
|
||||
t := v.Type
|
||||
if !(t.IsStruct() && CanSSA(t)) {
|
||||
if !(t.IsStruct() && CanSSA(t) && !t.IsSIMD()) {
|
||||
break
|
||||
}
|
||||
v.copyOf(rewriteStructLoad(v))
|
||||
|
|
|
|||
|
|
@ -596,6 +596,9 @@ func AutoVar(v *Value) (*ir.Name, int64) {
|
|||
// CanSSA reports whether values of type t can be represented as a Value.
|
||||
func CanSSA(t *types.Type) bool {
|
||||
types.CalcSize(t)
|
||||
if t.IsSIMD() {
|
||||
return true
|
||||
}
|
||||
if t.Size() > int64(4*types.PtrSize) {
|
||||
// 4*Widthptr is an arbitrary constant. We want it
|
||||
// to be at least 3*Widthptr so slices can be registerized.
|
||||
|
|
|
|||
|
|
@ -1602,6 +1602,104 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
|
|||
return s.newValue1(ssa.OpZeroExt8to64, types.Types[types.TUINT64], out)
|
||||
},
|
||||
sys.AMD64)
|
||||
|
||||
if buildcfg.Experiment.SIMD {
|
||||
// Only enable the simd intrinsics if the SIMD experiment is on.
|
||||
simdIntrinsics(addF)
|
||||
}
|
||||
}
|
||||
|
||||
// simdLoadSliceMethod builds the intrinsic for the method form of Load-from-slice.
|
||||
func simdLoadSliceMethod(nElts int64) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
||||
return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
||||
// args[0] is unused except for its type.
|
||||
t := args[0].Type
|
||||
slice := args[1]
|
||||
arrlen := s.constInt(types.Types[types.TINT], nElts)
|
||||
cap := s.newValue1(ssa.OpSliceLen, types.Types[types.TINT], slice)
|
||||
s.boundsCheck(arrlen, cap, ssa.BoundsConvert, false)
|
||||
ptr := s.newValue1(ssa.OpSlicePtr, t.PtrTo(), slice) // is this the right type? Does it need a convert?
|
||||
return s.newValue2(ssa.OpLoad, t, ptr, s.mem())
|
||||
}
|
||||
}
|
||||
|
||||
// simdLoadSlice builds the intrinsic for the function form of Load-from-slice.
|
||||
func simdLoadSlice(nElts int64) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
||||
return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
||||
// args[0] is unused except for its type.
|
||||
t := n.Type()
|
||||
slice := args[0]
|
||||
arrlen := s.constInt(types.Types[types.TINT], nElts)
|
||||
cap := s.newValue1(ssa.OpSliceLen, types.Types[types.TINT], slice)
|
||||
s.boundsCheck(arrlen, cap, ssa.BoundsConvert, false)
|
||||
ptr := s.newValue1(ssa.OpSlicePtr, t.PtrTo(), slice) // is this the right type? Does it need a convert?
|
||||
return s.newValue2(ssa.OpLoad, t, ptr, s.mem())
|
||||
}
|
||||
}
|
||||
|
||||
func simdStoreSlice(nElts int64) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
||||
return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
||||
x := args[0]
|
||||
t := x.Type
|
||||
slice := args[1]
|
||||
arrlen := s.constInt(types.Types[types.TINT], nElts)
|
||||
cap := s.newValue1(ssa.OpSliceLen, types.Types[types.TINT], slice)
|
||||
s.boundsCheck(arrlen, cap, ssa.BoundsConvert, false)
|
||||
ptr := s.newValue1(ssa.OpSlicePtr, t.PtrTo(), slice) // is this the right type? Does it need a convert?
|
||||
s.store(t, ptr, x)
|
||||
return nil
|
||||
}
|
||||
}
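In source terms, the three intrinsics above give the slice forms of Load and Store checked full-width semantics: the slice length is compared against the vector's element count (reusing the bounds-check kind of a slice-to-array conversion) and then a single vector load or store is emitted. A hedged sketch, using the API names from testdata/sample.go:

v := simd.LoadFloat64x4FromSlice(xs) // panics if len(xs) < 4, otherwise one full-width load
v.Store(dst)                         // panics if len(dst) < 4, otherwise one full-width store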
|
||||
|
||||
func simdLoadSliceMethodPart(nElts int64) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
||||
return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
||||
// args[0] is unused except for its type.
|
||||
t := args[0].Type
|
||||
slice := args[1]
|
||||
arrLen := s.constInt(types.Types[types.TINT], nElts)
|
||||
cap := s.newValue1(ssa.OpSliceLen, types.Types[types.TINT], slice)
|
||||
|
||||
/*
|
||||
if off := vec.Len() - len(slice) ; off <= 0 {
|
||||
plain load
|
||||
} else {
|
||||
load mask[off] into a scratch vector
|
||||
masked load/store
|
||||
}
|
||||
*/
|
||||
|
||||
// TODO SIMD support on a 32-bit processor
|
||||
|
||||
off := s.newValue2(ssa.OpSub64, types.Types[types.TINT], arrLen, cap)
|
||||
cond := s.newValue2(ssa.OpLeq64, types.Types[types.TBOOL], off, s.zeroVal(types.Types[types.TINT]))
|
||||
b := s.endBlock()
|
||||
b.Kind = ssa.BlockIf
|
||||
b.SetControl(cond)
|
||||
bTrue := s.f.NewBlock(ssa.BlockPlain)
|
||||
bFalse := s.f.NewBlock(ssa.BlockPlain)
|
||||
bEnd := s.f.NewBlock(ssa.BlockPlain)
|
||||
b.AddEdgeTo(bTrue)
|
||||
b.AddEdgeTo(bFalse)
|
||||
|
||||
simdRes := ssaMarker("simdload")
|
||||
|
||||
// The slice is long enough for the whole vector - do a plain full-width load.
|
||||
s.startBlock(bTrue)
|
||||
ptr := s.newValue1(ssa.OpSlicePtr, t.PtrTo(), slice)
|
||||
s.vars[simdRes] = s.newValue2(ssa.OpLoad, t, ptr, s.mem())
|
||||
s.endBlock().AddEdgeTo(bEnd)
|
||||
|
||||
// Use original instruction sequence.
|
||||
s.startBlock(bFalse)
|
||||
// NOT IMPLEMENTED, NEED TO ADD GENERIC PARTIAL LOAD/STORE
|
||||
// MASK REGISTER DEPENDS ON ARCH AND ITS SIMD VERSION.
|
||||
s.endBlock().AddEdgeTo(bEnd)
|
||||
|
||||
// Merge results.
|
||||
s.startBlock(bEnd)
|
||||
return s.variable(simdRes, t)
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// findIntrinsic returns a function which builds the SSA equivalent of the
|
||||
|
|
@ -1627,7 +1725,8 @@ func findIntrinsic(sym *types.Sym) intrinsicBuilder {
|
|||
|
||||
fn := sym.Name
|
||||
if ssa.IntrinsicsDisable {
|
||||
if pkg == "internal/runtime/sys" && (fn == "GetCallerPC" || fn == "GrtCallerSP" || fn == "GetClosurePtr") {
|
||||
if pkg == "internal/runtime/sys" && (fn == "GetCallerPC" || fn == "GrtCallerSP" || fn == "GetClosurePtr") ||
|
||||
pkg == "internal/simd" || pkg == "simd" { // TODO after simd has been moved to package simd, remove internal/simd
|
||||
// These runtime functions don't have definitions, must be intrinsics.
|
||||
} else {
|
||||
return nil
|
||||
|
|
|
|||
15
src/cmd/compile/internal/ssagen/simdintrinsics.go
Normal file
15
src/cmd/compile/internal/ssagen/simdintrinsics.go
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
// Code generated by internal/simd/_gen using 'go run .'; DO NOT EDIT.
|
||||
|
||||
package ssagen
|
||||
|
||||
import (
|
||||
// "cmd/compile/internal/ir"
|
||||
// "cmd/compile/internal/ssa"
|
||||
// "cmd/compile/internal/types"
|
||||
"cmd/internal/sys"
|
||||
)
|
||||
|
||||
func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies ...sys.ArchFamily)) {
|
||||
// addF("internal/simd", "Int32x4.Uint32x4", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
||||
// etc
|
||||
}
|
||||
|
|
@ -623,6 +623,9 @@ func buildssa(fn *ir.Func, worker int, isPgoHot bool) *ssa.Func {
|
|||
// TODO figure out exactly what's unused, don't spill it. Make liveness fine-grained, also.
|
||||
for _, p := range params.InParams() {
|
||||
typs, offs := p.RegisterTypesAndOffsets()
|
||||
if len(offs) < len(typs) {
|
||||
s.Fatalf("len(offs)=%d < len(typs)=%d, params=\n%s", len(offs), len(typs), params)
|
||||
}
|
||||
for i, t := range typs {
|
||||
o := offs[i] // offset within parameter
|
||||
fo := p.FrameOffset(params) // offset of parameter in frame
|
||||
|
|
@ -1399,7 +1402,7 @@ func (s *state) instrument(t *types.Type, addr *ssa.Value, kind instrumentKind)
|
|||
// If it is instrumenting for MSAN or ASAN and t is a struct type, it instruments
|
||||
// operation for each field, instead of for the whole struct.
|
||||
func (s *state) instrumentFields(t *types.Type, addr *ssa.Value, kind instrumentKind) {
|
||||
if !(base.Flag.MSan || base.Flag.ASan) || !t.IsStruct() {
|
||||
if !(base.Flag.MSan || base.Flag.ASan) || !isStructNotSIMD(t) {
|
||||
s.instrument(t, addr, kind)
|
||||
return
|
||||
}
|
||||
|
|
@ -4335,7 +4338,7 @@ func (s *state) zeroVal(t *types.Type) *ssa.Value {
|
|||
return s.constInterface(t)
|
||||
case t.IsSlice():
|
||||
return s.constSlice(t)
|
||||
case t.IsStruct():
|
||||
case isStructNotSIMD(t):
|
||||
n := t.NumFields()
|
||||
v := s.entryNewValue0(ssa.OpStructMake, t)
|
||||
for i := 0; i < n; i++ {
|
||||
|
|
@ -4349,6 +4352,8 @@ func (s *state) zeroVal(t *types.Type) *ssa.Value {
|
|||
case 1:
|
||||
return s.entryNewValue1(ssa.OpArrayMake1, t, s.zeroVal(t.Elem()))
|
||||
}
|
||||
case t.IsSIMD():
|
||||
return s.newValue0(ssa.OpZeroSIMD, t)
|
||||
}
|
||||
s.Fatalf("zero for type %v not implemented", t)
|
||||
return nil
|
||||
|
|
@ -5328,7 +5333,7 @@ func (s *state) storeType(t *types.Type, left, right *ssa.Value, skip skipMask,
|
|||
// do *left = right for all scalar (non-pointer) parts of t.
|
||||
func (s *state) storeTypeScalars(t *types.Type, left, right *ssa.Value, skip skipMask) {
|
||||
switch {
|
||||
case t.IsBoolean() || t.IsInteger() || t.IsFloat() || t.IsComplex():
|
||||
case t.IsBoolean() || t.IsInteger() || t.IsFloat() || t.IsComplex() || t.IsSIMD():
|
||||
s.store(t, left, right)
|
||||
case t.IsPtrShaped():
|
||||
if t.IsPtr() && t.Elem().NotInHeap() {
|
||||
|
|
@ -5357,7 +5362,7 @@ func (s *state) storeTypeScalars(t *types.Type, left, right *ssa.Value, skip ski
|
|||
// itab field doesn't need a write barrier (even though it is a pointer).
|
||||
itab := s.newValue1(ssa.OpITab, s.f.Config.Types.BytePtr, right)
|
||||
s.store(types.Types[types.TUINTPTR], left, itab)
|
||||
case t.IsStruct():
|
||||
case isStructNotSIMD(t):
|
||||
n := t.NumFields()
|
||||
for i := 0; i < n; i++ {
|
||||
ft := t.FieldType(i)
|
||||
|
|
@ -5394,7 +5399,7 @@ func (s *state) storeTypePtrs(t *types.Type, left, right *ssa.Value) {
|
|||
idata := s.newValue1(ssa.OpIData, s.f.Config.Types.BytePtr, right)
|
||||
idataAddr := s.newValue1I(ssa.OpOffPtr, s.f.Config.Types.BytePtrPtr, s.config.PtrSize, left)
|
||||
s.store(s.f.Config.Types.BytePtr, idataAddr, idata)
|
||||
case t.IsStruct():
|
||||
case isStructNotSIMD(t):
|
||||
n := t.NumFields()
|
||||
for i := 0; i < n; i++ {
|
||||
ft := t.FieldType(i)
|
||||
|
|
@ -6477,7 +6482,7 @@ func EmitArgInfo(f *ir.Func, abiInfo *abi.ABIParamResultInfo) *obj.LSym {
|
|||
uintptrTyp := types.Types[types.TUINTPTR]
|
||||
|
||||
isAggregate := func(t *types.Type) bool {
|
||||
return t.IsStruct() || t.IsArray() || t.IsComplex() || t.IsInterface() || t.IsString() || t.IsSlice()
|
||||
return isStructNotSIMD(t) || t.IsArray() || t.IsComplex() || t.IsInterface() || t.IsString() || t.IsSlice()
|
||||
}
|
||||
|
||||
wOff := 0
|
||||
|
|
@ -6537,7 +6542,7 @@ func EmitArgInfo(f *ir.Func, abiInfo *abi.ABIParamResultInfo) *obj.LSym {
|
|||
}
|
||||
baseOffset += t.Elem().Size()
|
||||
}
|
||||
case t.IsStruct():
|
||||
case isStructNotSIMD(t):
|
||||
if t.NumFields() == 0 {
|
||||
n++ // {} counts as a component
|
||||
break
|
||||
|
|
@ -7554,7 +7559,7 @@ func (s *State) UseArgs(n int64) {
|
|||
// fieldIdx finds the index of the field referred to by the ODOT node n.
|
||||
func fieldIdx(n *ir.SelectorExpr) int {
|
||||
t := n.X.Type()
|
||||
if !t.IsStruct() {
|
||||
if !isStructNotSIMD(t) {
|
||||
panic("ODOT's LHS is not a struct")
|
||||
}
|
||||
|
||||
|
|
@ -7762,6 +7767,10 @@ func SpillSlotAddr(spill ssa.Spill, baseReg int16, extraOffset int64) obj.Addr {
|
|||
}
|
||||
}
|
||||
|
||||
func isStructNotSIMD(t *types.Type) bool {
|
||||
return t.IsStruct() && !t.IsSIMD()
|
||||
}
|
||||
|
||||
var (
|
||||
BoundsCheckFunc [ssa.BoundsKindCount]*obj.LSym
|
||||
ExtendCheckFunc [ssa.BoundsKindCount]*obj.LSym
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ import (
|
|||
|
||||
"cmd/compile/internal/base"
|
||||
"cmd/internal/src"
|
||||
"internal/buildcfg"
|
||||
"internal/types/errors"
|
||||
)
|
||||
|
||||
|
|
@ -410,6 +411,10 @@ func CalcSize(t *Type) {
|
|||
}
|
||||
CalcStructSize(t)
|
||||
w = t.width
|
||||
if t.IsSIMD() { // XXX
|
||||
t.intRegs = 0
|
||||
t.floatRegs = 1
|
||||
}
|
||||
|
||||
// make fake type to check later to
|
||||
// trigger function argument computation.
|
||||
|
|
@ -452,6 +457,31 @@ func CalcSize(t *Type) {
|
|||
ResumeCheckSize()
|
||||
}
|
||||
|
||||
// simdify marks a type as "SIMD", either as a tag field,
|
||||
// or as having the SIMD attribute. The tag field is a marker
|
||||
// type used to identify a struct that is not really a struct.
|
||||
// A SIMD type is allocated to a vector register (on amd64,
|
||||
// xmm, ymm, or zmm). The fields of a SIMD type are ignored
|
||||
// by the compiler except for the space that they reserve.
|
||||
func simdify(st *Type, isTag bool) {
|
||||
st.align = 8
|
||||
st.alg = AMEM
|
||||
st.intRegs = 0
|
||||
st.isSIMD = true
|
||||
if isTag {
|
||||
st.width = 0
|
||||
st.isSIMDTag = true
|
||||
st.floatRegs = 0
|
||||
} else {
|
||||
st.floatRegs = 1
|
||||
}
|
||||
// if st.Sym() != nil {
|
||||
// base.Warn("Simdify %s, %v, %d", st.Sym().Name, isTag, st.width)
|
||||
// } else {
|
||||
// base.Warn("Simdify %v, %v, %d", st, isTag, st.width)
|
||||
// }
|
||||
}
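For context, a hedged sketch of the declarations this helper and the CalcStructSize gate below expect to find in the simd package. Only the tag names v128/v256/v512 and the exported type Float64x4 are confirmed by this change (by the switch below and by testdata/sample.go); the payload fields are illustrative, not the actual package source.

package simd

type v256 struct{ _ [4]uint64 } // tag type (the gate requires >= 1 field); simdify(t, true) forces its width to 0

type Float64x4 struct {
	v256            // first field carries the tag, so CalcStructSize calls simdify(t, false)
	vals [4]float64 // illustrative payload reserving the 32 bytes the vector occupies
}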
|
||||
|
||||
// CalcStructSize calculates the size of t,
|
||||
// filling in t.width, t.align, t.intRegs, and t.floatRegs,
|
||||
// even if size calculation is otherwise disabled.
|
||||
|
|
@ -464,10 +494,27 @@ func CalcStructSize(t *Type) {
|
|||
switch {
|
||||
case sym.Name == "align64" && isAtomicStdPkg(sym.Pkg):
|
||||
maxAlign = 8
|
||||
|
||||
case buildcfg.Experiment.SIMD && (sym.Pkg.Path == "internal/simd" || sym.Pkg.Path == "simd") && len(t.Fields()) >= 1:
|
||||
// This gates the experiment -- without it, no user-visible types can be "simd".
|
||||
// The SSA-visible SIMD types remain.
|
||||
// TODO after simd has been moved to package simd, remove internal/simd.
|
||||
switch sym.Name {
|
||||
case "v128":
|
||||
simdify(t, true)
|
||||
return
|
||||
case "v256":
|
||||
simdify(t, true)
|
||||
return
|
||||
case "v512":
|
||||
simdify(t, true)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fields := t.Fields()
|
||||
|
||||
size := calcStructOffset(t, fields, 0)
|
||||
|
||||
// For non-zero-sized structs which end in a zero-sized field, we
|
||||
|
|
@ -540,6 +587,11 @@ func CalcStructSize(t *Type) {
|
|||
break
|
||||
}
|
||||
}
|
||||
|
||||
if len(t.Fields()) >= 1 && t.Fields()[0].Type.isSIMDTag {
|
||||
// this catches `type Foo simd.Whatever` -- Foo is also SIMD.
|
||||
simdify(t, false)
|
||||
}
|
||||
}
|
||||
|
||||
// CalcArraySize calculates the size of t,
|
||||
|
|
|
|||
|
|
@ -203,6 +203,7 @@ type Type struct {
|
|||
|
||||
flags bitset8
|
||||
alg AlgKind // valid if Align > 0
|
||||
isSIMDTag, isSIMD bool // tag is the marker type, isSIMD means has marker type
|
||||
|
||||
// size of prefix of object that contains all pointers. valid if Align > 0.
|
||||
// Note that for pointers, this is always PtrSize even if the element type
|
||||
|
|
@ -605,6 +606,12 @@ func newSSA(name string) *Type {
|
|||
return t
|
||||
}
|
||||
|
||||
func newSIMD(name string) *Type {
|
||||
t := newSSA(name)
|
||||
t.isSIMD = true
|
||||
return t
|
||||
}
|
||||
|
||||
// NewMap returns a new map Type with key type k and element (aka value) type v.
|
||||
func NewMap(k, v *Type) *Type {
|
||||
t := newType(TMAP)
|
||||
|
|
@ -995,10 +1002,7 @@ func (t *Type) ArgWidth() int64 {
|
|||
|
||||
func (t *Type) Size() int64 {
|
||||
if t.kind == TSSA {
|
||||
if t == TypeInt128 {
|
||||
return 16
|
||||
}
|
||||
return 0
|
||||
return t.width
|
||||
}
|
||||
CalcSize(t)
|
||||
return t.width
|
||||
|
|
@ -1626,12 +1630,26 @@ var (
|
|||
TypeFlags = newSSA("flags")
|
||||
TypeVoid = newSSA("void")
|
||||
TypeInt128 = newSSA("int128")
|
||||
TypeVec128 = newSIMD("vec128")
|
||||
TypeVec256 = newSIMD("vec256")
|
||||
TypeVec512 = newSIMD("vec512")
|
||||
TypeMask = newSSA("mask") // not a vector, not 100% sure what this should be.
|
||||
TypeResultMem = newResults([]*Type{TypeMem})
|
||||
)
|
||||
|
||||
func init() {
|
||||
TypeInt128.width = 16
|
||||
TypeInt128.align = 8
|
||||
|
||||
TypeVec128.width = 16
|
||||
TypeVec128.align = 8
|
||||
TypeVec256.width = 32
|
||||
TypeVec256.align = 8
|
||||
TypeVec512.width = 64
|
||||
TypeVec512.align = 8
|
||||
|
||||
TypeMask.width = 8 // This will depend on the architecture; spilling will be "interesting".
|
||||
TypeMask.align = 8
|
||||
}
|
||||
|
||||
// NewNamed returns a new named type for the given type name. obj should be an
|
||||
|
|
@ -2017,3 +2035,7 @@ var SimType [NTYPE]Kind
|
|||
|
||||
// Fake package for shape types (see typecheck.Shapify()).
|
||||
var ShapePkg = NewPkg("go.shape", "go.shape")
|
||||
|
||||
func (t *Type) IsSIMD() bool {
|
||||
return t.isSIMD
|
||||
}
|
||||
|
|
|
|||
src/internal/simd/dummy.s (new file, 7 lines)
|
|
@ -0,0 +1,7 @@
|
|||
// Copyright 2025 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build amd64
|
||||
|
||||
// Empty file to allow bodyless functions.
|
||||
src/internal/simd/testdata/sample.go (new file, vendored, 145 lines)
|
|
@ -0,0 +1,145 @@
|
|||
package sample
|
||||
|
||||
import (
|
||||
"internal/simd"
|
||||
"os"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
type S1 = simd.Float64x4
|
||||
|
||||
type S2 simd.Float64x4
|
||||
|
||||
func (s S2) Len() int {
|
||||
return simd.Float64x4(s).Len()
|
||||
}
|
||||
|
||||
func (s S2) Load(a []float64) S2 {
|
||||
return S2(simd.LoadFloat64x4FromSlice(a))
|
||||
}
|
||||
|
||||
func (s S2) Store(a []float64) {
|
||||
simd.Float64x4(s).Store(a)
|
||||
}
|
||||
|
||||
func (s S2) Add(a S2) S2 {
|
||||
return S2(simd.Float64x4(s).Add(simd.Float64x4(a)))
|
||||
}
|
||||
|
||||
func (s S2) Mul(a S2) S2 {
|
||||
return S2(simd.Float64x4(s).Mul(simd.Float64x4(a)))
|
||||
}
|
||||
|
||||
type S3 struct {
|
||||
simd.Float64x4
|
||||
}
|
||||
|
||||
func ip64_0(a, b []float64) float64 {
|
||||
s := 0.0
|
||||
for i := range a {
|
||||
s += a[i] * b[i]
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
func ip64_1(a, b []float64) float64 {
|
||||
var z S1
|
||||
sum := z
|
||||
var i int
|
||||
stride := z.Len()
|
||||
for ; i <= len(a)-stride; i += stride {
|
||||
va := simd.LoadFloat64x4FromSlice(a[i:])
|
||||
vb := simd.LoadFloat64x4FromSlice(b[i:])
|
||||
sum = sum.Add(va.Mul(vb))
|
||||
}
|
||||
var tmp [4]float64
|
||||
sum.Store(tmp[:])
|
||||
return tmp[0] + tmp[1] + tmp[2] + tmp[3]
|
||||
}
|
||||
|
||||
func ip64_1a(a, b []float64) float64 {
|
||||
var z S1
|
||||
sum := z
|
||||
var i int
|
||||
stride := z.Len()
|
||||
for ; i <= len(a)-stride; i += stride {
|
||||
va := simd.LoadFloat64x4FromSlice(a[i:])
|
||||
vb := simd.LoadFloat64x4FromSlice(b[i:])
|
||||
sum = FMA(sum, va, vb)
|
||||
}
|
||||
var tmp [4]float64
|
||||
sum.Store(tmp[:])
|
||||
return tmp[0] + tmp[1] + tmp[2] + tmp[3]
|
||||
}
|
||||
|
||||
//go:noinline
|
||||
func FMA(a, b, c simd.Float64x4) simd.Float64x4 {
|
||||
return a.Add(b.Mul(c))
|
||||
}
|
||||
|
||||
func ip64_2(a, b []float64) float64 {
|
||||
var z S2
|
||||
sum := z
|
||||
var i int
|
||||
stride := z.Len()
|
||||
for ; i <= len(a)-stride; i += stride {
|
||||
va := z.Load(a[i:])
|
||||
vb := z.Load(b[i:])
|
||||
sum = sum.Add(va.Mul(vb))
|
||||
}
|
||||
var tmp [4]float64
|
||||
sum.Store(tmp[:])
|
||||
return tmp[0] + tmp[1] + tmp[2] + tmp[3]
|
||||
}
|
||||
|
||||
func ip64_3(a, b []float64) float64 {
|
||||
var z S3
|
||||
sum := z
|
||||
var i int
|
||||
stride := z.Len()
|
||||
for ; i <= len(a)-stride; i += stride {
|
||||
va := simd.LoadFloat64x4FromSlice(a[i:])
|
||||
vb := simd.LoadFloat64x4FromSlice(b[i:])
|
||||
sum = S3{sum.Add(va.Mul(vb))}
|
||||
}
|
||||
var tmp [4]float64
|
||||
sum.Store(tmp[:])
|
||||
return tmp[0] + tmp[1] + tmp[2] + tmp[3]
|
||||
}
|
||||
|
||||
func main() {
|
||||
a := []float64{1, 2, 3, 4, 5, 6, 7, 8}
|
||||
ip0 := ip64_0(a, a)
|
||||
ip1 := ip64_1(a, a)
|
||||
ip1a := ip64_1a(a, a)
|
||||
ip2 := ip64_2(a, a)
|
||||
ip3 := ip64_3(a, a)
|
||||
fmt.Printf("Test IP = %f\n", ip0)
|
||||
fmt.Printf("SIMD IP 1 = %f\n", ip1)
|
||||
fmt.Printf("SIMD IP 1a = %f\n", ip1a)
|
||||
fmt.Printf("SIMD IP 2 = %f\n", ip2)
|
||||
fmt.Printf("SIMD IP 3 = %f\n", ip3)
|
||||
var z1 S1
|
||||
var z2 S2
|
||||
var z3 S2
|
||||
|
||||
s1, s2, s3 := unsafe.Sizeof(z1), unsafe.Sizeof(z2), unsafe.Sizeof(z3)
|
||||
|
||||
fmt.Printf("unsafe.Sizeof(z1, z2, z3)=%d, %d, %d\n", s1, s2, s3)
|
||||
|
||||
fail := false
|
||||
|
||||
if s1 != 32 || s2 != 32 || s3 != 32 {
|
||||
fmt.Println("Failed a sizeof check, should all be 32")
|
||||
fail = true
|
||||
}
|
||||
|
||||
if ip1 != ip0 || ip1a != ip0 || ip2 != ip0 || ip3 != ip0 {
|
||||
fmt.Println("Failed an inner product check, should all be", ip0)
|
||||
fail = true
|
||||
}
|
||||
|
||||
if fail {
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||