mirror of https://github.com/golang/go.git
[dev.simd] cmd/compile, simd: support load from bits for mask
This CL is partially generated by CL 688855.

Change-Id: I68d5fbad9445a3d2cf671822be1c0b82e7290396
Reviewed-on: https://go-review.googlesource.com/c/go/+/688875
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: David Chase <drchase@google.com>
parent f0e9dc0975
commit 957f06c410

10 changed files with 480 additions and 26 deletions
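
For orientation before the diff: a minimal usage sketch of the mask-from-bits API this CL adds, modeled on the TestBitMask case in the diff below. The bare "simd" import path and the AVX-512 availability check reflect the dev.simd experiment and are assumptions outside this diff, not a definitive API reference.

// Usage sketch; assumes the experimental "simd" package from the dev.simd branch.
package main

import (
	"fmt"

	"simd"
)

func main() {
	if !simd.HasAVX512() {
		fmt.Println("AVX-512 not available; skipping")
		return
	}
	// Bit i of the bitmap selects lane i, so 0b10 enables only lane 1.
	var bits uint64 = 0b10
	m := simd.LoadMask64x2FromBits(&bits)

	a := simd.LoadInt64x2Slice([]int64{1, 2})
	b := simd.LoadInt64x2Slice([]int64{3, 4})

	var out [2]int64
	a.AddMasked(b, m).Store(&out) // the masked-off lane 0 is left zero
	fmt.Println(out)              // expected: [0 6], matching TestBitMask in this CL
}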
@@ -1461,13 +1461,13 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
 		p.AddRestSourceReg(simdReg(v.Args[1]))
 		p.To.Type = obj.TYPE_REG
 		p.To.Reg = simdReg(v)
-	case ssa.OpAMD64VMOVDQUload128, ssa.OpAMD64VMOVDQUload256, ssa.OpAMD64VMOVDQUload512:
+	case ssa.OpAMD64VMOVDQUload128, ssa.OpAMD64VMOVDQUload256, ssa.OpAMD64VMOVDQUload512, ssa.OpAMD64KMOVQload:
 		p := s.Prog(v.Op.Asm())
 		p.From.Type = obj.TYPE_MEM
 		p.From.Reg = v.Args[0].Reg()
 		ssagen.AddAux(&p.From, v)
 		p.To.Type = obj.TYPE_REG
-		p.To.Reg = simdReg(v)
+		p.To.Reg = simdOrMaskReg(v)
 	case ssa.OpAMD64VMOVDQUstore128, ssa.OpAMD64VMOVDQUstore256, ssa.OpAMD64VMOVDQUstore512:
 		p := s.Prog(v.Op.Asm())
 		p.From.Type = obj.TYPE_REG
@@ -1682,6 +1682,22 @@
 (Select0 a:(ADD(Q|L)constflags [c] x)) && a.Uses == 1 => (ADD(Q|L)const [c] x)

 // XXX SIMD
+(LoadMask8x16 <t> ptr mem) => (VPMOVMToVec8x16 <types.TypeVec128> (KMOVQload <t> ptr mem))
+(LoadMask8x32 <t> ptr mem) => (VPMOVMToVec8x32 <types.TypeVec256> (KMOVQload <t> ptr mem))
+(LoadMask8x64 <t> ptr mem) => (VPMOVMToVec8x64 <types.TypeVec512> (KMOVQload <t> ptr mem))
+
+(LoadMask16x8 <t> ptr mem) => (VPMOVMToVec16x8 <types.TypeVec128> (KMOVQload <t> ptr mem))
+(LoadMask16x16 <t> ptr mem) => (VPMOVMToVec16x16 <types.TypeVec256> (KMOVQload <t> ptr mem))
+(LoadMask16x32 <t> ptr mem) => (VPMOVMToVec16x32 <types.TypeVec512> (KMOVQload <t> ptr mem))
+
+(LoadMask32x4 <t> ptr mem) => (VPMOVMToVec32x4 <types.TypeVec128> (KMOVQload <t> ptr mem))
+(LoadMask32x8 <t> ptr mem) => (VPMOVMToVec32x8 <types.TypeVec256> (KMOVQload <t> ptr mem))
+(LoadMask32x16 <t> ptr mem) => (VPMOVMToVec32x16 <types.TypeVec512> (KMOVQload <t> ptr mem))
+
+(LoadMask64x2 <t> ptr mem) => (VPMOVMToVec64x2 <types.TypeVec128> (KMOVQload <t> ptr mem))
+(LoadMask64x4 <t> ptr mem) => (VPMOVMToVec64x4 <types.TypeVec256> (KMOVQload <t> ptr mem))
+(LoadMask64x8 <t> ptr mem) => (VPMOVMToVec64x8 <types.TypeVec512> (KMOVQload <t> ptr mem))
+
 (Load <t> ptr mem) && t.Size() == 16 => (VMOVDQUload128 ptr mem)

 (Store {t} ptr val mem) && t.Size() == 16 => (VMOVDQUstore128 ptr val mem)
@@ -234,6 +234,8 @@ func init() {
 		wfpw    = regInfo{inputs: []regMask{w, fp}, outputs: wonly}
 		wfpkw   = regInfo{inputs: []regMask{w, fp, mask}, outputs: wonly}
+
+		kload = regInfo{inputs: []regMask{gpspsb, 0}, outputs: maskonly}

 		prefreg = regInfo{inputs: []regMask{gpspsbg}}
 	)

@@ -1314,6 +1316,8 @@ func init() {
 		{name: "VZEROUPPER", argLength: 0, asm: "VZEROUPPER"},
 		{name: "VZEROALL", argLength: 0, asm: "VZEROALL"},
+
+		{name: "KMOVQload", argLength: 2, reg: kload, asm: "KMOVQ", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"},
 	}

 	var AMD64blocks = []blockData{
@@ -666,6 +666,18 @@ var genericOps = []opData{
 	// XXX SIMD
 	{name: "Add32x4", argLength: 2}, // arg0 + arg1
 	{name: "ZeroSIMD", argLength: 0},
+	{name: "LoadMask8x16", argLength: 2},  // arg0 = ptr, arg1 = mem
+	{name: "LoadMask8x32", argLength: 2},  // arg0 = ptr, arg1 = mem
+	{name: "LoadMask8x64", argLength: 2},  // arg0 = ptr, arg1 = mem
+	{name: "LoadMask16x8", argLength: 2},  // arg0 = ptr, arg1 = mem
+	{name: "LoadMask16x16", argLength: 2}, // arg0 = ptr, arg1 = mem
+	{name: "LoadMask16x32", argLength: 2}, // arg0 = ptr, arg1 = mem
+	{name: "LoadMask32x4", argLength: 2},  // arg0 = ptr, arg1 = mem
+	{name: "LoadMask32x8", argLength: 2},  // arg0 = ptr, arg1 = mem
+	{name: "LoadMask32x16", argLength: 2}, // arg0 = ptr, arg1 = mem
+	{name: "LoadMask64x2", argLength: 2},  // arg0 = ptr, arg1 = mem
+	{name: "LoadMask64x4", argLength: 2},  // arg0 = ptr, arg1 = mem
+	{name: "LoadMask64x8", argLength: 2},  // arg0 = ptr, arg1 = mem
 }

 // kind controls successors implicit exit
@@ -1198,6 +1198,7 @@ const (
 	OpAMD64Zero512
 	OpAMD64VZEROUPPER
 	OpAMD64VZEROALL
+	OpAMD64KMOVQload
 	OpAMD64VADDPS512
 	OpAMD64VADDPSMasked512
 	OpAMD64VRCP14PS512
@@ -4403,6 +4404,18 @@ const (
 	OpPrefetchCacheStreamed
 	OpAdd32x4
 	OpZeroSIMD
+	OpLoadMask8x16
+	OpLoadMask8x32
+	OpLoadMask8x64
+	OpLoadMask16x8
+	OpLoadMask16x16
+	OpLoadMask16x32
+	OpLoadMask32x4
+	OpLoadMask32x8
+	OpLoadMask32x16
+	OpLoadMask64x2
+	OpLoadMask64x4
+	OpLoadMask64x8
 	OpAddFloat32x16
 	OpAddMaskedFloat32x16
 	OpApproximateReciprocalFloat32x16
@@ -18801,6 +18814,22 @@ var opcodeTable = [...]opInfo{
 		asm: x86.AVZEROALL,
 		reg: regInfo{},
 	},
+	{
+		name:           "KMOVQload",
+		auxType:        auxSymOff,
+		argLen:         2,
+		faultOnNilArg0: true,
+		symEffect:      SymRead,
+		asm:            x86.AKMOVQ,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
+			},
+			outputs: []outputInfo{
+				{0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+			},
+		},
+	},
 	{
 		name:   "VADDPS512",
 		argLen: 2,
@@ -60727,6 +60756,66 @@ var opcodeTable = [...]opInfo{
 		argLen:  0,
 		generic: true,
 	},
+	{
+		name:    "LoadMask8x16",
+		argLen:  2,
+		generic: true,
+	},
+	{
+		name:    "LoadMask8x32",
+		argLen:  2,
+		generic: true,
+	},
+	{
+		name:    "LoadMask8x64",
+		argLen:  2,
+		generic: true,
+	},
+	{
+		name:    "LoadMask16x8",
+		argLen:  2,
+		generic: true,
+	},
+	{
+		name:    "LoadMask16x16",
+		argLen:  2,
+		generic: true,
+	},
+	{
+		name:    "LoadMask16x32",
+		argLen:  2,
+		generic: true,
+	},
+	{
+		name:    "LoadMask32x4",
+		argLen:  2,
+		generic: true,
+	},
+	{
+		name:    "LoadMask32x8",
+		argLen:  2,
+		generic: true,
+	},
+	{
+		name:    "LoadMask32x16",
+		argLen:  2,
+		generic: true,
+	},
+	{
+		name:    "LoadMask64x2",
+		argLen:  2,
+		generic: true,
+	},
+	{
+		name:    "LoadMask64x4",
+		argLen:  2,
+		generic: true,
+	},
+	{
+		name:    "LoadMask64x8",
+		argLen:  2,
+		generic: true,
+	},
 	{
 		name:    "AddFloat32x16",
 		argLen:  2,
@@ -2438,6 +2438,30 @@ func rewriteValueAMD64(v *Value) bool {
 		return rewriteValueAMD64_OpLessUint8x64(v)
 	case OpLoad:
 		return rewriteValueAMD64_OpLoad(v)
+	case OpLoadMask16x16:
+		return rewriteValueAMD64_OpLoadMask16x16(v)
+	case OpLoadMask16x32:
+		return rewriteValueAMD64_OpLoadMask16x32(v)
+	case OpLoadMask16x8:
+		return rewriteValueAMD64_OpLoadMask16x8(v)
+	case OpLoadMask32x16:
+		return rewriteValueAMD64_OpLoadMask32x16(v)
+	case OpLoadMask32x4:
+		return rewriteValueAMD64_OpLoadMask32x4(v)
+	case OpLoadMask32x8:
+		return rewriteValueAMD64_OpLoadMask32x8(v)
+	case OpLoadMask64x2:
+		return rewriteValueAMD64_OpLoadMask64x2(v)
+	case OpLoadMask64x4:
+		return rewriteValueAMD64_OpLoadMask64x4(v)
+	case OpLoadMask64x8:
+		return rewriteValueAMD64_OpLoadMask64x8(v)
+	case OpLoadMask8x16:
+		return rewriteValueAMD64_OpLoadMask8x16(v)
+	case OpLoadMask8x32:
+		return rewriteValueAMD64_OpLoadMask8x32(v)
+	case OpLoadMask8x64:
+		return rewriteValueAMD64_OpLoadMask8x64(v)
 	case OpLocalAddr:
 		return rewriteValueAMD64_OpLocalAddr(v)
 	case OpLsh16x16:
@@ -40303,6 +40327,222 @@ func rewriteValueAMD64_OpLoad(v *Value) bool {
 	}
 	return false
 }
+func rewriteValueAMD64_OpLoadMask16x16(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (LoadMask16x16 <t> ptr mem)
+	// result: (VPMOVMToVec16x16 <types.TypeVec256> (KMOVQload <t> ptr mem))
+	for {
+		t := v.Type
+		ptr := v_0
+		mem := v_1
+		v.reset(OpAMD64VPMOVMToVec16x16)
+		v.Type = types.TypeVec256
+		v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
+		v0.AddArg2(ptr, mem)
+		v.AddArg(v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpLoadMask16x32(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (LoadMask16x32 <t> ptr mem)
+	// result: (VPMOVMToVec16x32 <types.TypeVec512> (KMOVQload <t> ptr mem))
+	for {
+		t := v.Type
+		ptr := v_0
+		mem := v_1
+		v.reset(OpAMD64VPMOVMToVec16x32)
+		v.Type = types.TypeVec512
+		v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
+		v0.AddArg2(ptr, mem)
+		v.AddArg(v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpLoadMask16x8(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (LoadMask16x8 <t> ptr mem)
+	// result: (VPMOVMToVec16x8 <types.TypeVec128> (KMOVQload <t> ptr mem))
+	for {
+		t := v.Type
+		ptr := v_0
+		mem := v_1
+		v.reset(OpAMD64VPMOVMToVec16x8)
+		v.Type = types.TypeVec128
+		v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
+		v0.AddArg2(ptr, mem)
+		v.AddArg(v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpLoadMask32x16(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (LoadMask32x16 <t> ptr mem)
+	// result: (VPMOVMToVec32x16 <types.TypeVec512> (KMOVQload <t> ptr mem))
+	for {
+		t := v.Type
+		ptr := v_0
+		mem := v_1
+		v.reset(OpAMD64VPMOVMToVec32x16)
+		v.Type = types.TypeVec512
+		v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
+		v0.AddArg2(ptr, mem)
+		v.AddArg(v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpLoadMask32x4(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (LoadMask32x4 <t> ptr mem)
+	// result: (VPMOVMToVec32x4 <types.TypeVec128> (KMOVQload <t> ptr mem))
+	for {
+		t := v.Type
+		ptr := v_0
+		mem := v_1
+		v.reset(OpAMD64VPMOVMToVec32x4)
+		v.Type = types.TypeVec128
+		v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
+		v0.AddArg2(ptr, mem)
+		v.AddArg(v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpLoadMask32x8(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (LoadMask32x8 <t> ptr mem)
+	// result: (VPMOVMToVec32x8 <types.TypeVec256> (KMOVQload <t> ptr mem))
+	for {
+		t := v.Type
+		ptr := v_0
+		mem := v_1
+		v.reset(OpAMD64VPMOVMToVec32x8)
+		v.Type = types.TypeVec256
+		v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
+		v0.AddArg2(ptr, mem)
+		v.AddArg(v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpLoadMask64x2(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (LoadMask64x2 <t> ptr mem)
+	// result: (VPMOVMToVec64x2 <types.TypeVec128> (KMOVQload <t> ptr mem))
+	for {
+		t := v.Type
+		ptr := v_0
+		mem := v_1
+		v.reset(OpAMD64VPMOVMToVec64x2)
+		v.Type = types.TypeVec128
+		v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
+		v0.AddArg2(ptr, mem)
+		v.AddArg(v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpLoadMask64x4(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (LoadMask64x4 <t> ptr mem)
+	// result: (VPMOVMToVec64x4 <types.TypeVec256> (KMOVQload <t> ptr mem))
+	for {
+		t := v.Type
+		ptr := v_0
+		mem := v_1
+		v.reset(OpAMD64VPMOVMToVec64x4)
+		v.Type = types.TypeVec256
+		v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
+		v0.AddArg2(ptr, mem)
+		v.AddArg(v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpLoadMask64x8(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (LoadMask64x8 <t> ptr mem)
+	// result: (VPMOVMToVec64x8 <types.TypeVec512> (KMOVQload <t> ptr mem))
+	for {
+		t := v.Type
+		ptr := v_0
+		mem := v_1
+		v.reset(OpAMD64VPMOVMToVec64x8)
+		v.Type = types.TypeVec512
+		v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
+		v0.AddArg2(ptr, mem)
+		v.AddArg(v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpLoadMask8x16(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (LoadMask8x16 <t> ptr mem)
+	// result: (VPMOVMToVec8x16 <types.TypeVec128> (KMOVQload <t> ptr mem))
+	for {
+		t := v.Type
+		ptr := v_0
+		mem := v_1
+		v.reset(OpAMD64VPMOVMToVec8x16)
+		v.Type = types.TypeVec128
+		v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
+		v0.AddArg2(ptr, mem)
+		v.AddArg(v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpLoadMask8x32(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (LoadMask8x32 <t> ptr mem)
+	// result: (VPMOVMToVec8x32 <types.TypeVec256> (KMOVQload <t> ptr mem))
+	for {
+		t := v.Type
+		ptr := v_0
+		mem := v_1
+		v.reset(OpAMD64VPMOVMToVec8x32)
+		v.Type = types.TypeVec256
+		v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
+		v0.AddArg2(ptr, mem)
+		v.AddArg(v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpLoadMask8x64(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (LoadMask8x64 <t> ptr mem)
+	// result: (VPMOVMToVec8x64 <types.TypeVec512> (KMOVQload <t> ptr mem))
+	for {
+		t := v.Type
+		ptr := v_0
+		mem := v_1
+		v.reset(OpAMD64VPMOVMToVec8x64)
+		v.Type = types.TypeVec512
+		v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
+		v0.AddArg2(ptr, mem)
+		v.AddArg(v0)
+		return true
+	}
+}
 func rewriteValueAMD64_OpLocalAddr(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
@@ -1775,6 +1775,22 @@ func simdStore() func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
 	}
 }
+
+func simdLoadMask(elemBits, lanes int) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+		opCodes := map[int]map[int]ssa.Op{
+			8:  {16: ssa.OpLoadMask8x16, 32: ssa.OpLoadMask8x32, 64: ssa.OpLoadMask8x64},
+			16: {8: ssa.OpLoadMask16x8, 16: ssa.OpLoadMask16x16, 32: ssa.OpLoadMask16x32},
+			32: {4: ssa.OpLoadMask32x4, 8: ssa.OpLoadMask32x8, 16: ssa.OpLoadMask32x16},
+			64: {2: ssa.OpLoadMask64x2, 4: ssa.OpLoadMask64x4, 8: ssa.OpLoadMask64x8},
+		}
+		op := opCodes[elemBits][lanes]
+		if op == 0 {
+			panic(fmt.Sprintf("Unknown mask shape: Mask%dx%d", elemBits, lanes))
+		}
+		return s.newValue2(op, types.TypeMask, args[0], s.mem())
+	}
+}

 // findIntrinsic returns a function which builds the SSA equivalent of the
 // function identified by the symbol sym. If sym is not an intrinsic call, returns nil.
 func findIntrinsic(sym *types.Sym) intrinsicBuilder {
@@ -2132,76 +2132,64 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
 	addF(simdPackage, "Uint64x4.Store", simdStore(), sys.AMD64)
 	addF(simdPackage, "LoadUint64x8", simdLoad(), sys.AMD64)
 	addF(simdPackage, "Uint64x8.Store", simdStore(), sys.AMD64)
-	addF(simdPackage, "LoadMask8x16", simdLoad(), sys.AMD64)
-	addF(simdPackage, "Mask8x16.Store", simdStore(), sys.AMD64)
-	addF(simdPackage, "LoadMask8x32", simdLoad(), sys.AMD64)
-	addF(simdPackage, "Mask8x32.Store", simdStore(), sys.AMD64)
-	addF(simdPackage, "LoadMask8x64", simdLoad(), sys.AMD64)
-	addF(simdPackage, "Mask8x64.Store", simdStore(), sys.AMD64)
-	addF(simdPackage, "LoadMask16x8", simdLoad(), sys.AMD64)
-	addF(simdPackage, "Mask16x8.Store", simdStore(), sys.AMD64)
-	addF(simdPackage, "LoadMask16x16", simdLoad(), sys.AMD64)
-	addF(simdPackage, "Mask16x16.Store", simdStore(), sys.AMD64)
-	addF(simdPackage, "LoadMask16x32", simdLoad(), sys.AMD64)
-	addF(simdPackage, "Mask16x32.Store", simdStore(), sys.AMD64)
-	addF(simdPackage, "LoadMask32x4", simdLoad(), sys.AMD64)
-	addF(simdPackage, "Mask32x4.Store", simdStore(), sys.AMD64)
-	addF(simdPackage, "LoadMask32x8", simdLoad(), sys.AMD64)
-	addF(simdPackage, "Mask32x8.Store", simdStore(), sys.AMD64)
-	addF(simdPackage, "LoadMask32x16", simdLoad(), sys.AMD64)
-	addF(simdPackage, "Mask32x16.Store", simdStore(), sys.AMD64)
-	addF(simdPackage, "LoadMask64x2", simdLoad(), sys.AMD64)
-	addF(simdPackage, "Mask64x2.Store", simdStore(), sys.AMD64)
-	addF(simdPackage, "LoadMask64x4", simdLoad(), sys.AMD64)
-	addF(simdPackage, "Mask64x4.Store", simdStore(), sys.AMD64)
-	addF(simdPackage, "LoadMask64x8", simdLoad(), sys.AMD64)
-	addF(simdPackage, "Mask64x8.Store", simdStore(), sys.AMD64)
 	addF(simdPackage, "Mask8x16.AsInt8x16", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
 	addF(simdPackage, "Int8x16.AsMask8x16", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
 	addF(simdPackage, "Mask8x16.And", opLen2(ssa.OpAndInt32x4, types.TypeVec128), sys.AMD64)
 	addF(simdPackage, "Mask8x16.Or", opLen2(ssa.OpOrInt32x4, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "LoadMask8x16FromBits", simdLoadMask(8, 16), sys.AMD64)
 	addF(simdPackage, "Mask8x32.AsInt8x32", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
 	addF(simdPackage, "Int8x32.AsMask8x32", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
 	addF(simdPackage, "Mask8x32.And", opLen2(ssa.OpAndInt32x8, types.TypeVec256), sys.AMD64)
 	addF(simdPackage, "Mask8x32.Or", opLen2(ssa.OpOrInt32x8, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "LoadMask8x32FromBits", simdLoadMask(8, 32), sys.AMD64)
 	addF(simdPackage, "Mask8x64.AsInt8x64", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
 	addF(simdPackage, "Int8x64.AsMask8x64", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
 	addF(simdPackage, "Mask8x64.And", opLen2(ssa.OpAndInt32x16, types.TypeVec512), sys.AMD64)
 	addF(simdPackage, "Mask8x64.Or", opLen2(ssa.OpOrInt32x16, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "LoadMask8x64FromBits", simdLoadMask(8, 64), sys.AMD64)
 	addF(simdPackage, "Mask16x8.AsInt16x8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
 	addF(simdPackage, "Int16x8.AsMask16x8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
 	addF(simdPackage, "Mask16x8.And", opLen2(ssa.OpAndInt32x4, types.TypeVec128), sys.AMD64)
 	addF(simdPackage, "Mask16x8.Or", opLen2(ssa.OpOrInt32x4, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "LoadMask16x8FromBits", simdLoadMask(16, 8), sys.AMD64)
 	addF(simdPackage, "Mask16x16.AsInt16x16", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
 	addF(simdPackage, "Int16x16.AsMask16x16", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
 	addF(simdPackage, "Mask16x16.And", opLen2(ssa.OpAndInt32x8, types.TypeVec256), sys.AMD64)
 	addF(simdPackage, "Mask16x16.Or", opLen2(ssa.OpOrInt32x8, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "LoadMask16x16FromBits", simdLoadMask(16, 16), sys.AMD64)
 	addF(simdPackage, "Mask16x32.AsInt16x32", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
 	addF(simdPackage, "Int16x32.AsMask16x32", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
 	addF(simdPackage, "Mask16x32.And", opLen2(ssa.OpAndInt32x16, types.TypeVec512), sys.AMD64)
 	addF(simdPackage, "Mask16x32.Or", opLen2(ssa.OpOrInt32x16, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "LoadMask16x32FromBits", simdLoadMask(16, 32), sys.AMD64)
 	addF(simdPackage, "Mask32x4.AsInt32x4", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
 	addF(simdPackage, "Int32x4.AsMask32x4", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
 	addF(simdPackage, "Mask32x4.And", opLen2(ssa.OpAndInt32x4, types.TypeVec128), sys.AMD64)
 	addF(simdPackage, "Mask32x4.Or", opLen2(ssa.OpOrInt32x4, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "LoadMask32x4FromBits", simdLoadMask(32, 4), sys.AMD64)
 	addF(simdPackage, "Mask32x8.AsInt32x8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
 	addF(simdPackage, "Int32x8.AsMask32x8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
 	addF(simdPackage, "Mask32x8.And", opLen2(ssa.OpAndInt32x8, types.TypeVec256), sys.AMD64)
 	addF(simdPackage, "Mask32x8.Or", opLen2(ssa.OpOrInt32x8, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "LoadMask32x8FromBits", simdLoadMask(32, 8), sys.AMD64)
 	addF(simdPackage, "Mask32x16.AsInt32x16", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
 	addF(simdPackage, "Int32x16.AsMask32x16", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
 	addF(simdPackage, "Mask32x16.And", opLen2(ssa.OpAndInt32x16, types.TypeVec512), sys.AMD64)
 	addF(simdPackage, "Mask32x16.Or", opLen2(ssa.OpOrInt32x16, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "LoadMask32x16FromBits", simdLoadMask(32, 16), sys.AMD64)
 	addF(simdPackage, "Mask64x2.AsInt64x2", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
 	addF(simdPackage, "Int64x2.AsMask64x2", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
 	addF(simdPackage, "Mask64x2.And", opLen2(ssa.OpAndInt32x4, types.TypeVec128), sys.AMD64)
 	addF(simdPackage, "Mask64x2.Or", opLen2(ssa.OpOrInt32x4, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "LoadMask64x2FromBits", simdLoadMask(64, 2), sys.AMD64)
 	addF(simdPackage, "Mask64x4.AsInt64x4", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
 	addF(simdPackage, "Int64x4.AsMask64x4", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
 	addF(simdPackage, "Mask64x4.And", opLen2(ssa.OpAndInt32x8, types.TypeVec256), sys.AMD64)
 	addF(simdPackage, "Mask64x4.Or", opLen2(ssa.OpOrInt32x8, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "LoadMask64x4FromBits", simdLoadMask(64, 4), sys.AMD64)
 	addF(simdPackage, "Mask64x8.AsInt64x8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
 	addF(simdPackage, "Int64x8.AsMask64x8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
 	addF(simdPackage, "Mask64x8.And", opLen2(ssa.OpAndInt32x16, types.TypeVec512), sys.AMD64)
 	addF(simdPackage, "Mask64x8.Or", opLen2(ssa.OpOrInt32x16, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "LoadMask64x8FromBits", simdLoadMask(64, 8), sys.AMD64)
 }
@@ -460,3 +460,20 @@ func testMergeLocalswrapper(t *testing.T, op func(simd.Int64x4, simd.Int64x4) si
 		}
 	}
 }
+
+func TestBitMask(t *testing.T) {
+	if !simd.HasAVX512() {
+		t.Skip("Test requires HasAVX512, not available on this hardware")
+		return
+	}
+	var bits uint64 = 0b10
+	results := [2]int64{}
+	want := [2]int64{0, 6}
+	m := simd.LoadMask64x2FromBits(&bits)
+	simd.LoadInt64x2Slice([]int64{1, 2}).AddMasked(simd.LoadInt64x2Slice([]int64{3, 4}), m).Store(&results)
+	for i := range 2 {
+		if results[i] != want[i] {
+			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], results[i])
+		}
+	}
+}
@@ -205,24 +205,48 @@ type Mask8x16 struct {
 	vals [16]int8
 }
+
+// Mask8x16FromBits constructs a Mask8x16 from an a bitmap, where 1 means set for the indexed element, 0 means unset.
+// Only the lower 16 bits of y are used.
+//
+//go:noescape
+func LoadMask8x16FromBits(y *uint64) Mask8x16

 // Mask16x8 is a 128-bit SIMD vector of 8 int16
 type Mask16x8 struct {
 	int16x8 v128
 	vals    [8]int16
 }
+
+// Mask16x8FromBits constructs a Mask16x8 from an a bitmap, where 1 means set for the indexed element, 0 means unset.
+// Only the lower 8 bits of y are used.
+//
+//go:noescape
+func LoadMask16x8FromBits(y *uint64) Mask16x8

 // Mask32x4 is a 128-bit SIMD vector of 4 int32
 type Mask32x4 struct {
 	int32x4 v128
 	vals    [4]int32
 }
+
+// Mask32x4FromBits constructs a Mask32x4 from an a bitmap, where 1 means set for the indexed element, 0 means unset.
+// Only the lower 4 bits of y are used.
+//
+//go:noescape
+func LoadMask32x4FromBits(y *uint64) Mask32x4

 // Mask64x2 is a 128-bit SIMD vector of 2 int64
 type Mask64x2 struct {
 	int64x2 v128
 	vals    [2]int64
 }
+
+// Mask64x2FromBits constructs a Mask64x2 from an a bitmap, where 1 means set for the indexed element, 0 means unset.
+// Only the lower 2 bits of y are used.
+//
+//go:noescape
+func LoadMask64x2FromBits(y *uint64) Mask64x2

 // v256 is a tag type that tells the compiler that this is really 256-bit SIMD
 type v256 struct {
 	_256 struct{}
@@ -424,24 +448,48 @@ type Mask8x32 struct {
 	vals [32]int8
 }
+
+// Mask8x32FromBits constructs a Mask8x32 from an a bitmap, where 1 means set for the indexed element, 0 means unset.
+// Only the lower 32 bits of y are used.
+//
+//go:noescape
+func LoadMask8x32FromBits(y *uint64) Mask8x32

 // Mask16x16 is a 256-bit SIMD vector of 16 int16
 type Mask16x16 struct {
 	int16x16 v256
 	vals     [16]int16
 }
+
+// Mask16x16FromBits constructs a Mask16x16 from an a bitmap, where 1 means set for the indexed element, 0 means unset.
+// Only the lower 16 bits of y are used.
+//
+//go:noescape
+func LoadMask16x16FromBits(y *uint64) Mask16x16

 // Mask32x8 is a 256-bit SIMD vector of 8 int32
 type Mask32x8 struct {
 	int32x8 v256
 	vals    [8]int32
 }
+
+// Mask32x8FromBits constructs a Mask32x8 from an a bitmap, where 1 means set for the indexed element, 0 means unset.
+// Only the lower 8 bits of y are used.
+//
+//go:noescape
+func LoadMask32x8FromBits(y *uint64) Mask32x8

 // Mask64x4 is a 256-bit SIMD vector of 4 int64
 type Mask64x4 struct {
 	int64x4 v256
 	vals    [4]int64
 }
+
+// Mask64x4FromBits constructs a Mask64x4 from an a bitmap, where 1 means set for the indexed element, 0 means unset.
+// Only the lower 4 bits of y are used.
+//
+//go:noescape
+func LoadMask64x4FromBits(y *uint64) Mask64x4

 // v512 is a tag type that tells the compiler that this is really 512-bit SIMD
 type v512 struct {
 	_512 struct{}
@@ -643,20 +691,44 @@ type Mask8x64 struct {
 	vals [64]int8
 }
+
+// Mask8x64FromBits constructs a Mask8x64 from an a bitmap, where 1 means set for the indexed element, 0 means unset.
+// Only the lower 64 bits of y are used.
+//
+//go:noescape
+func LoadMask8x64FromBits(y *uint64) Mask8x64

 // Mask16x32 is a 512-bit SIMD vector of 32 int16
 type Mask16x32 struct {
 	int16x32 v512
 	vals     [32]int16
 }
+
+// Mask16x32FromBits constructs a Mask16x32 from an a bitmap, where 1 means set for the indexed element, 0 means unset.
+// Only the lower 32 bits of y are used.
+//
+//go:noescape
+func LoadMask16x32FromBits(y *uint64) Mask16x32

 // Mask32x16 is a 512-bit SIMD vector of 16 int32
 type Mask32x16 struct {
 	int32x16 v512
 	vals     [16]int32
 }
+
+// Mask32x16FromBits constructs a Mask32x16 from an a bitmap, where 1 means set for the indexed element, 0 means unset.
+// Only the lower 16 bits of y are used.
+//
+//go:noescape
+func LoadMask32x16FromBits(y *uint64) Mask32x16

 // Mask64x8 is a 512-bit SIMD vector of 8 int64
 type Mask64x8 struct {
 	int64x8 v512
 	vals    [8]int64
 }
+
+// Mask64x8FromBits constructs a Mask64x8 from an a bitmap, where 1 means set for the indexed element, 0 means unset.
+// Only the lower 8 bits of y are used.
+//
+//go:noescape
+func LoadMask64x8FromBits(y *uint64) Mask64x8