mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
[dev.simd] cmd/compile, simd: add definitions for VPTERNLOG[DQ]
This includes a non-public intrinsic for testing, and a test. Optimizations using this instruction will follow in another CL. Change-Id: I7f7a93212249a16a30bd1379c717f8a7f9915daf Reviewed-on: https://go-review.googlesource.com/c/go/+/708995 LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Junyang Shao <shaojunyang@google.com>
This commit is contained in:
parent
20b3339542
commit
d03634f807
13 changed files with 780 additions and 1 deletions
|
|
@ -1939,6 +1939,22 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||||
ssa.OpAMD64VPSHRDQMasked512load:
|
ssa.OpAMD64VPSHRDQMasked512load:
|
||||||
p = simdV2kvloadImm8(s, v)
|
p = simdV2kvloadImm8(s, v)
|
||||||
|
|
||||||
|
case ssa.OpAMD64VPTERNLOGD128,
|
||||||
|
ssa.OpAMD64VPTERNLOGD256,
|
||||||
|
ssa.OpAMD64VPTERNLOGD512,
|
||||||
|
ssa.OpAMD64VPTERNLOGQ128,
|
||||||
|
ssa.OpAMD64VPTERNLOGQ256,
|
||||||
|
ssa.OpAMD64VPTERNLOGQ512:
|
||||||
|
p = simdV31ResultInArg0Imm8(s, v)
|
||||||
|
|
||||||
|
case ssa.OpAMD64VPTERNLOGD128load,
|
||||||
|
ssa.OpAMD64VPTERNLOGD256load,
|
||||||
|
ssa.OpAMD64VPTERNLOGD512load,
|
||||||
|
ssa.OpAMD64VPTERNLOGQ128load,
|
||||||
|
ssa.OpAMD64VPTERNLOGQ256load,
|
||||||
|
ssa.OpAMD64VPTERNLOGQ512load:
|
||||||
|
p = simdV31loadResultInArg0Imm8(s, v)
|
||||||
|
|
||||||
default:
|
default:
|
||||||
// Unknown reg shape
|
// Unknown reg shape
|
||||||
return false
|
return false
|
||||||
|
|
|
||||||
|
|
@ -2095,6 +2095,37 @@ func simdV31ResultInArg0(s *ssagen.State, v *ssa.Value) *obj.Prog {
|
||||||
return p
|
return p
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func simdV31ResultInArg0Imm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
|
||||||
|
p := s.Prog(v.Op.Asm())
|
||||||
|
p.From.Offset = int64(v.AuxUInt8())
|
||||||
|
p.From.Type = obj.TYPE_CONST
|
||||||
|
|
||||||
|
p.AddRestSourceReg(simdReg(v.Args[2]))
|
||||||
|
p.AddRestSourceReg(simdReg(v.Args[1]))
|
||||||
|
// p.AddRestSourceReg(x86.REG_K0)
|
||||||
|
p.To.Type = obj.TYPE_REG
|
||||||
|
p.To.Reg = simdReg(v)
|
||||||
|
return p
|
||||||
|
}
|
||||||
|
|
||||||
|
// v31loadResultInArg0Imm8
|
||||||
|
// Example instruction:
|
||||||
|
// for (VPTERNLOGD128load {sym} [makeValAndOff(int32(int8(c)),off)] x y ptr mem)
|
||||||
|
func simdV31loadResultInArg0Imm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
|
||||||
|
sc := v.AuxValAndOff()
|
||||||
|
p := s.Prog(v.Op.Asm())
|
||||||
|
|
||||||
|
p.From.Type = obj.TYPE_CONST
|
||||||
|
p.From.Offset = sc.Val64()
|
||||||
|
|
||||||
|
m := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[2].Reg()}
|
||||||
|
ssagen.AddAux2(&m, v, sc.Off64())
|
||||||
|
p.AddRestSource(m)
|
||||||
|
|
||||||
|
p.AddRestSourceReg(simdReg(v.Args[1]))
|
||||||
|
return p
|
||||||
|
}
|
||||||
|
|
||||||
// Example instruction: VFMADD213PD Z2, Z1, K1, Z0
|
// Example instruction: VFMADD213PD Z2, Z1, K1, Z0
|
||||||
func simdV3kvResultInArg0(s *ssagen.State, v *ssa.Value) *obj.Prog {
|
func simdV3kvResultInArg0(s *ssagen.State, v *ssa.Value) *obj.Prog {
|
||||||
p := s.Prog(v.Op.Asm())
|
p := s.Prog(v.Op.Asm())
|
||||||
|
|
|
||||||
|
|
@ -1320,6 +1320,18 @@
|
||||||
(moveMaskedUint16x32 x mask) => (VMOVDQU16Masked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
|
(moveMaskedUint16x32 x mask) => (VMOVDQU16Masked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
|
||||||
(moveMaskedUint32x16 x mask) => (VMOVDQU32Masked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
|
(moveMaskedUint32x16 x mask) => (VMOVDQU32Masked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
|
||||||
(moveMaskedUint64x8 x mask) => (VMOVDQU64Masked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
|
(moveMaskedUint64x8 x mask) => (VMOVDQU64Masked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
|
||||||
|
(ternInt32x4 ...) => (VPTERNLOGD128 ...)
|
||||||
|
(ternInt32x8 ...) => (VPTERNLOGD256 ...)
|
||||||
|
(ternInt32x16 ...) => (VPTERNLOGD512 ...)
|
||||||
|
(ternInt64x2 ...) => (VPTERNLOGQ128 ...)
|
||||||
|
(ternInt64x4 ...) => (VPTERNLOGQ256 ...)
|
||||||
|
(ternInt64x8 ...) => (VPTERNLOGQ512 ...)
|
||||||
|
(ternUint32x4 ...) => (VPTERNLOGD128 ...)
|
||||||
|
(ternUint32x8 ...) => (VPTERNLOGD256 ...)
|
||||||
|
(ternUint32x16 ...) => (VPTERNLOGD512 ...)
|
||||||
|
(ternUint64x2 ...) => (VPTERNLOGQ128 ...)
|
||||||
|
(ternUint64x4 ...) => (VPTERNLOGQ256 ...)
|
||||||
|
(ternUint64x8 ...) => (VPTERNLOGQ512 ...)
|
||||||
(VMOVDQU8Masked512 (VPABSB512 x) mask) => (VPABSBMasked512 x mask)
|
(VMOVDQU8Masked512 (VPABSB512 x) mask) => (VPABSBMasked512 x mask)
|
||||||
(VMOVDQU16Masked512 (VPABSW512 x) mask) => (VPABSWMasked512 x mask)
|
(VMOVDQU16Masked512 (VPABSW512 x) mask) => (VPABSWMasked512 x mask)
|
||||||
(VMOVDQU32Masked512 (VPABSD512 x) mask) => (VPABSDMasked512 x mask)
|
(VMOVDQU32Masked512 (VPABSD512 x) mask) => (VPABSDMasked512 x mask)
|
||||||
|
|
@ -2047,3 +2059,9 @@
|
||||||
(VPSRAQMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRAQMasked128constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
|
(VPSRAQMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRAQMasked128constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
|
||||||
(VPSRAQMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRAQMasked256constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
|
(VPSRAQMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRAQMasked256constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
|
||||||
(VPSRAQMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRAQMasked512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
|
(VPSRAQMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRAQMasked512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
|
||||||
|
(VPTERNLOGD128 [c] x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPTERNLOGD128load {sym} [makeValAndOff(int32(int8(c)),off)] x y ptr mem)
|
||||||
|
(VPTERNLOGD256 [c] x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPTERNLOGD256load {sym} [makeValAndOff(int32(int8(c)),off)] x y ptr mem)
|
||||||
|
(VPTERNLOGD512 [c] x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPTERNLOGD512load {sym} [makeValAndOff(int32(int8(c)),off)] x y ptr mem)
|
||||||
|
(VPTERNLOGQ128 [c] x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPTERNLOGQ128load {sym} [makeValAndOff(int32(int8(c)),off)] x y ptr mem)
|
||||||
|
(VPTERNLOGQ256 [c] x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPTERNLOGQ256load {sym} [makeValAndOff(int32(int8(c)),off)] x y ptr mem)
|
||||||
|
(VPTERNLOGQ512 [c] x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPTERNLOGQ512load {sym} [makeValAndOff(int32(int8(c)),off)] x y ptr mem)
|
||||||
|
|
|
||||||
|
|
@ -1322,6 +1322,12 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
|
||||||
{name: "VPSRAQMasked128const", argLength: 2, reg: wkw, asm: "VPSRAQ", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
|
{name: "VPSRAQMasked128const", argLength: 2, reg: wkw, asm: "VPSRAQ", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||||
{name: "VPSRAQMasked256const", argLength: 2, reg: wkw, asm: "VPSRAQ", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
|
{name: "VPSRAQMasked256const", argLength: 2, reg: wkw, asm: "VPSRAQ", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||||
{name: "VPSRAQMasked512const", argLength: 2, reg: wkw, asm: "VPSRAQ", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: false},
|
{name: "VPSRAQMasked512const", argLength: 2, reg: wkw, asm: "VPSRAQ", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||||
|
{name: "VPTERNLOGD128", argLength: 3, reg: w31, asm: "VPTERNLOGD", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||||
|
{name: "VPTERNLOGD256", argLength: 3, reg: w31, asm: "VPTERNLOGD", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||||
|
{name: "VPTERNLOGD512", argLength: 3, reg: w31, asm: "VPTERNLOGD", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||||
|
{name: "VPTERNLOGQ128", argLength: 3, reg: w31, asm: "VPTERNLOGQ", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||||
|
{name: "VPTERNLOGQ256", argLength: 3, reg: w31, asm: "VPTERNLOGQ", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||||
|
{name: "VPTERNLOGQ512", argLength: 3, reg: w31, asm: "VPTERNLOGQ", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||||
{name: "VPABSD512load", argLength: 2, reg: w11load, asm: "VPABSD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
|
{name: "VPABSD512load", argLength: 2, reg: w11load, asm: "VPABSD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
|
||||||
{name: "VPABSQ128load", argLength: 2, reg: w11load, asm: "VPABSQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
|
{name: "VPABSQ128load", argLength: 2, reg: w11load, asm: "VPABSQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
|
||||||
{name: "VPABSQ256load", argLength: 2, reg: w11load, asm: "VPABSQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
|
{name: "VPABSQ256load", argLength: 2, reg: w11load, asm: "VPABSQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
|
@ -1870,5 +1876,11 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
|
||||||
{name: "VPSRAQMasked128constload", argLength: 3, reg: wkwload, asm: "VPSRAQ", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
{name: "VPSRAQMasked128constload", argLength: 3, reg: wkwload, asm: "VPSRAQ", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
{name: "VPSRAQMasked256constload", argLength: 3, reg: wkwload, asm: "VPSRAQ", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
{name: "VPSRAQMasked256constload", argLength: 3, reg: wkwload, asm: "VPSRAQ", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
{name: "VPSRAQMasked512constload", argLength: 3, reg: wkwload, asm: "VPSRAQ", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
{name: "VPSRAQMasked512constload", argLength: 3, reg: wkwload, asm: "VPSRAQ", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPTERNLOGD128load", argLength: 4, reg: w31load, asm: "VPTERNLOGD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: true},
|
||||||
|
{name: "VPTERNLOGD256load", argLength: 4, reg: w31load, asm: "VPTERNLOGD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: true},
|
||||||
|
{name: "VPTERNLOGD512load", argLength: 4, reg: w31load, asm: "VPTERNLOGD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: true},
|
||||||
|
{name: "VPTERNLOGQ128load", argLength: 4, reg: w31load, asm: "VPTERNLOGQ", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: true},
|
||||||
|
{name: "VPTERNLOGQ256load", argLength: 4, reg: w31load, asm: "VPTERNLOGQ", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: true},
|
||||||
|
{name: "VPTERNLOGQ512load", argLength: 4, reg: w31load, asm: "VPTERNLOGQ", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: true},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1288,5 +1288,17 @@ func simdGenericOps() []opData {
|
||||||
{name: "concatSelectedConstantInt64x2", argLength: 2, commutative: false, aux: "UInt8"},
|
{name: "concatSelectedConstantInt64x2", argLength: 2, commutative: false, aux: "UInt8"},
|
||||||
{name: "concatSelectedConstantUint32x4", argLength: 2, commutative: false, aux: "UInt8"},
|
{name: "concatSelectedConstantUint32x4", argLength: 2, commutative: false, aux: "UInt8"},
|
||||||
{name: "concatSelectedConstantUint64x2", argLength: 2, commutative: false, aux: "UInt8"},
|
{name: "concatSelectedConstantUint64x2", argLength: 2, commutative: false, aux: "UInt8"},
|
||||||
|
{name: "ternInt32x4", argLength: 3, commutative: false, aux: "UInt8"},
|
||||||
|
{name: "ternInt32x8", argLength: 3, commutative: false, aux: "UInt8"},
|
||||||
|
{name: "ternInt32x16", argLength: 3, commutative: false, aux: "UInt8"},
|
||||||
|
{name: "ternInt64x2", argLength: 3, commutative: false, aux: "UInt8"},
|
||||||
|
{name: "ternInt64x4", argLength: 3, commutative: false, aux: "UInt8"},
|
||||||
|
{name: "ternInt64x8", argLength: 3, commutative: false, aux: "UInt8"},
|
||||||
|
{name: "ternUint32x4", argLength: 3, commutative: false, aux: "UInt8"},
|
||||||
|
{name: "ternUint32x8", argLength: 3, commutative: false, aux: "UInt8"},
|
||||||
|
{name: "ternUint32x16", argLength: 3, commutative: false, aux: "UInt8"},
|
||||||
|
{name: "ternUint64x2", argLength: 3, commutative: false, aux: "UInt8"},
|
||||||
|
{name: "ternUint64x4", argLength: 3, commutative: false, aux: "UInt8"},
|
||||||
|
{name: "ternUint64x8", argLength: 3, commutative: false, aux: "UInt8"},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -2562,6 +2562,12 @@ const (
|
||||||
OpAMD64VPSRAQMasked128const
|
OpAMD64VPSRAQMasked128const
|
||||||
OpAMD64VPSRAQMasked256const
|
OpAMD64VPSRAQMasked256const
|
||||||
OpAMD64VPSRAQMasked512const
|
OpAMD64VPSRAQMasked512const
|
||||||
|
OpAMD64VPTERNLOGD128
|
||||||
|
OpAMD64VPTERNLOGD256
|
||||||
|
OpAMD64VPTERNLOGD512
|
||||||
|
OpAMD64VPTERNLOGQ128
|
||||||
|
OpAMD64VPTERNLOGQ256
|
||||||
|
OpAMD64VPTERNLOGQ512
|
||||||
OpAMD64VPABSD512load
|
OpAMD64VPABSD512load
|
||||||
OpAMD64VPABSQ128load
|
OpAMD64VPABSQ128load
|
||||||
OpAMD64VPABSQ256load
|
OpAMD64VPABSQ256load
|
||||||
|
|
@ -3110,6 +3116,12 @@ const (
|
||||||
OpAMD64VPSRAQMasked128constload
|
OpAMD64VPSRAQMasked128constload
|
||||||
OpAMD64VPSRAQMasked256constload
|
OpAMD64VPSRAQMasked256constload
|
||||||
OpAMD64VPSRAQMasked512constload
|
OpAMD64VPSRAQMasked512constload
|
||||||
|
OpAMD64VPTERNLOGD128load
|
||||||
|
OpAMD64VPTERNLOGD256load
|
||||||
|
OpAMD64VPTERNLOGD512load
|
||||||
|
OpAMD64VPTERNLOGQ128load
|
||||||
|
OpAMD64VPTERNLOGQ256load
|
||||||
|
OpAMD64VPTERNLOGQ512load
|
||||||
|
|
||||||
OpARMADD
|
OpARMADD
|
||||||
OpARMADDconst
|
OpARMADDconst
|
||||||
|
|
@ -6669,6 +6681,18 @@ const (
|
||||||
OpconcatSelectedConstantInt64x2
|
OpconcatSelectedConstantInt64x2
|
||||||
OpconcatSelectedConstantUint32x4
|
OpconcatSelectedConstantUint32x4
|
||||||
OpconcatSelectedConstantUint64x2
|
OpconcatSelectedConstantUint64x2
|
||||||
|
OpternInt32x4
|
||||||
|
OpternInt32x8
|
||||||
|
OpternInt32x16
|
||||||
|
OpternInt64x2
|
||||||
|
OpternInt64x4
|
||||||
|
OpternInt64x8
|
||||||
|
OpternUint32x4
|
||||||
|
OpternUint32x8
|
||||||
|
OpternUint32x16
|
||||||
|
OpternUint64x2
|
||||||
|
OpternUint64x4
|
||||||
|
OpternUint64x8
|
||||||
)
|
)
|
||||||
|
|
||||||
var opcodeTable = [...]opInfo{
|
var opcodeTable = [...]opInfo{
|
||||||
|
|
@ -39366,6 +39390,108 @@ var opcodeTable = [...]opInfo{
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "VPTERNLOGD128",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 3,
|
||||||
|
resultInArg0: true,
|
||||||
|
asm: x86.AVPTERNLOGD,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
{2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VPTERNLOGD256",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 3,
|
||||||
|
resultInArg0: true,
|
||||||
|
asm: x86.AVPTERNLOGD,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
{2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VPTERNLOGD512",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 3,
|
||||||
|
resultInArg0: true,
|
||||||
|
asm: x86.AVPTERNLOGD,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
{2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VPTERNLOGQ128",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 3,
|
||||||
|
resultInArg0: true,
|
||||||
|
asm: x86.AVPTERNLOGQ,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
{2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VPTERNLOGQ256",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 3,
|
||||||
|
resultInArg0: true,
|
||||||
|
asm: x86.AVPTERNLOGQ,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
{2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VPTERNLOGQ512",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 3,
|
||||||
|
resultInArg0: true,
|
||||||
|
asm: x86.AVPTERNLOGQ,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
{2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "VPABSD512load",
|
name: "VPABSD512load",
|
||||||
auxType: auxSymOff,
|
auxType: auxSymOff,
|
||||||
|
|
@ -48504,6 +48630,114 @@ var opcodeTable = [...]opInfo{
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "VPTERNLOGD128load",
|
||||||
|
auxType: auxSymValAndOff,
|
||||||
|
argLen: 4,
|
||||||
|
resultInArg0: true,
|
||||||
|
symEffect: SymRead,
|
||||||
|
asm: x86.AVPTERNLOGD,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VPTERNLOGD256load",
|
||||||
|
auxType: auxSymValAndOff,
|
||||||
|
argLen: 4,
|
||||||
|
resultInArg0: true,
|
||||||
|
symEffect: SymRead,
|
||||||
|
asm: x86.AVPTERNLOGD,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VPTERNLOGD512load",
|
||||||
|
auxType: auxSymValAndOff,
|
||||||
|
argLen: 4,
|
||||||
|
resultInArg0: true,
|
||||||
|
symEffect: SymRead,
|
||||||
|
asm: x86.AVPTERNLOGD,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VPTERNLOGQ128load",
|
||||||
|
auxType: auxSymValAndOff,
|
||||||
|
argLen: 4,
|
||||||
|
resultInArg0: true,
|
||||||
|
symEffect: SymRead,
|
||||||
|
asm: x86.AVPTERNLOGQ,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VPTERNLOGQ256load",
|
||||||
|
auxType: auxSymValAndOff,
|
||||||
|
argLen: 4,
|
||||||
|
resultInArg0: true,
|
||||||
|
symEffect: SymRead,
|
||||||
|
asm: x86.AVPTERNLOGQ,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VPTERNLOGQ512load",
|
||||||
|
auxType: auxSymValAndOff,
|
||||||
|
argLen: 4,
|
||||||
|
resultInArg0: true,
|
||||||
|
symEffect: SymRead,
|
||||||
|
asm: x86.AVPTERNLOGQ,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
name: "ADD",
|
name: "ADD",
|
||||||
|
|
@ -82840,6 +83074,78 @@ var opcodeTable = [...]opInfo{
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "ternInt32x4",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ternInt32x8",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ternInt32x16",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ternInt64x2",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ternInt64x4",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ternInt64x8",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ternUint32x4",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ternUint32x8",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ternUint32x16",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ternUint64x2",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ternUint64x4",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ternUint64x8",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
func (o Op) Asm() obj.As { return opcodeTable[o].asm }
|
func (o Op) Asm() obj.As { return opcodeTable[o].asm }
|
||||||
|
|
|
||||||
|
|
@ -1609,6 +1609,18 @@ func rewriteValueAMD64(v *Value) bool {
|
||||||
return rewriteValueAMD64_OpAMD64VPSUBQMasked256(v)
|
return rewriteValueAMD64_OpAMD64VPSUBQMasked256(v)
|
||||||
case OpAMD64VPSUBQMasked512:
|
case OpAMD64VPSUBQMasked512:
|
||||||
return rewriteValueAMD64_OpAMD64VPSUBQMasked512(v)
|
return rewriteValueAMD64_OpAMD64VPSUBQMasked512(v)
|
||||||
|
case OpAMD64VPTERNLOGD128:
|
||||||
|
return rewriteValueAMD64_OpAMD64VPTERNLOGD128(v)
|
||||||
|
case OpAMD64VPTERNLOGD256:
|
||||||
|
return rewriteValueAMD64_OpAMD64VPTERNLOGD256(v)
|
||||||
|
case OpAMD64VPTERNLOGD512:
|
||||||
|
return rewriteValueAMD64_OpAMD64VPTERNLOGD512(v)
|
||||||
|
case OpAMD64VPTERNLOGQ128:
|
||||||
|
return rewriteValueAMD64_OpAMD64VPTERNLOGQ128(v)
|
||||||
|
case OpAMD64VPTERNLOGQ256:
|
||||||
|
return rewriteValueAMD64_OpAMD64VPTERNLOGQ256(v)
|
||||||
|
case OpAMD64VPTERNLOGQ512:
|
||||||
|
return rewriteValueAMD64_OpAMD64VPTERNLOGQ512(v)
|
||||||
case OpAMD64VPUNPCKHDQ512:
|
case OpAMD64VPUNPCKHDQ512:
|
||||||
return rewriteValueAMD64_OpAMD64VPUNPCKHDQ512(v)
|
return rewriteValueAMD64_OpAMD64VPUNPCKHDQ512(v)
|
||||||
case OpAMD64VPUNPCKHQDQ512:
|
case OpAMD64VPUNPCKHQDQ512:
|
||||||
|
|
@ -6061,6 +6073,42 @@ func rewriteValueAMD64(v *Value) bool {
|
||||||
return rewriteValueAMD64_OpmoveMaskedUint64x8(v)
|
return rewriteValueAMD64_OpmoveMaskedUint64x8(v)
|
||||||
case OpmoveMaskedUint8x64:
|
case OpmoveMaskedUint8x64:
|
||||||
return rewriteValueAMD64_OpmoveMaskedUint8x64(v)
|
return rewriteValueAMD64_OpmoveMaskedUint8x64(v)
|
||||||
|
case OpternInt32x16:
|
||||||
|
v.Op = OpAMD64VPTERNLOGD512
|
||||||
|
return true
|
||||||
|
case OpternInt32x4:
|
||||||
|
v.Op = OpAMD64VPTERNLOGD128
|
||||||
|
return true
|
||||||
|
case OpternInt32x8:
|
||||||
|
v.Op = OpAMD64VPTERNLOGD256
|
||||||
|
return true
|
||||||
|
case OpternInt64x2:
|
||||||
|
v.Op = OpAMD64VPTERNLOGQ128
|
||||||
|
return true
|
||||||
|
case OpternInt64x4:
|
||||||
|
v.Op = OpAMD64VPTERNLOGQ256
|
||||||
|
return true
|
||||||
|
case OpternInt64x8:
|
||||||
|
v.Op = OpAMD64VPTERNLOGQ512
|
||||||
|
return true
|
||||||
|
case OpternUint32x16:
|
||||||
|
v.Op = OpAMD64VPTERNLOGD512
|
||||||
|
return true
|
||||||
|
case OpternUint32x4:
|
||||||
|
v.Op = OpAMD64VPTERNLOGD128
|
||||||
|
return true
|
||||||
|
case OpternUint32x8:
|
||||||
|
v.Op = OpAMD64VPTERNLOGD256
|
||||||
|
return true
|
||||||
|
case OpternUint64x2:
|
||||||
|
v.Op = OpAMD64VPTERNLOGQ128
|
||||||
|
return true
|
||||||
|
case OpternUint64x4:
|
||||||
|
v.Op = OpAMD64VPTERNLOGQ256
|
||||||
|
return true
|
||||||
|
case OpternUint64x8:
|
||||||
|
v.Op = OpAMD64VPTERNLOGQ512
|
||||||
|
return true
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
@ -45655,6 +45703,186 @@ func rewriteValueAMD64_OpAMD64VPSUBQMasked512(v *Value) bool {
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
func rewriteValueAMD64_OpAMD64VPTERNLOGD128(v *Value) bool {
|
||||||
|
v_2 := v.Args[2]
|
||||||
|
v_1 := v.Args[1]
|
||||||
|
v_0 := v.Args[0]
|
||||||
|
// match: (VPTERNLOGD128 [c] x y l:(VMOVDQUload128 {sym} [off] ptr mem))
|
||||||
|
// cond: canMergeLoad(v, l) && clobber(l)
|
||||||
|
// result: (VPTERNLOGD128load {sym} [makeValAndOff(int32(int8(c)),off)] x y ptr mem)
|
||||||
|
for {
|
||||||
|
c := auxIntToUint8(v.AuxInt)
|
||||||
|
x := v_0
|
||||||
|
y := v_1
|
||||||
|
l := v_2
|
||||||
|
if l.Op != OpAMD64VMOVDQUload128 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
off := auxIntToInt32(l.AuxInt)
|
||||||
|
sym := auxToSym(l.Aux)
|
||||||
|
mem := l.Args[1]
|
||||||
|
ptr := l.Args[0]
|
||||||
|
if !(canMergeLoad(v, l) && clobber(l)) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
v.reset(OpAMD64VPTERNLOGD128load)
|
||||||
|
v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
|
||||||
|
v.Aux = symToAux(sym)
|
||||||
|
v.AddArg4(x, y, ptr, mem)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
func rewriteValueAMD64_OpAMD64VPTERNLOGD256(v *Value) bool {
|
||||||
|
v_2 := v.Args[2]
|
||||||
|
v_1 := v.Args[1]
|
||||||
|
v_0 := v.Args[0]
|
||||||
|
// match: (VPTERNLOGD256 [c] x y l:(VMOVDQUload256 {sym} [off] ptr mem))
|
||||||
|
// cond: canMergeLoad(v, l) && clobber(l)
|
||||||
|
// result: (VPTERNLOGD256load {sym} [makeValAndOff(int32(int8(c)),off)] x y ptr mem)
|
||||||
|
for {
|
||||||
|
c := auxIntToUint8(v.AuxInt)
|
||||||
|
x := v_0
|
||||||
|
y := v_1
|
||||||
|
l := v_2
|
||||||
|
if l.Op != OpAMD64VMOVDQUload256 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
off := auxIntToInt32(l.AuxInt)
|
||||||
|
sym := auxToSym(l.Aux)
|
||||||
|
mem := l.Args[1]
|
||||||
|
ptr := l.Args[0]
|
||||||
|
if !(canMergeLoad(v, l) && clobber(l)) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
v.reset(OpAMD64VPTERNLOGD256load)
|
||||||
|
v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
|
||||||
|
v.Aux = symToAux(sym)
|
||||||
|
v.AddArg4(x, y, ptr, mem)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
func rewriteValueAMD64_OpAMD64VPTERNLOGD512(v *Value) bool {
|
||||||
|
v_2 := v.Args[2]
|
||||||
|
v_1 := v.Args[1]
|
||||||
|
v_0 := v.Args[0]
|
||||||
|
// match: (VPTERNLOGD512 [c] x y l:(VMOVDQUload512 {sym} [off] ptr mem))
|
||||||
|
// cond: canMergeLoad(v, l) && clobber(l)
|
||||||
|
// result: (VPTERNLOGD512load {sym} [makeValAndOff(int32(int8(c)),off)] x y ptr mem)
|
||||||
|
for {
|
||||||
|
c := auxIntToUint8(v.AuxInt)
|
||||||
|
x := v_0
|
||||||
|
y := v_1
|
||||||
|
l := v_2
|
||||||
|
if l.Op != OpAMD64VMOVDQUload512 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
off := auxIntToInt32(l.AuxInt)
|
||||||
|
sym := auxToSym(l.Aux)
|
||||||
|
mem := l.Args[1]
|
||||||
|
ptr := l.Args[0]
|
||||||
|
if !(canMergeLoad(v, l) && clobber(l)) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
v.reset(OpAMD64VPTERNLOGD512load)
|
||||||
|
v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
|
||||||
|
v.Aux = symToAux(sym)
|
||||||
|
v.AddArg4(x, y, ptr, mem)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
func rewriteValueAMD64_OpAMD64VPTERNLOGQ128(v *Value) bool {
|
||||||
|
v_2 := v.Args[2]
|
||||||
|
v_1 := v.Args[1]
|
||||||
|
v_0 := v.Args[0]
|
||||||
|
// match: (VPTERNLOGQ128 [c] x y l:(VMOVDQUload128 {sym} [off] ptr mem))
|
||||||
|
// cond: canMergeLoad(v, l) && clobber(l)
|
||||||
|
// result: (VPTERNLOGQ128load {sym} [makeValAndOff(int32(int8(c)),off)] x y ptr mem)
|
||||||
|
for {
|
||||||
|
c := auxIntToUint8(v.AuxInt)
|
||||||
|
x := v_0
|
||||||
|
y := v_1
|
||||||
|
l := v_2
|
||||||
|
if l.Op != OpAMD64VMOVDQUload128 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
off := auxIntToInt32(l.AuxInt)
|
||||||
|
sym := auxToSym(l.Aux)
|
||||||
|
mem := l.Args[1]
|
||||||
|
ptr := l.Args[0]
|
||||||
|
if !(canMergeLoad(v, l) && clobber(l)) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
v.reset(OpAMD64VPTERNLOGQ128load)
|
||||||
|
v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
|
||||||
|
v.Aux = symToAux(sym)
|
||||||
|
v.AddArg4(x, y, ptr, mem)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
func rewriteValueAMD64_OpAMD64VPTERNLOGQ256(v *Value) bool {
|
||||||
|
v_2 := v.Args[2]
|
||||||
|
v_1 := v.Args[1]
|
||||||
|
v_0 := v.Args[0]
|
||||||
|
// match: (VPTERNLOGQ256 [c] x y l:(VMOVDQUload256 {sym} [off] ptr mem))
|
||||||
|
// cond: canMergeLoad(v, l) && clobber(l)
|
||||||
|
// result: (VPTERNLOGQ256load {sym} [makeValAndOff(int32(int8(c)),off)] x y ptr mem)
|
||||||
|
for {
|
||||||
|
c := auxIntToUint8(v.AuxInt)
|
||||||
|
x := v_0
|
||||||
|
y := v_1
|
||||||
|
l := v_2
|
||||||
|
if l.Op != OpAMD64VMOVDQUload256 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
off := auxIntToInt32(l.AuxInt)
|
||||||
|
sym := auxToSym(l.Aux)
|
||||||
|
mem := l.Args[1]
|
||||||
|
ptr := l.Args[0]
|
||||||
|
if !(canMergeLoad(v, l) && clobber(l)) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
v.reset(OpAMD64VPTERNLOGQ256load)
|
||||||
|
v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
|
||||||
|
v.Aux = symToAux(sym)
|
||||||
|
v.AddArg4(x, y, ptr, mem)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
func rewriteValueAMD64_OpAMD64VPTERNLOGQ512(v *Value) bool {
|
||||||
|
v_2 := v.Args[2]
|
||||||
|
v_1 := v.Args[1]
|
||||||
|
v_0 := v.Args[0]
|
||||||
|
// match: (VPTERNLOGQ512 [c] x y l:(VMOVDQUload512 {sym} [off] ptr mem))
|
||||||
|
// cond: canMergeLoad(v, l) && clobber(l)
|
||||||
|
// result: (VPTERNLOGQ512load {sym} [makeValAndOff(int32(int8(c)),off)] x y ptr mem)
|
||||||
|
for {
|
||||||
|
c := auxIntToUint8(v.AuxInt)
|
||||||
|
x := v_0
|
||||||
|
y := v_1
|
||||||
|
l := v_2
|
||||||
|
if l.Op != OpAMD64VMOVDQUload512 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
off := auxIntToInt32(l.AuxInt)
|
||||||
|
sym := auxToSym(l.Aux)
|
||||||
|
mem := l.Args[1]
|
||||||
|
ptr := l.Args[0]
|
||||||
|
if !(canMergeLoad(v, l) && clobber(l)) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
v.reset(OpAMD64VPTERNLOGQ512load)
|
||||||
|
v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
|
||||||
|
v.Aux = symToAux(sym)
|
||||||
|
v.AddArg4(x, y, ptr, mem)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
func rewriteValueAMD64_OpAMD64VPUNPCKHDQ512(v *Value) bool {
|
func rewriteValueAMD64_OpAMD64VPUNPCKHDQ512(v *Value) bool {
|
||||||
v_1 := v.Args[1]
|
v_1 := v.Args[1]
|
||||||
v_0 := v.Args[0]
|
v_0 := v.Args[0]
|
||||||
|
|
|
||||||
|
|
@ -1296,6 +1296,18 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
||||||
addF(simdPackage, "Uint16x32.moveMasked", opLen2(ssa.OpmoveMaskedUint16x32, types.TypeVec512), sys.AMD64)
|
addF(simdPackage, "Uint16x32.moveMasked", opLen2(ssa.OpmoveMaskedUint16x32, types.TypeVec512), sys.AMD64)
|
||||||
addF(simdPackage, "Uint32x16.moveMasked", opLen2(ssa.OpmoveMaskedUint32x16, types.TypeVec512), sys.AMD64)
|
addF(simdPackage, "Uint32x16.moveMasked", opLen2(ssa.OpmoveMaskedUint32x16, types.TypeVec512), sys.AMD64)
|
||||||
addF(simdPackage, "Uint64x8.moveMasked", opLen2(ssa.OpmoveMaskedUint64x8, types.TypeVec512), sys.AMD64)
|
addF(simdPackage, "Uint64x8.moveMasked", opLen2(ssa.OpmoveMaskedUint64x8, types.TypeVec512), sys.AMD64)
|
||||||
|
addF(simdPackage, "Int32x4.tern", opLen3Imm8(ssa.OpternInt32x4, types.TypeVec128, 0), sys.AMD64)
|
||||||
|
addF(simdPackage, "Int32x8.tern", opLen3Imm8(ssa.OpternInt32x8, types.TypeVec256, 0), sys.AMD64)
|
||||||
|
addF(simdPackage, "Int32x16.tern", opLen3Imm8(ssa.OpternInt32x16, types.TypeVec512, 0), sys.AMD64)
|
||||||
|
addF(simdPackage, "Int64x2.tern", opLen3Imm8(ssa.OpternInt64x2, types.TypeVec128, 0), sys.AMD64)
|
||||||
|
addF(simdPackage, "Int64x4.tern", opLen3Imm8(ssa.OpternInt64x4, types.TypeVec256, 0), sys.AMD64)
|
||||||
|
addF(simdPackage, "Int64x8.tern", opLen3Imm8(ssa.OpternInt64x8, types.TypeVec512, 0), sys.AMD64)
|
||||||
|
addF(simdPackage, "Uint32x4.tern", opLen3Imm8(ssa.OpternUint32x4, types.TypeVec128, 0), sys.AMD64)
|
||||||
|
addF(simdPackage, "Uint32x8.tern", opLen3Imm8(ssa.OpternUint32x8, types.TypeVec256, 0), sys.AMD64)
|
||||||
|
addF(simdPackage, "Uint32x16.tern", opLen3Imm8(ssa.OpternUint32x16, types.TypeVec512, 0), sys.AMD64)
|
||||||
|
addF(simdPackage, "Uint64x2.tern", opLen3Imm8(ssa.OpternUint64x2, types.TypeVec128, 0), sys.AMD64)
|
||||||
|
addF(simdPackage, "Uint64x4.tern", opLen3Imm8(ssa.OpternUint64x4, types.TypeVec256, 0), sys.AMD64)
|
||||||
|
addF(simdPackage, "Uint64x8.tern", opLen3Imm8(ssa.OpternUint64x8, types.TypeVec512, 0), sys.AMD64)
|
||||||
addF(simdPackage, "Float32x4.AsFloat64x2", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
addF(simdPackage, "Float32x4.AsFloat64x2", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
||||||
addF(simdPackage, "Float32x4.AsInt8x16", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
addF(simdPackage, "Float32x4.AsInt8x16", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
||||||
addF(simdPackage, "Float32x4.AsInt16x8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
addF(simdPackage, "Float32x4.AsInt16x8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
|
||||||
|
|
|
||||||
|
|
@ -94,6 +94,8 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer {
|
||||||
"v2kloadImm8",
|
"v2kloadImm8",
|
||||||
"v2kkloadImm8",
|
"v2kkloadImm8",
|
||||||
"v2kvloadImm8",
|
"v2kvloadImm8",
|
||||||
|
"v31ResultInArg0Imm8",
|
||||||
|
"v31loadResultInArg0Imm8",
|
||||||
}
|
}
|
||||||
regInfoSet := map[string][]string{}
|
regInfoSet := map[string][]string{}
|
||||||
for _, key := range regInfoKeys {
|
for _, key := range regInfoKeys {
|
||||||
|
|
|
||||||
|
|
@ -15,6 +15,11 @@
|
||||||
commutative: true
|
commutative: true
|
||||||
documentation: !string |-
|
documentation: !string |-
|
||||||
// NAME performs a bitwise XOR operation between two vectors.
|
// NAME performs a bitwise XOR operation between two vectors.
|
||||||
|
- go: tern
|
||||||
|
commutative: false
|
||||||
|
documentation: !string |-
|
||||||
|
// NAME performs a logical operation on three vectors based on the 8-bit truth table.
|
||||||
|
// Bitwise, the result is equal to 1 & (table >> (x<<2 + y<<1 + z))
|
||||||
|
|
||||||
# We also have PTEST and VPTERNLOG, those should be hidden from the users
|
# We also have PTEST and VPTERNLOG, those should be hidden from the users
|
||||||
# and only appear in rewrite rules.
|
# and only appear in rewrite rules.
|
||||||
|
|
|
||||||
|
|
@ -125,4 +125,18 @@
|
||||||
asm: "VPXORD" # Fill in the gap, Or is missing for Uint16x32 and Int16x32
|
asm: "VPXORD" # Fill in the gap, Or is missing for Uint16x32 and Int16x32
|
||||||
inVariant: []
|
inVariant: []
|
||||||
in: *twoI16x32
|
in: *twoI16x32
|
||||||
out: *oneI16x32
|
out: *oneI16x32
|
||||||
|
|
||||||
|
- go: tern
|
||||||
|
asm: "VPTERNLOGD|VPTERNLOGQ"
|
||||||
|
in:
|
||||||
|
- &tern_op
|
||||||
|
go: $t
|
||||||
|
- *tern_op
|
||||||
|
- *tern_op
|
||||||
|
- class: immediate
|
||||||
|
immOffset: 0
|
||||||
|
name: table
|
||||||
|
inVariant: []
|
||||||
|
out:
|
||||||
|
- *tern_op
|
||||||
|
|
|
||||||
|
|
@ -7872,6 +7872,104 @@ func (x Uint32x16) moveMasked(mask Mask32x16) Uint32x16
|
||||||
// Asm: VMOVDQU64, CPU Feature: AVX512
|
// Asm: VMOVDQU64, CPU Feature: AVX512
|
||||||
func (x Uint64x8) moveMasked(mask Mask64x8) Uint64x8
|
func (x Uint64x8) moveMasked(mask Mask64x8) Uint64x8
|
||||||
|
|
||||||
|
/* tern */
|
||||||
|
|
||||||
|
// tern performs a logical operation on three vectors based on the 8-bit truth table.
|
||||||
|
// Bitwise, the result is equal to 1 & (table >> (x<<2 + y<<1 + z))
|
||||||
|
//
|
||||||
|
// table results in better performance when it's a constant; a non-constant value will be translated into a jump table.
|
||||||
|
//
|
||||||
|
// Asm: VPTERNLOGD, CPU Feature: AVX512
|
||||||
|
func (x Int32x4) tern(table uint8, y Int32x4, z Int32x4) Int32x4
|
||||||
|
|
||||||
|
// tern performs a logical operation on three vectors based on the 8-bit truth table.
|
||||||
|
// Bitwise, the result is equal to 1 & (table >> (x<<2 + y<<1 + z))
|
||||||
|
//
|
||||||
|
// table results in better performance when it's a constant; a non-constant value will be translated into a jump table.
|
||||||
|
//
|
||||||
|
// Asm: VPTERNLOGD, CPU Feature: AVX512
|
||||||
|
func (x Int32x8) tern(table uint8, y Int32x8, z Int32x8) Int32x8
|
||||||
|
|
||||||
|
// tern performs a logical operation on three vectors based on the 8-bit truth table.
|
||||||
|
// Bitwise, the result is equal to 1 & (table >> (x<<2 + y<<1 + z))
|
||||||
|
//
|
||||||
|
// table results in better performance when it's a constant; a non-constant value will be translated into a jump table.
|
||||||
|
//
|
||||||
|
// Asm: VPTERNLOGD, CPU Feature: AVX512
|
||||||
|
func (x Int32x16) tern(table uint8, y Int32x16, z Int32x16) Int32x16
|
||||||
|
|
||||||
|
// tern performs a logical operation on three vectors based on the 8-bit truth table.
|
||||||
|
// Bitwise, the result is equal to 1 & (table >> (x<<2 + y<<1 + z))
|
||||||
|
//
|
||||||
|
// table results in better performance when it's a constant; a non-constant value will be translated into a jump table.
|
||||||
|
//
|
||||||
|
// Asm: VPTERNLOGQ, CPU Feature: AVX512
|
||||||
|
func (x Int64x2) tern(table uint8, y Int64x2, z Int64x2) Int64x2
|
||||||
|
|
||||||
|
// tern performs a logical operation on three vectors based on the 8-bit truth table.
|
||||||
|
// Bitwise, the result is equal to 1 & (table >> (x<<2 + y<<1 + z))
|
||||||
|
//
|
||||||
|
// table results in better performance when it's a constant; a non-constant value will be translated into a jump table.
|
||||||
|
//
|
||||||
|
// Asm: VPTERNLOGQ, CPU Feature: AVX512
|
||||||
|
func (x Int64x4) tern(table uint8, y Int64x4, z Int64x4) Int64x4
|
||||||
|
|
||||||
|
// tern performs a logical operation on three vectors based on the 8-bit truth table.
|
||||||
|
// Bitwise, the result is equal to 1 & (table >> (x<<2 + y<<1 + z))
|
||||||
|
//
|
||||||
|
// table results in better performance when it's a constant; a non-constant value will be translated into a jump table.
|
||||||
|
//
|
||||||
|
// Asm: VPTERNLOGQ, CPU Feature: AVX512
|
||||||
|
func (x Int64x8) tern(table uint8, y Int64x8, z Int64x8) Int64x8
|
||||||
|
|
||||||
|
// tern performs a logical operation on three vectors based on the 8-bit truth table.
|
||||||
|
// Bitwise, the result is equal to 1 & (table >> (x<<2 + y<<1 + z))
|
||||||
|
//
|
||||||
|
// table results in better performance when it's a constant; a non-constant value will be translated into a jump table.
|
||||||
|
//
|
||||||
|
// Asm: VPTERNLOGD, CPU Feature: AVX512
|
||||||
|
func (x Uint32x4) tern(table uint8, y Uint32x4, z Uint32x4) Uint32x4
|
||||||
|
|
||||||
|
// tern performs a logical operation on three vectors based on the 8-bit truth table.
|
||||||
|
// Bitwise, the result is equal to 1 & (table >> (x<<2 + y<<1 + z))
|
||||||
|
//
|
||||||
|
// table results in better performance when it's a constant; a non-constant value will be translated into a jump table.
|
||||||
|
//
|
||||||
|
// Asm: VPTERNLOGD, CPU Feature: AVX512
|
||||||
|
func (x Uint32x8) tern(table uint8, y Uint32x8, z Uint32x8) Uint32x8
|
||||||
|
|
||||||
|
// tern performs a logical operation on three vectors based on the 8-bit truth table.
|
||||||
|
// Bitwise, the result is equal to 1 & (table >> (x<<2 + y<<1 + z))
|
||||||
|
//
|
||||||
|
// table results in better performance when it's a constant; a non-constant value will be translated into a jump table.
|
||||||
|
//
|
||||||
|
// Asm: VPTERNLOGD, CPU Feature: AVX512
|
||||||
|
func (x Uint32x16) tern(table uint8, y Uint32x16, z Uint32x16) Uint32x16
|
||||||
|
|
||||||
|
// tern performs a logical operation on three vectors based on the 8-bit truth table.
|
||||||
|
// Bitwise, the result is equal to 1 & (table >> (x<<2 + y<<1 + z))
|
||||||
|
//
|
||||||
|
// table results in better performance when it's a constant; a non-constant value will be translated into a jump table.
|
||||||
|
//
|
||||||
|
// Asm: VPTERNLOGQ, CPU Feature: AVX512
|
||||||
|
func (x Uint64x2) tern(table uint8, y Uint64x2, z Uint64x2) Uint64x2
|
||||||
|
|
||||||
|
// tern performs a logical operation on three vectors based on the 8-bit truth table.
|
||||||
|
// Bitwise, the result is equal to 1 & (table >> (x<<2 + y<<1 + z))
|
||||||
|
//
|
||||||
|
// table results in better performance when it's a constant; a non-constant value will be translated into a jump table.
|
||||||
|
//
|
||||||
|
// Asm: VPTERNLOGQ, CPU Feature: AVX512
|
||||||
|
func (x Uint64x4) tern(table uint8, y Uint64x4, z Uint64x4) Uint64x4
|
||||||
|
|
||||||
|
// tern performs a logical operation on three vectors based on the 8-bit truth table.
|
||||||
|
// Bitwise, the result is equal to 1 & (table >> (x<<2 + y<<1 + z))
|
||||||
|
//
|
||||||
|
// table results in better performance when it's a constant; a non-constant value will be translated into a jump table.
|
||||||
|
//
|
||||||
|
// Asm: VPTERNLOGQ, CPU Feature: AVX512
|
||||||
|
func (x Uint64x8) tern(table uint8, y Uint64x8, z Uint64x8) Uint64x8
|
||||||
|
|
||||||
// Float64x2 converts from Float32x4 to Float64x2
|
// Float64x2 converts from Float32x4 to Float64x2
|
||||||
func (from Float32x4) AsFloat64x2() (to Float64x2)
|
func (from Float32x4) AsFloat64x2() (to Float64x2)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -47,6 +47,31 @@ func TestConcatSelectedConstantGrouped32(t *testing.T) {
|
||||||
test_helpers.CheckSlices[uint32](t, a, []uint32{2, 0, 5, 7, 10, 8, 13, 15})
|
test_helpers.CheckSlices[uint32](t, a, []uint32{2, 0, 5, 7, 10, 8, 13, 15})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestTern(t *testing.T) {
|
||||||
|
if !HasAVX512() {
|
||||||
|
t.Skip("This test needs AVX512")
|
||||||
|
}
|
||||||
|
x := LoadInt32x8Slice([]int32{0, 0, 0, 0, 1, 1, 1, 1})
|
||||||
|
y := LoadInt32x8Slice([]int32{0, 0, 1, 1, 0, 0, 1, 1})
|
||||||
|
z := LoadInt32x8Slice([]int32{0, 1, 0, 1, 0, 1, 0, 1})
|
||||||
|
|
||||||
|
foo := func(w Int32x8, k uint8) {
|
||||||
|
a := make([]int32, 8)
|
||||||
|
w.StoreSlice(a)
|
||||||
|
t.Logf("For k=%0b, w=%v", k, a)
|
||||||
|
for i, b := range a {
|
||||||
|
if (int32(k)>>i)&1 != b {
|
||||||
|
t.Errorf("Element %d of stored slice (=%d) did not match corresponding bit in 0b%b",
|
||||||
|
i, b, k)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
foo(x.tern(0b1111_0000, y, z), 0b1111_0000)
|
||||||
|
foo(x.tern(0b1100_1100, y, z), 0b1100_1100)
|
||||||
|
foo(x.tern(0b1010_1010, y, z), 0b1010_1010)
|
||||||
|
}
|
||||||
|
|
||||||
func TestSelect2x4x32(t *testing.T) {
|
func TestSelect2x4x32(t *testing.T) {
|
||||||
for a := range uint8(8) {
|
for a := range uint8(8) {
|
||||||
for b := range uint8(8) {
|
for b := range uint8(8) {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue