[dev.simd] cmd/compile: add dot product ops

This CL is generated by CL 678515.

Change-Id: Iac7c424bbbffc2514dff3495d6c408fa9c998c2f
Reviewed-on: https://go-review.googlesource.com/c/go/+/681296
Reviewed-by: David Chase <drchase@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
Junyang Shao 2025-06-12 16:24:24 +00:00
parent 34a9cdef87
commit 9ba7db36b5
8 changed files with 607 additions and 1 deletion

View file

@ -228,6 +228,9 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VORPD512, ssa.OpAMD64VORPD512,
ssa.OpAMD64VPORD512, ssa.OpAMD64VPORD512,
ssa.OpAMD64VPORQ512, ssa.OpAMD64VPORQ512,
ssa.OpAMD64VPMADDWD256,
ssa.OpAMD64VPMADDWD128,
ssa.OpAMD64VPMADDWD512,
ssa.OpAMD64VHADDPS128, ssa.OpAMD64VHADDPS128,
ssa.OpAMD64VHADDPS256, ssa.OpAMD64VHADDPS256,
ssa.OpAMD64VHADDPD128, ssa.OpAMD64VHADDPD128,
@ -260,6 +263,9 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPSUBSB256, ssa.OpAMD64VPSUBSB256,
ssa.OpAMD64VPSUBSW512, ssa.OpAMD64VPSUBSW512,
ssa.OpAMD64VPSUBSB512, ssa.OpAMD64VPSUBSB512,
ssa.OpAMD64VPMADDUBSW128,
ssa.OpAMD64VPMADDUBSW256,
ssa.OpAMD64VPMADDUBSW512,
ssa.OpAMD64VPSIGNW256, ssa.OpAMD64VPSIGNW256,
ssa.OpAMD64VPSIGNW128, ssa.OpAMD64VPSIGNW128,
ssa.OpAMD64VPSIGND128, ssa.OpAMD64VPSIGND128,
@ -460,6 +466,9 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPORQMasked128, ssa.OpAMD64VPORQMasked128,
ssa.OpAMD64VPORQMasked256, ssa.OpAMD64VPORQMasked256,
ssa.OpAMD64VPORQMasked512, ssa.OpAMD64VPORQMasked512,
ssa.OpAMD64VPMADDWDMasked256,
ssa.OpAMD64VPMADDWDMasked512,
ssa.OpAMD64VPMADDWDMasked128,
ssa.OpAMD64VPADDSWMasked256, ssa.OpAMD64VPADDSWMasked256,
ssa.OpAMD64VPADDSWMasked512, ssa.OpAMD64VPADDSWMasked512,
ssa.OpAMD64VPADDSWMasked128, ssa.OpAMD64VPADDSWMasked128,
@ -472,6 +481,9 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPSUBSBMasked128, ssa.OpAMD64VPSUBSBMasked128,
ssa.OpAMD64VPSUBSBMasked256, ssa.OpAMD64VPSUBSBMasked256,
ssa.OpAMD64VPSUBSBMasked512, ssa.OpAMD64VPSUBSBMasked512,
ssa.OpAMD64VPMADDUBSWMasked256,
ssa.OpAMD64VPMADDUBSWMasked512,
ssa.OpAMD64VPMADDUBSWMasked128,
ssa.OpAMD64VPSUBWMasked256, ssa.OpAMD64VPSUBWMasked256,
ssa.OpAMD64VPSUBWMasked512, ssa.OpAMD64VPSUBWMasked512,
ssa.OpAMD64VPSUBWMasked128, ssa.OpAMD64VPSUBWMasked128,
@ -600,7 +612,8 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VREDUCEPDMasked512: ssa.OpAMD64VREDUCEPDMasked512:
p = simdFp1k1fp1Imm8(s, v) p = simdFp1k1fp1Imm8(s, v)
case ssa.OpAMD64VCMPPS128, case ssa.OpAMD64VDPPD128,
ssa.OpAMD64VCMPPS128,
ssa.OpAMD64VCMPPS256, ssa.OpAMD64VCMPPS256,
ssa.OpAMD64VCMPPD128, ssa.OpAMD64VCMPPD128,
ssa.OpAMD64VCMPPD256: ssa.OpAMD64VCMPPD256:
@ -868,6 +881,9 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPORQMasked128, ssa.OpAMD64VPORQMasked128,
ssa.OpAMD64VPORQMasked256, ssa.OpAMD64VPORQMasked256,
ssa.OpAMD64VPORQMasked512, ssa.OpAMD64VPORQMasked512,
ssa.OpAMD64VPMADDWDMasked256,
ssa.OpAMD64VPMADDWDMasked512,
ssa.OpAMD64VPMADDWDMasked128,
ssa.OpAMD64VPOPCNTWMasked256, ssa.OpAMD64VPOPCNTWMasked256,
ssa.OpAMD64VPOPCNTWMasked512, ssa.OpAMD64VPOPCNTWMasked512,
ssa.OpAMD64VPOPCNTWMasked128, ssa.OpAMD64VPOPCNTWMasked128,
@ -892,6 +908,9 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPSUBSBMasked128, ssa.OpAMD64VPSUBSBMasked128,
ssa.OpAMD64VPSUBSBMasked256, ssa.OpAMD64VPSUBSBMasked256,
ssa.OpAMD64VPSUBSBMasked512, ssa.OpAMD64VPSUBSBMasked512,
ssa.OpAMD64VPMADDUBSWMasked256,
ssa.OpAMD64VPMADDUBSWMasked512,
ssa.OpAMD64VPMADDUBSWMasked128,
ssa.OpAMD64VSQRTPSMasked512, ssa.OpAMD64VSQRTPSMasked512,
ssa.OpAMD64VSQRTPSMasked128, ssa.OpAMD64VSQRTPSMasked128,
ssa.OpAMD64VSQRTPSMasked256, ssa.OpAMD64VSQRTPSMasked256,

View file

@ -186,6 +186,7 @@
(DivFloat64x2 ...) => (VDIVPD128 ...) (DivFloat64x2 ...) => (VDIVPD128 ...)
(DivFloat64x4 ...) => (VDIVPD256 ...) (DivFloat64x4 ...) => (VDIVPD256 ...)
(DivFloat64x8 ...) => (VDIVPD512 ...) (DivFloat64x8 ...) => (VDIVPD512 ...)
(DotProdBroadcastFloat64x2 x y) => (VDPPD128 [127] x y)
(EqualFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [0] x y)) (EqualFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [0] x y))
(EqualFloat32x4 x y) => (VCMPPS128 [0] x y) (EqualFloat32x4 x y) => (VCMPPS128 [0] x y)
(EqualFloat32x8 x y) => (VCMPPS256 [0] x y) (EqualFloat32x8 x y) => (VCMPPS256 [0] x y)
@ -829,6 +830,9 @@
(MaskedOrUint64x2 x y mask) => (VPORQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask)) (MaskedOrUint64x2 x y mask) => (VPORQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
(MaskedOrUint64x4 x y mask) => (VPORQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask)) (MaskedOrUint64x4 x y mask) => (VPORQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(MaskedOrUint64x8 x y mask) => (VPORQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask)) (MaskedOrUint64x8 x y mask) => (VPORQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
(MaskedPairDotProdInt16x16 x y mask) => (VPMADDWDMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
(MaskedPairDotProdInt16x32 x y mask) => (VPMADDWDMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
(MaskedPairDotProdInt16x8 x y mask) => (VPMADDWDMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
(MaskedPopCountInt16x16 x mask) => (VPOPCNTWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask)) (MaskedPopCountInt16x16 x mask) => (VPOPCNTWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
(MaskedPopCountInt16x32 x mask) => (VPOPCNTWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask)) (MaskedPopCountInt16x32 x mask) => (VPOPCNTWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
(MaskedPopCountInt16x8 x mask) => (VPOPCNTWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask)) (MaskedPopCountInt16x8 x mask) => (VPOPCNTWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
@ -889,6 +893,9 @@
(MaskedSaturatedSubUint8x16 x y mask) => (VPSUBSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask)) (MaskedSaturatedSubUint8x16 x y mask) => (VPSUBSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
(MaskedSaturatedSubUint8x32 x y mask) => (VPSUBSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask)) (MaskedSaturatedSubUint8x32 x y mask) => (VPSUBSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
(MaskedSaturatedSubUint8x64 x y mask) => (VPSUBSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask)) (MaskedSaturatedSubUint8x64 x y mask) => (VPSUBSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
(MaskedSaturatedUnsignedSignedPairDotProdUint16x16 x y mask) => (VPMADDUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
(MaskedSaturatedUnsignedSignedPairDotProdUint16x32 x y mask) => (VPMADDUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
(MaskedSaturatedUnsignedSignedPairDotProdUint16x8 x y mask) => (VPMADDUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
(MaskedSqrtFloat32x16 x mask) => (VSQRTPSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask)) (MaskedSqrtFloat32x16 x mask) => (VSQRTPSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
(MaskedSqrtFloat32x4 x mask) => (VSQRTPSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask)) (MaskedSqrtFloat32x4 x mask) => (VSQRTPSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
(MaskedSqrtFloat32x8 x mask) => (VSQRTPSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask)) (MaskedSqrtFloat32x8 x mask) => (VSQRTPSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
@ -1108,6 +1115,9 @@
(OrUint64x8 ...) => (VPORQ512 ...) (OrUint64x8 ...) => (VPORQ512 ...)
(OrUint8x16 ...) => (VPOR128 ...) (OrUint8x16 ...) => (VPOR128 ...)
(OrUint8x32 ...) => (VPOR256 ...) (OrUint8x32 ...) => (VPOR256 ...)
(PairDotProdInt16x16 ...) => (VPMADDWD256 ...)
(PairDotProdInt16x32 ...) => (VPMADDWD512 ...)
(PairDotProdInt16x8 ...) => (VPMADDWD128 ...)
(PairwiseAddFloat32x4 ...) => (VHADDPS128 ...) (PairwiseAddFloat32x4 ...) => (VHADDPS128 ...)
(PairwiseAddFloat32x8 ...) => (VHADDPS256 ...) (PairwiseAddFloat32x8 ...) => (VHADDPS256 ...)
(PairwiseAddFloat64x2 ...) => (VHADDPD128 ...) (PairwiseAddFloat64x2 ...) => (VHADDPD128 ...)
@ -1200,6 +1210,11 @@
(SaturatedSubUint8x16 ...) => (VPSUBSB128 ...) (SaturatedSubUint8x16 ...) => (VPSUBSB128 ...)
(SaturatedSubUint8x32 ...) => (VPSUBSB256 ...) (SaturatedSubUint8x32 ...) => (VPSUBSB256 ...)
(SaturatedSubUint8x64 ...) => (VPSUBSB512 ...) (SaturatedSubUint8x64 ...) => (VPSUBSB512 ...)
(SaturatedUnsignedSignedPairDotProdUint16x16 ...) => (VPMADDUBSW256 ...)
(SaturatedUnsignedSignedPairDotProdUint16x32 ...) => (VPMADDUBSW512 ...)
(SaturatedUnsignedSignedPairDotProdUint16x8 ...) => (VPMADDUBSW128 ...)
(SaturatedUnsignedSignedPairDotProdUint8x16 ...) => (VPMADDUBSW128 ...)
(SaturatedUnsignedSignedPairDotProdUint8x32 ...) => (VPMADDUBSW256 ...)
(SignInt16x16 ...) => (VPSIGNW256 ...) (SignInt16x16 ...) => (VPSIGNW256 ...)
(SignInt16x8 ...) => (VPSIGNW128 ...) (SignInt16x8 ...) => (VPSIGNW128 ...)
(SignInt32x4 ...) => (VPSIGND128 ...) (SignInt32x4 ...) => (VPSIGND128 ...)

View file

@ -185,6 +185,7 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
{name: "VPMINSWMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMINSW", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMINSWMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMINSW", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMULHWMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMULHW", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMULHWMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMULHW", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMULLWMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMULLW", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMULLWMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMULLW", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMADDWDMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMADDWD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPOPCNTWMasked256", argLength: 2, reg: fp1k1fp1, asm: "VPOPCNTW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPOPCNTWMasked256", argLength: 2, reg: fp1k1fp1, asm: "VPOPCNTW", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPADDSWMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPADDSW", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPADDSWMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPADDSW", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPSUBSWMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPSUBSW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSUBSWMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPSUBSW", commutative: false, typ: "Vec256", resultInArg0: false},
@ -194,6 +195,7 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
{name: "VPMULHW256", argLength: 2, reg: fp21, asm: "VPMULHW", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMULHW256", argLength: 2, reg: fp21, asm: "VPMULHW", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMULLW256", argLength: 2, reg: fp21, asm: "VPMULLW", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMULLW256", argLength: 2, reg: fp21, asm: "VPMULLW", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPOR256", argLength: 2, reg: fp21, asm: "VPOR", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPOR256", argLength: 2, reg: fp21, asm: "VPOR", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMADDWD256", argLength: 2, reg: fp21, asm: "VPMADDWD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPHADDW256", argLength: 2, reg: fp21, asm: "VPHADDW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPHADDW256", argLength: 2, reg: fp21, asm: "VPHADDW", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPHSUBW256", argLength: 2, reg: fp21, asm: "VPHSUBW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPHSUBW256", argLength: 2, reg: fp21, asm: "VPHSUBW", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPOPCNTW256", argLength: 1, reg: fp11, asm: "VPOPCNTW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPOPCNTW256", argLength: 1, reg: fp11, asm: "VPOPCNTW", commutative: false, typ: "Vec256", resultInArg0: false},
@ -216,6 +218,7 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
{name: "VPMINSWMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMINSW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMINSWMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMINSW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMULHWMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMULHW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMULHWMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMULHW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMULLWMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMULLW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMULLWMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMULLW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMADDWDMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMADDWD", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPOPCNTWMasked512", argLength: 2, reg: fp1k1fp1, asm: "VPOPCNTW", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPOPCNTWMasked512", argLength: 2, reg: fp1k1fp1, asm: "VPOPCNTW", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPADDSWMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPADDSW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPADDSWMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPADDSW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPSUBSWMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPSUBSW", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSUBSWMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPSUBSW", commutative: false, typ: "Vec512", resultInArg0: false},
@ -224,6 +227,7 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
{name: "VPMINSW512", argLength: 2, reg: fp21, asm: "VPMINSW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMINSW512", argLength: 2, reg: fp21, asm: "VPMINSW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMULHW512", argLength: 2, reg: fp21, asm: "VPMULHW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMULHW512", argLength: 2, reg: fp21, asm: "VPMULHW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMULLW512", argLength: 2, reg: fp21, asm: "VPMULLW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMULLW512", argLength: 2, reg: fp21, asm: "VPMULLW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMADDWD512", argLength: 2, reg: fp21, asm: "VPMADDWD", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPOPCNTW512", argLength: 1, reg: fp11, asm: "VPOPCNTW", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPOPCNTW512", argLength: 1, reg: fp11, asm: "VPOPCNTW", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPADDSW512", argLength: 2, reg: fp21, asm: "VPADDSW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPADDSW512", argLength: 2, reg: fp21, asm: "VPADDSW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPSUBSW512", argLength: 2, reg: fp21, asm: "VPSUBSW", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSUBSW512", argLength: 2, reg: fp21, asm: "VPSUBSW", commutative: false, typ: "Vec512", resultInArg0: false},
@ -242,6 +246,7 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
{name: "VPMINSWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMINSW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMINSWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMINSW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMULHWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMULHW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMULHWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMULHW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMULLWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMULLW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMULLWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMULLW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMADDWDMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMADDWD", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPOPCNTWMasked128", argLength: 2, reg: fp1k1fp1, asm: "VPOPCNTW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPOPCNTWMasked128", argLength: 2, reg: fp1k1fp1, asm: "VPOPCNTW", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPADDSWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPADDSW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPADDSWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPADDSW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPSUBSWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPSUBSW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSUBSWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPSUBSW", commutative: false, typ: "Vec128", resultInArg0: false},
@ -251,6 +256,7 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
{name: "VPMULHW128", argLength: 2, reg: fp21, asm: "VPMULHW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMULHW128", argLength: 2, reg: fp21, asm: "VPMULHW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMULLW128", argLength: 2, reg: fp21, asm: "VPMULLW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMULLW128", argLength: 2, reg: fp21, asm: "VPMULLW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPOR128", argLength: 2, reg: fp21, asm: "VPOR", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPOR128", argLength: 2, reg: fp21, asm: "VPOR", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMADDWD128", argLength: 2, reg: fp21, asm: "VPMADDWD", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPHADDW128", argLength: 2, reg: fp21, asm: "VPHADDW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPHADDW128", argLength: 2, reg: fp21, asm: "VPHADDW", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPHSUBW128", argLength: 2, reg: fp21, asm: "VPHSUBW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPHSUBW128", argLength: 2, reg: fp21, asm: "VPHSUBW", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPOPCNTW128", argLength: 1, reg: fp11, asm: "VPOPCNTW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPOPCNTW128", argLength: 1, reg: fp11, asm: "VPOPCNTW", commutative: false, typ: "Vec128", resultInArg0: false},
@ -480,6 +486,7 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
{name: "VPMAXUWMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMAXUW", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMAXUWMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMAXUW", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMINUWMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMINUW", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMINUWMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMINUW", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMULHUWMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMULHUW", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMULHUWMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMULHUW", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMADDUBSWMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMADDUBSW", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPMAXUW256", argLength: 2, reg: fp21, asm: "VPMAXUW", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMAXUW256", argLength: 2, reg: fp21, asm: "VPMAXUW", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMINUW256", argLength: 2, reg: fp21, asm: "VPMINUW", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMINUW256", argLength: 2, reg: fp21, asm: "VPMINUW", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMULHUW256", argLength: 2, reg: fp21, asm: "VPMULHUW", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMULHUW256", argLength: 2, reg: fp21, asm: "VPMULHUW", commutative: true, typ: "Vec256", resultInArg0: false},
@ -488,14 +495,17 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
{name: "VPMAXUWMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMAXUW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMAXUWMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMAXUW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMINUWMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMINUW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMINUWMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMINUW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMULHUWMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMULHUW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMULHUWMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMULHUW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMADDUBSWMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMADDUBSW", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPMAXUW512", argLength: 2, reg: fp21, asm: "VPMAXUW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMAXUW512", argLength: 2, reg: fp21, asm: "VPMAXUW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMINUW512", argLength: 2, reg: fp21, asm: "VPMINUW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMINUW512", argLength: 2, reg: fp21, asm: "VPMINUW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMULHUW512", argLength: 2, reg: fp21, asm: "VPMULHUW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMULHUW512", argLength: 2, reg: fp21, asm: "VPMULHUW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMADDUBSW512", argLength: 2, reg: fp21, asm: "VPMADDUBSW", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPAVGW128", argLength: 2, reg: fp21, asm: "VPAVGW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPAVGW128", argLength: 2, reg: fp21, asm: "VPAVGW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPAVGWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPAVGW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPAVGWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPAVGW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMAXUWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMAXUW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMAXUWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMAXUW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMINUWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMINUW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMINUWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMINUW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMULHUWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMULHUW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMULHUWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMULHUW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMADDUBSWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMADDUBSW", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMAXUW128", argLength: 2, reg: fp21, asm: "VPMAXUW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMAXUW128", argLength: 2, reg: fp21, asm: "VPMAXUW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMINUW128", argLength: 2, reg: fp21, asm: "VPMINUW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMINUW128", argLength: 2, reg: fp21, asm: "VPMINUW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMULHUW128", argLength: 2, reg: fp21, asm: "VPMULHUW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMULHUW128", argLength: 2, reg: fp21, asm: "VPMULHUW", commutative: true, typ: "Vec128", resultInArg0: false},
@ -535,12 +545,14 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
{name: "VPMINUBMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMINUB", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMINUBMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMINUB", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMAXUB128", argLength: 2, reg: fp21, asm: "VPMAXUB", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMAXUB128", argLength: 2, reg: fp21, asm: "VPMAXUB", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMINUB128", argLength: 2, reg: fp21, asm: "VPMINUB", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMINUB128", argLength: 2, reg: fp21, asm: "VPMINUB", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMADDUBSW128", argLength: 2, reg: fp21, asm: "VPMADDUBSW", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPAVGB256", argLength: 2, reg: fp21, asm: "VPAVGB", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPAVGB256", argLength: 2, reg: fp21, asm: "VPAVGB", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPAVGBMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPAVGB", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPAVGBMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPAVGB", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMAXUBMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMAXUB", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMAXUBMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMAXUB", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMINUBMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMINUB", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMINUBMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMINUB", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMAXUB256", argLength: 2, reg: fp21, asm: "VPMAXUB", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMAXUB256", argLength: 2, reg: fp21, asm: "VPMAXUB", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMINUB256", argLength: 2, reg: fp21, asm: "VPMINUB", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMINUB256", argLength: 2, reg: fp21, asm: "VPMINUB", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMADDUBSW256", argLength: 2, reg: fp21, asm: "VPMADDUBSW", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPAVGB512", argLength: 2, reg: fp21, asm: "VPAVGB", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPAVGB512", argLength: 2, reg: fp21, asm: "VPAVGB", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPAVGBMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPAVGB", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPAVGBMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPAVGB", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMAXUBMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMAXUB", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMAXUBMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMAXUB", commutative: true, typ: "Vec512", resultInArg0: false},
@ -570,6 +582,7 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
{name: "VROUNDPD128", argLength: 1, reg: fp11, asm: "VROUNDPD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VROUNDPD128", argLength: 1, reg: fp11, asm: "VROUNDPD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VRNDSCALEPD128", argLength: 1, reg: fp11, asm: "VRNDSCALEPD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VRNDSCALEPD128", argLength: 1, reg: fp11, asm: "VRNDSCALEPD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VREDUCEPD128", argLength: 1, reg: fp11, asm: "VREDUCEPD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VREDUCEPD128", argLength: 1, reg: fp11, asm: "VREDUCEPD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VDPPD128", argLength: 2, reg: fp21, asm: "VDPPD", aux: "Int8", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VCMPPD128", argLength: 2, reg: fp21, asm: "VCMPPD", aux: "Int8", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VCMPPD128", argLength: 2, reg: fp21, asm: "VCMPPD", aux: "Int8", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VRNDSCALEPDMasked128", argLength: 2, reg: fp1k1fp1, asm: "VRNDSCALEPD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VRNDSCALEPDMasked128", argLength: 2, reg: fp1k1fp1, asm: "VRNDSCALEPD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VREDUCEPDMasked128", argLength: 2, reg: fp1k1fp1, asm: "VREDUCEPD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VREDUCEPDMasked128", argLength: 2, reg: fp1k1fp1, asm: "VREDUCEPD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},

View file

@ -151,6 +151,7 @@ func simdGenericOps() []opData {
{name: "ApproximateReciprocalOfSqrtFloat64x2", argLength: 1, commutative: false}, {name: "ApproximateReciprocalOfSqrtFloat64x2", argLength: 1, commutative: false},
{name: "CeilFloat64x2", argLength: 1, commutative: false}, {name: "CeilFloat64x2", argLength: 1, commutative: false},
{name: "DivFloat64x2", argLength: 2, commutative: false}, {name: "DivFloat64x2", argLength: 2, commutative: false},
{name: "DotProdBroadcastFloat64x2", argLength: 2, commutative: true},
{name: "EqualFloat64x2", argLength: 2, commutative: true}, {name: "EqualFloat64x2", argLength: 2, commutative: true},
{name: "FloorFloat64x2", argLength: 1, commutative: false}, {name: "FloorFloat64x2", argLength: 1, commutative: false},
{name: "GreaterFloat64x2", argLength: 2, commutative: false}, {name: "GreaterFloat64x2", argLength: 2, commutative: false},
@ -304,6 +305,7 @@ func simdGenericOps() []opData {
{name: "MaskedMulHighInt16x16", argLength: 3, commutative: true}, {name: "MaskedMulHighInt16x16", argLength: 3, commutative: true},
{name: "MaskedMulLowInt16x16", argLength: 3, commutative: true}, {name: "MaskedMulLowInt16x16", argLength: 3, commutative: true},
{name: "MaskedNotEqualInt16x16", argLength: 3, commutative: true}, {name: "MaskedNotEqualInt16x16", argLength: 3, commutative: true},
{name: "MaskedPairDotProdInt16x16", argLength: 3, commutative: false},
{name: "MaskedPopCountInt16x16", argLength: 2, commutative: false}, {name: "MaskedPopCountInt16x16", argLength: 2, commutative: false},
{name: "MaskedSaturatedAddInt16x16", argLength: 3, commutative: true}, {name: "MaskedSaturatedAddInt16x16", argLength: 3, commutative: true},
{name: "MaskedSaturatedSubInt16x16", argLength: 3, commutative: false}, {name: "MaskedSaturatedSubInt16x16", argLength: 3, commutative: false},
@ -314,6 +316,7 @@ func simdGenericOps() []opData {
{name: "MulLowInt16x16", argLength: 2, commutative: true}, {name: "MulLowInt16x16", argLength: 2, commutative: true},
{name: "NotEqualInt16x16", argLength: 2, commutative: true}, {name: "NotEqualInt16x16", argLength: 2, commutative: true},
{name: "OrInt16x16", argLength: 2, commutative: true}, {name: "OrInt16x16", argLength: 2, commutative: true},
{name: "PairDotProdInt16x16", argLength: 2, commutative: false},
{name: "PairwiseAddInt16x16", argLength: 2, commutative: false}, {name: "PairwiseAddInt16x16", argLength: 2, commutative: false},
{name: "PairwiseSubInt16x16", argLength: 2, commutative: false}, {name: "PairwiseSubInt16x16", argLength: 2, commutative: false},
{name: "PopCountInt16x16", argLength: 1, commutative: false}, {name: "PopCountInt16x16", argLength: 1, commutative: false},
@ -343,6 +346,7 @@ func simdGenericOps() []opData {
{name: "MaskedMulHighInt16x32", argLength: 3, commutative: true}, {name: "MaskedMulHighInt16x32", argLength: 3, commutative: true},
{name: "MaskedMulLowInt16x32", argLength: 3, commutative: true}, {name: "MaskedMulLowInt16x32", argLength: 3, commutative: true},
{name: "MaskedNotEqualInt16x32", argLength: 3, commutative: true}, {name: "MaskedNotEqualInt16x32", argLength: 3, commutative: true},
{name: "MaskedPairDotProdInt16x32", argLength: 3, commutative: false},
{name: "MaskedPopCountInt16x32", argLength: 2, commutative: false}, {name: "MaskedPopCountInt16x32", argLength: 2, commutative: false},
{name: "MaskedSaturatedAddInt16x32", argLength: 3, commutative: true}, {name: "MaskedSaturatedAddInt16x32", argLength: 3, commutative: true},
{name: "MaskedSaturatedSubInt16x32", argLength: 3, commutative: false}, {name: "MaskedSaturatedSubInt16x32", argLength: 3, commutative: false},
@ -352,6 +356,7 @@ func simdGenericOps() []opData {
{name: "MulHighInt16x32", argLength: 2, commutative: true}, {name: "MulHighInt16x32", argLength: 2, commutative: true},
{name: "MulLowInt16x32", argLength: 2, commutative: true}, {name: "MulLowInt16x32", argLength: 2, commutative: true},
{name: "NotEqualInt16x32", argLength: 2, commutative: true}, {name: "NotEqualInt16x32", argLength: 2, commutative: true},
{name: "PairDotProdInt16x32", argLength: 2, commutative: false},
{name: "PopCountInt16x32", argLength: 1, commutative: false}, {name: "PopCountInt16x32", argLength: 1, commutative: false},
{name: "SaturatedAddInt16x32", argLength: 2, commutative: true}, {name: "SaturatedAddInt16x32", argLength: 2, commutative: true},
{name: "SaturatedSubInt16x32", argLength: 2, commutative: false}, {name: "SaturatedSubInt16x32", argLength: 2, commutative: false},
@ -377,6 +382,7 @@ func simdGenericOps() []opData {
{name: "MaskedMulHighInt16x8", argLength: 3, commutative: true}, {name: "MaskedMulHighInt16x8", argLength: 3, commutative: true},
{name: "MaskedMulLowInt16x8", argLength: 3, commutative: true}, {name: "MaskedMulLowInt16x8", argLength: 3, commutative: true},
{name: "MaskedNotEqualInt16x8", argLength: 3, commutative: true}, {name: "MaskedNotEqualInt16x8", argLength: 3, commutative: true},
{name: "MaskedPairDotProdInt16x8", argLength: 3, commutative: false},
{name: "MaskedPopCountInt16x8", argLength: 2, commutative: false}, {name: "MaskedPopCountInt16x8", argLength: 2, commutative: false},
{name: "MaskedSaturatedAddInt16x8", argLength: 3, commutative: true}, {name: "MaskedSaturatedAddInt16x8", argLength: 3, commutative: true},
{name: "MaskedSaturatedSubInt16x8", argLength: 3, commutative: false}, {name: "MaskedSaturatedSubInt16x8", argLength: 3, commutative: false},
@ -387,6 +393,7 @@ func simdGenericOps() []opData {
{name: "MulLowInt16x8", argLength: 2, commutative: true}, {name: "MulLowInt16x8", argLength: 2, commutative: true},
{name: "NotEqualInt16x8", argLength: 2, commutative: true}, {name: "NotEqualInt16x8", argLength: 2, commutative: true},
{name: "OrInt16x8", argLength: 2, commutative: true}, {name: "OrInt16x8", argLength: 2, commutative: true},
{name: "PairDotProdInt16x8", argLength: 2, commutative: false},
{name: "PairwiseAddInt16x8", argLength: 2, commutative: false}, {name: "PairwiseAddInt16x8", argLength: 2, commutative: false},
{name: "PairwiseSubInt16x8", argLength: 2, commutative: false}, {name: "PairwiseSubInt16x8", argLength: 2, commutative: false},
{name: "PopCountInt16x8", argLength: 1, commutative: false}, {name: "PopCountInt16x8", argLength: 1, commutative: false},
@ -732,6 +739,7 @@ func simdGenericOps() []opData {
{name: "MaskedPopCountUint16x16", argLength: 2, commutative: false}, {name: "MaskedPopCountUint16x16", argLength: 2, commutative: false},
{name: "MaskedSaturatedAddUint16x16", argLength: 3, commutative: true}, {name: "MaskedSaturatedAddUint16x16", argLength: 3, commutative: true},
{name: "MaskedSaturatedSubUint16x16", argLength: 3, commutative: false}, {name: "MaskedSaturatedSubUint16x16", argLength: 3, commutative: false},
{name: "MaskedSaturatedUnsignedSignedPairDotProdUint16x16", argLength: 3, commutative: false},
{name: "MaskedSubUint16x16", argLength: 3, commutative: false}, {name: "MaskedSubUint16x16", argLength: 3, commutative: false},
{name: "MaxUint16x16", argLength: 2, commutative: true}, {name: "MaxUint16x16", argLength: 2, commutative: true},
{name: "MinUint16x16", argLength: 2, commutative: true}, {name: "MinUint16x16", argLength: 2, commutative: true},
@ -743,6 +751,7 @@ func simdGenericOps() []opData {
{name: "PopCountUint16x16", argLength: 1, commutative: false}, {name: "PopCountUint16x16", argLength: 1, commutative: false},
{name: "SaturatedAddUint16x16", argLength: 2, commutative: true}, {name: "SaturatedAddUint16x16", argLength: 2, commutative: true},
{name: "SaturatedSubUint16x16", argLength: 2, commutative: false}, {name: "SaturatedSubUint16x16", argLength: 2, commutative: false},
{name: "SaturatedUnsignedSignedPairDotProdUint16x16", argLength: 2, commutative: false},
{name: "SubUint16x16", argLength: 2, commutative: false}, {name: "SubUint16x16", argLength: 2, commutative: false},
{name: "XorUint16x16", argLength: 2, commutative: true}, {name: "XorUint16x16", argLength: 2, commutative: true},
{name: "AddUint16x32", argLength: 2, commutative: true}, {name: "AddUint16x32", argLength: 2, commutative: true},
@ -766,6 +775,7 @@ func simdGenericOps() []opData {
{name: "MaskedPopCountUint16x32", argLength: 2, commutative: false}, {name: "MaskedPopCountUint16x32", argLength: 2, commutative: false},
{name: "MaskedSaturatedAddUint16x32", argLength: 3, commutative: true}, {name: "MaskedSaturatedAddUint16x32", argLength: 3, commutative: true},
{name: "MaskedSaturatedSubUint16x32", argLength: 3, commutative: false}, {name: "MaskedSaturatedSubUint16x32", argLength: 3, commutative: false},
{name: "MaskedSaturatedUnsignedSignedPairDotProdUint16x32", argLength: 3, commutative: false},
{name: "MaskedSubUint16x32", argLength: 3, commutative: false}, {name: "MaskedSubUint16x32", argLength: 3, commutative: false},
{name: "MaxUint16x32", argLength: 2, commutative: true}, {name: "MaxUint16x32", argLength: 2, commutative: true},
{name: "MinUint16x32", argLength: 2, commutative: true}, {name: "MinUint16x32", argLength: 2, commutative: true},
@ -774,6 +784,7 @@ func simdGenericOps() []opData {
{name: "PopCountUint16x32", argLength: 1, commutative: false}, {name: "PopCountUint16x32", argLength: 1, commutative: false},
{name: "SaturatedAddUint16x32", argLength: 2, commutative: true}, {name: "SaturatedAddUint16x32", argLength: 2, commutative: true},
{name: "SaturatedSubUint16x32", argLength: 2, commutative: false}, {name: "SaturatedSubUint16x32", argLength: 2, commutative: false},
{name: "SaturatedUnsignedSignedPairDotProdUint16x32", argLength: 2, commutative: false},
{name: "SubUint16x32", argLength: 2, commutative: false}, {name: "SubUint16x32", argLength: 2, commutative: false},
{name: "AddUint16x8", argLength: 2, commutative: true}, {name: "AddUint16x8", argLength: 2, commutative: true},
{name: "AndUint16x8", argLength: 2, commutative: true}, {name: "AndUint16x8", argLength: 2, commutative: true},
@ -798,6 +809,7 @@ func simdGenericOps() []opData {
{name: "MaskedPopCountUint16x8", argLength: 2, commutative: false}, {name: "MaskedPopCountUint16x8", argLength: 2, commutative: false},
{name: "MaskedSaturatedAddUint16x8", argLength: 3, commutative: true}, {name: "MaskedSaturatedAddUint16x8", argLength: 3, commutative: true},
{name: "MaskedSaturatedSubUint16x8", argLength: 3, commutative: false}, {name: "MaskedSaturatedSubUint16x8", argLength: 3, commutative: false},
{name: "MaskedSaturatedUnsignedSignedPairDotProdUint16x8", argLength: 3, commutative: false},
{name: "MaskedSubUint16x8", argLength: 3, commutative: false}, {name: "MaskedSubUint16x8", argLength: 3, commutative: false},
{name: "MaxUint16x8", argLength: 2, commutative: true}, {name: "MaxUint16x8", argLength: 2, commutative: true},
{name: "MinUint16x8", argLength: 2, commutative: true}, {name: "MinUint16x8", argLength: 2, commutative: true},
@ -809,6 +821,7 @@ func simdGenericOps() []opData {
{name: "PopCountUint16x8", argLength: 1, commutative: false}, {name: "PopCountUint16x8", argLength: 1, commutative: false},
{name: "SaturatedAddUint16x8", argLength: 2, commutative: true}, {name: "SaturatedAddUint16x8", argLength: 2, commutative: true},
{name: "SaturatedSubUint16x8", argLength: 2, commutative: false}, {name: "SaturatedSubUint16x8", argLength: 2, commutative: false},
{name: "SaturatedUnsignedSignedPairDotProdUint16x8", argLength: 2, commutative: false},
{name: "SubUint16x8", argLength: 2, commutative: false}, {name: "SubUint16x8", argLength: 2, commutative: false},
{name: "XorUint16x8", argLength: 2, commutative: true}, {name: "XorUint16x8", argLength: 2, commutative: true},
{name: "AddUint32x16", argLength: 2, commutative: true}, {name: "AddUint32x16", argLength: 2, commutative: true},
@ -1033,6 +1046,7 @@ func simdGenericOps() []opData {
{name: "PopCountUint8x16", argLength: 1, commutative: false}, {name: "PopCountUint8x16", argLength: 1, commutative: false},
{name: "SaturatedAddUint8x16", argLength: 2, commutative: true}, {name: "SaturatedAddUint8x16", argLength: 2, commutative: true},
{name: "SaturatedSubUint8x16", argLength: 2, commutative: false}, {name: "SaturatedSubUint8x16", argLength: 2, commutative: false},
{name: "SaturatedUnsignedSignedPairDotProdUint8x16", argLength: 2, commutative: false},
{name: "SubUint8x16", argLength: 2, commutative: false}, {name: "SubUint8x16", argLength: 2, commutative: false},
{name: "XorUint8x16", argLength: 2, commutative: true}, {name: "XorUint8x16", argLength: 2, commutative: true},
{name: "AddUint8x32", argLength: 2, commutative: true}, {name: "AddUint8x32", argLength: 2, commutative: true},
@ -1065,6 +1079,7 @@ func simdGenericOps() []opData {
{name: "PopCountUint8x32", argLength: 1, commutative: false}, {name: "PopCountUint8x32", argLength: 1, commutative: false},
{name: "SaturatedAddUint8x32", argLength: 2, commutative: true}, {name: "SaturatedAddUint8x32", argLength: 2, commutative: true},
{name: "SaturatedSubUint8x32", argLength: 2, commutative: false}, {name: "SaturatedSubUint8x32", argLength: 2, commutative: false},
{name: "SaturatedUnsignedSignedPairDotProdUint8x32", argLength: 2, commutative: false},
{name: "SubUint8x32", argLength: 2, commutative: false}, {name: "SubUint8x32", argLength: 2, commutative: false},
{name: "XorUint8x32", argLength: 2, commutative: true}, {name: "XorUint8x32", argLength: 2, commutative: true},
{name: "AddUint8x64", argLength: 2, commutative: true}, {name: "AddUint8x64", argLength: 2, commutative: true},

View file

@ -1378,6 +1378,7 @@ const (
OpAMD64VPMINSWMasked256 OpAMD64VPMINSWMasked256
OpAMD64VPMULHWMasked256 OpAMD64VPMULHWMasked256
OpAMD64VPMULLWMasked256 OpAMD64VPMULLWMasked256
OpAMD64VPMADDWDMasked256
OpAMD64VPOPCNTWMasked256 OpAMD64VPOPCNTWMasked256
OpAMD64VPADDSWMasked256 OpAMD64VPADDSWMasked256
OpAMD64VPSUBSWMasked256 OpAMD64VPSUBSWMasked256
@ -1387,6 +1388,7 @@ const (
OpAMD64VPMULHW256 OpAMD64VPMULHW256
OpAMD64VPMULLW256 OpAMD64VPMULLW256
OpAMD64VPOR256 OpAMD64VPOR256
OpAMD64VPMADDWD256
OpAMD64VPHADDW256 OpAMD64VPHADDW256
OpAMD64VPHSUBW256 OpAMD64VPHSUBW256
OpAMD64VPOPCNTW256 OpAMD64VPOPCNTW256
@ -1409,6 +1411,7 @@ const (
OpAMD64VPMINSWMasked512 OpAMD64VPMINSWMasked512
OpAMD64VPMULHWMasked512 OpAMD64VPMULHWMasked512
OpAMD64VPMULLWMasked512 OpAMD64VPMULLWMasked512
OpAMD64VPMADDWDMasked512
OpAMD64VPOPCNTWMasked512 OpAMD64VPOPCNTWMasked512
OpAMD64VPADDSWMasked512 OpAMD64VPADDSWMasked512
OpAMD64VPSUBSWMasked512 OpAMD64VPSUBSWMasked512
@ -1417,6 +1420,7 @@ const (
OpAMD64VPMINSW512 OpAMD64VPMINSW512
OpAMD64VPMULHW512 OpAMD64VPMULHW512
OpAMD64VPMULLW512 OpAMD64VPMULLW512
OpAMD64VPMADDWD512
OpAMD64VPOPCNTW512 OpAMD64VPOPCNTW512
OpAMD64VPADDSW512 OpAMD64VPADDSW512
OpAMD64VPSUBSW512 OpAMD64VPSUBSW512
@ -1435,6 +1439,7 @@ const (
OpAMD64VPMINSWMasked128 OpAMD64VPMINSWMasked128
OpAMD64VPMULHWMasked128 OpAMD64VPMULHWMasked128
OpAMD64VPMULLWMasked128 OpAMD64VPMULLWMasked128
OpAMD64VPMADDWDMasked128
OpAMD64VPOPCNTWMasked128 OpAMD64VPOPCNTWMasked128
OpAMD64VPADDSWMasked128 OpAMD64VPADDSWMasked128
OpAMD64VPSUBSWMasked128 OpAMD64VPSUBSWMasked128
@ -1444,6 +1449,7 @@ const (
OpAMD64VPMULHW128 OpAMD64VPMULHW128
OpAMD64VPMULLW128 OpAMD64VPMULLW128
OpAMD64VPOR128 OpAMD64VPOR128
OpAMD64VPMADDWD128
OpAMD64VPHADDW128 OpAMD64VPHADDW128
OpAMD64VPHSUBW128 OpAMD64VPHSUBW128
OpAMD64VPOPCNTW128 OpAMD64VPOPCNTW128
@ -1673,6 +1679,7 @@ const (
OpAMD64VPMAXUWMasked256 OpAMD64VPMAXUWMasked256
OpAMD64VPMINUWMasked256 OpAMD64VPMINUWMasked256
OpAMD64VPMULHUWMasked256 OpAMD64VPMULHUWMasked256
OpAMD64VPMADDUBSWMasked256
OpAMD64VPMAXUW256 OpAMD64VPMAXUW256
OpAMD64VPMINUW256 OpAMD64VPMINUW256
OpAMD64VPMULHUW256 OpAMD64VPMULHUW256
@ -1681,14 +1688,17 @@ const (
OpAMD64VPMAXUWMasked512 OpAMD64VPMAXUWMasked512
OpAMD64VPMINUWMasked512 OpAMD64VPMINUWMasked512
OpAMD64VPMULHUWMasked512 OpAMD64VPMULHUWMasked512
OpAMD64VPMADDUBSWMasked512
OpAMD64VPMAXUW512 OpAMD64VPMAXUW512
OpAMD64VPMINUW512 OpAMD64VPMINUW512
OpAMD64VPMULHUW512 OpAMD64VPMULHUW512
OpAMD64VPMADDUBSW512
OpAMD64VPAVGW128 OpAMD64VPAVGW128
OpAMD64VPAVGWMasked128 OpAMD64VPAVGWMasked128
OpAMD64VPMAXUWMasked128 OpAMD64VPMAXUWMasked128
OpAMD64VPMINUWMasked128 OpAMD64VPMINUWMasked128
OpAMD64VPMULHUWMasked128 OpAMD64VPMULHUWMasked128
OpAMD64VPMADDUBSWMasked128
OpAMD64VPMAXUW128 OpAMD64VPMAXUW128
OpAMD64VPMINUW128 OpAMD64VPMINUW128
OpAMD64VPMULHUW128 OpAMD64VPMULHUW128
@ -1728,12 +1738,14 @@ const (
OpAMD64VPMINUBMasked128 OpAMD64VPMINUBMasked128
OpAMD64VPMAXUB128 OpAMD64VPMAXUB128
OpAMD64VPMINUB128 OpAMD64VPMINUB128
OpAMD64VPMADDUBSW128
OpAMD64VPAVGB256 OpAMD64VPAVGB256
OpAMD64VPAVGBMasked256 OpAMD64VPAVGBMasked256
OpAMD64VPMAXUBMasked256 OpAMD64VPMAXUBMasked256
OpAMD64VPMINUBMasked256 OpAMD64VPMINUBMasked256
OpAMD64VPMAXUB256 OpAMD64VPMAXUB256
OpAMD64VPMINUB256 OpAMD64VPMINUB256
OpAMD64VPMADDUBSW256
OpAMD64VPAVGB512 OpAMD64VPAVGB512
OpAMD64VPAVGBMasked512 OpAMD64VPAVGBMasked512
OpAMD64VPMAXUBMasked512 OpAMD64VPMAXUBMasked512
@ -1763,6 +1775,7 @@ const (
OpAMD64VROUNDPD128 OpAMD64VROUNDPD128
OpAMD64VRNDSCALEPD128 OpAMD64VRNDSCALEPD128
OpAMD64VREDUCEPD128 OpAMD64VREDUCEPD128
OpAMD64VDPPD128
OpAMD64VCMPPD128 OpAMD64VCMPPD128
OpAMD64VRNDSCALEPDMasked128 OpAMD64VRNDSCALEPDMasked128
OpAMD64VREDUCEPDMasked128 OpAMD64VREDUCEPDMasked128
@ -4202,6 +4215,7 @@ const (
OpApproximateReciprocalOfSqrtFloat64x2 OpApproximateReciprocalOfSqrtFloat64x2
OpCeilFloat64x2 OpCeilFloat64x2
OpDivFloat64x2 OpDivFloat64x2
OpDotProdBroadcastFloat64x2
OpEqualFloat64x2 OpEqualFloat64x2
OpFloorFloat64x2 OpFloorFloat64x2
OpGreaterFloat64x2 OpGreaterFloat64x2
@ -4355,6 +4369,7 @@ const (
OpMaskedMulHighInt16x16 OpMaskedMulHighInt16x16
OpMaskedMulLowInt16x16 OpMaskedMulLowInt16x16
OpMaskedNotEqualInt16x16 OpMaskedNotEqualInt16x16
OpMaskedPairDotProdInt16x16
OpMaskedPopCountInt16x16 OpMaskedPopCountInt16x16
OpMaskedSaturatedAddInt16x16 OpMaskedSaturatedAddInt16x16
OpMaskedSaturatedSubInt16x16 OpMaskedSaturatedSubInt16x16
@ -4365,6 +4380,7 @@ const (
OpMulLowInt16x16 OpMulLowInt16x16
OpNotEqualInt16x16 OpNotEqualInt16x16
OpOrInt16x16 OpOrInt16x16
OpPairDotProdInt16x16
OpPairwiseAddInt16x16 OpPairwiseAddInt16x16
OpPairwiseSubInt16x16 OpPairwiseSubInt16x16
OpPopCountInt16x16 OpPopCountInt16x16
@ -4394,6 +4410,7 @@ const (
OpMaskedMulHighInt16x32 OpMaskedMulHighInt16x32
OpMaskedMulLowInt16x32 OpMaskedMulLowInt16x32
OpMaskedNotEqualInt16x32 OpMaskedNotEqualInt16x32
OpMaskedPairDotProdInt16x32
OpMaskedPopCountInt16x32 OpMaskedPopCountInt16x32
OpMaskedSaturatedAddInt16x32 OpMaskedSaturatedAddInt16x32
OpMaskedSaturatedSubInt16x32 OpMaskedSaturatedSubInt16x32
@ -4403,6 +4420,7 @@ const (
OpMulHighInt16x32 OpMulHighInt16x32
OpMulLowInt16x32 OpMulLowInt16x32
OpNotEqualInt16x32 OpNotEqualInt16x32
OpPairDotProdInt16x32
OpPopCountInt16x32 OpPopCountInt16x32
OpSaturatedAddInt16x32 OpSaturatedAddInt16x32
OpSaturatedSubInt16x32 OpSaturatedSubInt16x32
@ -4428,6 +4446,7 @@ const (
OpMaskedMulHighInt16x8 OpMaskedMulHighInt16x8
OpMaskedMulLowInt16x8 OpMaskedMulLowInt16x8
OpMaskedNotEqualInt16x8 OpMaskedNotEqualInt16x8
OpMaskedPairDotProdInt16x8
OpMaskedPopCountInt16x8 OpMaskedPopCountInt16x8
OpMaskedSaturatedAddInt16x8 OpMaskedSaturatedAddInt16x8
OpMaskedSaturatedSubInt16x8 OpMaskedSaturatedSubInt16x8
@ -4438,6 +4457,7 @@ const (
OpMulLowInt16x8 OpMulLowInt16x8
OpNotEqualInt16x8 OpNotEqualInt16x8
OpOrInt16x8 OpOrInt16x8
OpPairDotProdInt16x8
OpPairwiseAddInt16x8 OpPairwiseAddInt16x8
OpPairwiseSubInt16x8 OpPairwiseSubInt16x8
OpPopCountInt16x8 OpPopCountInt16x8
@ -4783,6 +4803,7 @@ const (
OpMaskedPopCountUint16x16 OpMaskedPopCountUint16x16
OpMaskedSaturatedAddUint16x16 OpMaskedSaturatedAddUint16x16
OpMaskedSaturatedSubUint16x16 OpMaskedSaturatedSubUint16x16
OpMaskedSaturatedUnsignedSignedPairDotProdUint16x16
OpMaskedSubUint16x16 OpMaskedSubUint16x16
OpMaxUint16x16 OpMaxUint16x16
OpMinUint16x16 OpMinUint16x16
@ -4794,6 +4815,7 @@ const (
OpPopCountUint16x16 OpPopCountUint16x16
OpSaturatedAddUint16x16 OpSaturatedAddUint16x16
OpSaturatedSubUint16x16 OpSaturatedSubUint16x16
OpSaturatedUnsignedSignedPairDotProdUint16x16
OpSubUint16x16 OpSubUint16x16
OpXorUint16x16 OpXorUint16x16
OpAddUint16x32 OpAddUint16x32
@ -4817,6 +4839,7 @@ const (
OpMaskedPopCountUint16x32 OpMaskedPopCountUint16x32
OpMaskedSaturatedAddUint16x32 OpMaskedSaturatedAddUint16x32
OpMaskedSaturatedSubUint16x32 OpMaskedSaturatedSubUint16x32
OpMaskedSaturatedUnsignedSignedPairDotProdUint16x32
OpMaskedSubUint16x32 OpMaskedSubUint16x32
OpMaxUint16x32 OpMaxUint16x32
OpMinUint16x32 OpMinUint16x32
@ -4825,6 +4848,7 @@ const (
OpPopCountUint16x32 OpPopCountUint16x32
OpSaturatedAddUint16x32 OpSaturatedAddUint16x32
OpSaturatedSubUint16x32 OpSaturatedSubUint16x32
OpSaturatedUnsignedSignedPairDotProdUint16x32
OpSubUint16x32 OpSubUint16x32
OpAddUint16x8 OpAddUint16x8
OpAndUint16x8 OpAndUint16x8
@ -4849,6 +4873,7 @@ const (
OpMaskedPopCountUint16x8 OpMaskedPopCountUint16x8
OpMaskedSaturatedAddUint16x8 OpMaskedSaturatedAddUint16x8
OpMaskedSaturatedSubUint16x8 OpMaskedSaturatedSubUint16x8
OpMaskedSaturatedUnsignedSignedPairDotProdUint16x8
OpMaskedSubUint16x8 OpMaskedSubUint16x8
OpMaxUint16x8 OpMaxUint16x8
OpMinUint16x8 OpMinUint16x8
@ -4860,6 +4885,7 @@ const (
OpPopCountUint16x8 OpPopCountUint16x8
OpSaturatedAddUint16x8 OpSaturatedAddUint16x8
OpSaturatedSubUint16x8 OpSaturatedSubUint16x8
OpSaturatedUnsignedSignedPairDotProdUint16x8
OpSubUint16x8 OpSubUint16x8
OpXorUint16x8 OpXorUint16x8
OpAddUint32x16 OpAddUint32x16
@ -5084,6 +5110,7 @@ const (
OpPopCountUint8x16 OpPopCountUint8x16
OpSaturatedAddUint8x16 OpSaturatedAddUint8x16
OpSaturatedSubUint8x16 OpSaturatedSubUint8x16
OpSaturatedUnsignedSignedPairDotProdUint8x16
OpSubUint8x16 OpSubUint8x16
OpXorUint8x16 OpXorUint8x16
OpAddUint8x32 OpAddUint8x32
@ -5116,6 +5143,7 @@ const (
OpPopCountUint8x32 OpPopCountUint8x32
OpSaturatedAddUint8x32 OpSaturatedAddUint8x32
OpSaturatedSubUint8x32 OpSaturatedSubUint8x32
OpSaturatedUnsignedSignedPairDotProdUint8x32
OpSubUint8x32 OpSubUint8x32
OpXorUint8x32 OpXorUint8x32
OpAddUint8x64 OpAddUint8x64
@ -20635,6 +20663,21 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "VPMADDWDMasked256",
argLen: 3,
asm: x86.AVPMADDWD,
reg: regInfo{
inputs: []inputInfo{
{2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{ {
name: "VPOPCNTWMasked256", name: "VPOPCNTWMasked256",
argLen: 2, argLen: 2,
@ -20770,6 +20813,20 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "VPMADDWD256",
argLen: 2,
asm: x86.AVPMADDWD,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{ {
name: "VPHADDW256", name: "VPHADDW256",
argLen: 2, argLen: 2,
@ -21093,6 +21150,21 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "VPMADDWDMasked512",
argLen: 3,
asm: x86.AVPMADDWD,
reg: regInfo{
inputs: []inputInfo{
{2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{ {
name: "VPOPCNTWMasked512", name: "VPOPCNTWMasked512",
argLen: 2, argLen: 2,
@ -21213,6 +21285,20 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "VPMADDWD512",
argLen: 2,
asm: x86.AVPMADDWD,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{ {
name: "VPOPCNTW512", name: "VPOPCNTW512",
argLen: 1, argLen: 1,
@ -21481,6 +21567,21 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "VPMADDWDMasked128",
argLen: 3,
asm: x86.AVPMADDWD,
reg: regInfo{
inputs: []inputInfo{
{2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{ {
name: "VPOPCNTWMasked128", name: "VPOPCNTWMasked128",
argLen: 2, argLen: 2,
@ -21616,6 +21717,20 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "VPMADDWD128",
argLen: 2,
asm: x86.AVPMADDWD,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{ {
name: "VPHADDW128", name: "VPHADDW128",
argLen: 2, argLen: 2,
@ -25035,6 +25150,21 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "VPMADDUBSWMasked256",
argLen: 3,
asm: x86.AVPMADDUBSW,
reg: regInfo{
inputs: []inputInfo{
{2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{ {
name: "VPMAXUW256", name: "VPMAXUW256",
argLen: 2, argLen: 2,
@ -25159,6 +25289,21 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "VPMADDUBSWMasked512",
argLen: 3,
asm: x86.AVPMADDUBSW,
reg: regInfo{
inputs: []inputInfo{
{2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{ {
name: "VPMAXUW512", name: "VPMAXUW512",
argLen: 2, argLen: 2,
@ -25204,6 +25349,20 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "VPMADDUBSW512",
argLen: 2,
asm: x86.AVPMADDUBSW,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{ {
name: "VPAVGW128", name: "VPAVGW128",
argLen: 2, argLen: 2,
@ -25283,6 +25442,21 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "VPMADDUBSWMasked128",
argLen: 3,
asm: x86.AVPMADDUBSW,
reg: regInfo{
inputs: []inputInfo{
{2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{ {
name: "VPMAXUW128", name: "VPMAXUW128",
argLen: 2, argLen: 2,
@ -25886,6 +26060,20 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "VPMADDUBSW128",
argLen: 2,
asm: x86.AVPMADDUBSW,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{ {
name: "VPAVGB256", name: "VPAVGB256",
argLen: 2, argLen: 2,
@ -25979,6 +26167,20 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "VPMADDUBSW256",
argLen: 2,
asm: x86.AVPMADDUBSW,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{ {
name: "VPAVGB512", name: "VPAVGB512",
argLen: 2, argLen: 2,
@ -26415,6 +26617,22 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "VDPPD128",
auxType: auxInt8,
argLen: 2,
commutative: true,
asm: x86.AVDPPD,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{ {
name: "VCMPPD128", name: "VCMPPD128",
auxType: auxInt8, auxType: auxInt8,
@ -55403,6 +55621,12 @@ var opcodeTable = [...]opInfo{
argLen: 2, argLen: 2,
generic: true, generic: true,
}, },
{
name: "DotProdBroadcastFloat64x2",
argLen: 2,
commutative: true,
generic: true,
},
{ {
name: "EqualFloat64x2", name: "EqualFloat64x2",
argLen: 2, argLen: 2,
@ -56242,6 +56466,11 @@ var opcodeTable = [...]opInfo{
commutative: true, commutative: true,
generic: true, generic: true,
}, },
{
name: "MaskedPairDotProdInt16x16",
argLen: 3,
generic: true,
},
{ {
name: "MaskedPopCountInt16x16", name: "MaskedPopCountInt16x16",
argLen: 2, argLen: 2,
@ -56299,6 +56528,11 @@ var opcodeTable = [...]opInfo{
commutative: true, commutative: true,
generic: true, generic: true,
}, },
{
name: "PairDotProdInt16x16",
argLen: 2,
generic: true,
},
{ {
name: "PairwiseAddInt16x16", name: "PairwiseAddInt16x16",
argLen: 2, argLen: 2,
@ -56455,6 +56689,11 @@ var opcodeTable = [...]opInfo{
commutative: true, commutative: true,
generic: true, generic: true,
}, },
{
name: "MaskedPairDotProdInt16x32",
argLen: 3,
generic: true,
},
{ {
name: "MaskedPopCountInt16x32", name: "MaskedPopCountInt16x32",
argLen: 2, argLen: 2,
@ -56506,6 +56745,11 @@ var opcodeTable = [...]opInfo{
commutative: true, commutative: true,
generic: true, generic: true,
}, },
{
name: "PairDotProdInt16x32",
argLen: 2,
generic: true,
},
{ {
name: "PopCountInt16x32", name: "PopCountInt16x32",
argLen: 1, argLen: 1,
@ -56643,6 +56887,11 @@ var opcodeTable = [...]opInfo{
commutative: true, commutative: true,
generic: true, generic: true,
}, },
{
name: "MaskedPairDotProdInt16x8",
argLen: 3,
generic: true,
},
{ {
name: "MaskedPopCountInt16x8", name: "MaskedPopCountInt16x8",
argLen: 2, argLen: 2,
@ -56700,6 +56949,11 @@ var opcodeTable = [...]opInfo{
commutative: true, commutative: true,
generic: true, generic: true,
}, },
{
name: "PairDotProdInt16x8",
argLen: 2,
generic: true,
},
{ {
name: "PairwiseAddInt16x8", name: "PairwiseAddInt16x8",
argLen: 2, argLen: 2,
@ -58612,6 +58866,11 @@ var opcodeTable = [...]opInfo{
argLen: 3, argLen: 3,
generic: true, generic: true,
}, },
{
name: "MaskedSaturatedUnsignedSignedPairDotProdUint16x16",
argLen: 3,
generic: true,
},
{ {
name: "MaskedSubUint16x16", name: "MaskedSubUint16x16",
argLen: 3, argLen: 3,
@ -58673,6 +58932,11 @@ var opcodeTable = [...]opInfo{
argLen: 2, argLen: 2,
generic: true, generic: true,
}, },
{
name: "SaturatedUnsignedSignedPairDotProdUint16x16",
argLen: 2,
generic: true,
},
{ {
name: "SubUint16x16", name: "SubUint16x16",
argLen: 2, argLen: 2,
@ -58800,6 +59064,11 @@ var opcodeTable = [...]opInfo{
argLen: 3, argLen: 3,
generic: true, generic: true,
}, },
{
name: "MaskedSaturatedUnsignedSignedPairDotProdUint16x32",
argLen: 3,
generic: true,
},
{ {
name: "MaskedSubUint16x32", name: "MaskedSubUint16x32",
argLen: 3, argLen: 3,
@ -58845,6 +59114,11 @@ var opcodeTable = [...]opInfo{
argLen: 2, argLen: 2,
generic: true, generic: true,
}, },
{
name: "SaturatedUnsignedSignedPairDotProdUint16x32",
argLen: 2,
generic: true,
},
{ {
name: "SubUint16x32", name: "SubUint16x32",
argLen: 2, argLen: 2,
@ -58978,6 +59252,11 @@ var opcodeTable = [...]opInfo{
argLen: 3, argLen: 3,
generic: true, generic: true,
}, },
{
name: "MaskedSaturatedUnsignedSignedPairDotProdUint16x8",
argLen: 3,
generic: true,
},
{ {
name: "MaskedSubUint16x8", name: "MaskedSubUint16x8",
argLen: 3, argLen: 3,
@ -59039,6 +59318,11 @@ var opcodeTable = [...]opInfo{
argLen: 2, argLen: 2,
generic: true, generic: true,
}, },
{
name: "SaturatedUnsignedSignedPairDotProdUint16x8",
argLen: 2,
generic: true,
},
{ {
name: "SubUint16x8", name: "SubUint16x8",
argLen: 2, argLen: 2,
@ -60293,6 +60577,11 @@ var opcodeTable = [...]opInfo{
argLen: 2, argLen: 2,
generic: true, generic: true,
}, },
{
name: "SaturatedUnsignedSignedPairDotProdUint8x16",
argLen: 2,
generic: true,
},
{ {
name: "SubUint8x16", name: "SubUint8x16",
argLen: 2, argLen: 2,
@ -60471,6 +60760,11 @@ var opcodeTable = [...]opInfo{
argLen: 2, argLen: 2,
generic: true, generic: true,
}, },
{
name: "SaturatedUnsignedSignedPairDotProdUint8x32",
argLen: 2,
generic: true,
},
{ {
name: "SubUint8x32", name: "SubUint8x32",
argLen: 2, argLen: 2,

View file

@ -1263,6 +1263,8 @@ func rewriteValueAMD64(v *Value) bool {
case OpDivFloat64x8: case OpDivFloat64x8:
v.Op = OpAMD64VDIVPD512 v.Op = OpAMD64VDIVPD512
return true return true
case OpDotProdBroadcastFloat64x2:
return rewriteValueAMD64_OpDotProdBroadcastFloat64x2(v)
case OpEq16: case OpEq16:
return rewriteValueAMD64_OpEq16(v) return rewriteValueAMD64_OpEq16(v)
case OpEq32: case OpEq32:
@ -2694,6 +2696,12 @@ func rewriteValueAMD64(v *Value) bool {
return rewriteValueAMD64_OpMaskedOrUint64x4(v) return rewriteValueAMD64_OpMaskedOrUint64x4(v)
case OpMaskedOrUint64x8: case OpMaskedOrUint64x8:
return rewriteValueAMD64_OpMaskedOrUint64x8(v) return rewriteValueAMD64_OpMaskedOrUint64x8(v)
case OpMaskedPairDotProdInt16x16:
return rewriteValueAMD64_OpMaskedPairDotProdInt16x16(v)
case OpMaskedPairDotProdInt16x32:
return rewriteValueAMD64_OpMaskedPairDotProdInt16x32(v)
case OpMaskedPairDotProdInt16x8:
return rewriteValueAMD64_OpMaskedPairDotProdInt16x8(v)
case OpMaskedPopCountInt16x16: case OpMaskedPopCountInt16x16:
return rewriteValueAMD64_OpMaskedPopCountInt16x16(v) return rewriteValueAMD64_OpMaskedPopCountInt16x16(v)
case OpMaskedPopCountInt16x32: case OpMaskedPopCountInt16x32:
@ -2814,6 +2822,12 @@ func rewriteValueAMD64(v *Value) bool {
return rewriteValueAMD64_OpMaskedSaturatedSubUint8x32(v) return rewriteValueAMD64_OpMaskedSaturatedSubUint8x32(v)
case OpMaskedSaturatedSubUint8x64: case OpMaskedSaturatedSubUint8x64:
return rewriteValueAMD64_OpMaskedSaturatedSubUint8x64(v) return rewriteValueAMD64_OpMaskedSaturatedSubUint8x64(v)
case OpMaskedSaturatedUnsignedSignedPairDotProdUint16x16:
return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint16x16(v)
case OpMaskedSaturatedUnsignedSignedPairDotProdUint16x32:
return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint16x32(v)
case OpMaskedSaturatedUnsignedSignedPairDotProdUint16x8:
return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint16x8(v)
case OpMaskedSqrtFloat32x16: case OpMaskedSqrtFloat32x16:
return rewriteValueAMD64_OpMaskedSqrtFloat32x16(v) return rewriteValueAMD64_OpMaskedSqrtFloat32x16(v)
case OpMaskedSqrtFloat32x4: case OpMaskedSqrtFloat32x4:
@ -3476,6 +3490,15 @@ func rewriteValueAMD64(v *Value) bool {
case OpOrUint8x32: case OpOrUint8x32:
v.Op = OpAMD64VPOR256 v.Op = OpAMD64VPOR256
return true return true
case OpPairDotProdInt16x16:
v.Op = OpAMD64VPMADDWD256
return true
case OpPairDotProdInt16x32:
v.Op = OpAMD64VPMADDWD512
return true
case OpPairDotProdInt16x8:
v.Op = OpAMD64VPMADDWD128
return true
case OpPairwiseAddFloat32x4: case OpPairwiseAddFloat32x4:
v.Op = OpAMD64VHADDPS128 v.Op = OpAMD64VHADDPS128
return true return true
@ -3838,6 +3861,21 @@ func rewriteValueAMD64(v *Value) bool {
case OpSaturatedSubUint8x64: case OpSaturatedSubUint8x64:
v.Op = OpAMD64VPSUBSB512 v.Op = OpAMD64VPSUBSB512
return true return true
case OpSaturatedUnsignedSignedPairDotProdUint16x16:
v.Op = OpAMD64VPMADDUBSW256
return true
case OpSaturatedUnsignedSignedPairDotProdUint16x32:
v.Op = OpAMD64VPMADDUBSW512
return true
case OpSaturatedUnsignedSignedPairDotProdUint16x8:
v.Op = OpAMD64VPMADDUBSW128
return true
case OpSaturatedUnsignedSignedPairDotProdUint8x16:
v.Op = OpAMD64VPMADDUBSW128
return true
case OpSaturatedUnsignedSignedPairDotProdUint8x32:
v.Op = OpAMD64VPMADDUBSW256
return true
case OpSelect0: case OpSelect0:
return rewriteValueAMD64_OpSelect0(v) return rewriteValueAMD64_OpSelect0(v)
case OpSelect1: case OpSelect1:
@ -29568,6 +29606,20 @@ func rewriteValueAMD64_OpDiv8u(v *Value) bool {
return true return true
} }
} }
// rewriteValueAMD64_OpDotProdBroadcastFloat64x2 rewrites v in place into a
// VDPPD128 value carrying the immediate 127, keeping both operands in their
// original order. It always reports that a rewrite happened.
//
// match: (DotProdBroadcastFloat64x2 x y)
// result: (VDPPD128 [127] x y)
func rewriteValueAMD64_OpDotProdBroadcastFloat64x2(v *Value) bool {
	// Read the operands up front (highest index first), before v is reset.
	rhs := v.Args[1]
	lhs := v.Args[0]

	v.reset(OpAMD64VDPPD128)
	v.AuxInt = int8ToAuxInt(127)
	v.AddArg2(lhs, rhs)
	return true
}
func rewriteValueAMD64_OpEq16(v *Value) bool { func rewriteValueAMD64_OpEq16(v *Value) bool {
v_1 := v.Args[1] v_1 := v.Args[1]
v_0 := v.Args[0] v_0 := v.Args[0]
@ -42720,6 +42772,60 @@ func rewriteValueAMD64_OpMaskedOrUint64x8(v *Value) bool {
return true return true
} }
} }
// rewriteValueAMD64_OpMaskedPairDotProdInt16x16 rewrites v in place into a
// VPMADDWDMasked256 value. The third operand is wrapped in a new
// VPMOVVec16x16ToM value of type types.TypeMask created in v's block.
// It always reports that a rewrite happened.
//
// match: (MaskedPairDotProdInt16x16 x y mask)
// result: (VPMADDWDMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
func rewriteValueAMD64_OpMaskedPairDotProdInt16x16(v *Value) bool {
	// Read the operands and the owning block up front, before v is reset.
	vecMask := v.Args[2]
	rhs := v.Args[1]
	lhs := v.Args[0]
	blk := v.Block

	v.reset(OpAMD64VPMADDWDMasked256)
	kmask := blk.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
	kmask.AddArg(vecMask)
	v.AddArg3(lhs, rhs, kmask)
	return true
}
// rewriteValueAMD64_OpMaskedPairDotProdInt16x32 rewrites v in place into a
// VPMADDWDMasked512 value. The third operand is wrapped in a new
// VPMOVVec16x32ToM value of type types.TypeMask created in v's block.
// It always reports that a rewrite happened.
//
// match: (MaskedPairDotProdInt16x32 x y mask)
// result: (VPMADDWDMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
func rewriteValueAMD64_OpMaskedPairDotProdInt16x32(v *Value) bool {
	// Read the operands and the owning block up front, before v is reset.
	vecMask := v.Args[2]
	rhs := v.Args[1]
	lhs := v.Args[0]
	blk := v.Block

	v.reset(OpAMD64VPMADDWDMasked512)
	kmask := blk.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
	kmask.AddArg(vecMask)
	v.AddArg3(lhs, rhs, kmask)
	return true
}
// rewriteValueAMD64_OpMaskedPairDotProdInt16x8 rewrites v in place into a
// VPMADDWDMasked128 value. The third operand is wrapped in a new
// VPMOVVec16x8ToM value of type types.TypeMask created in v's block.
// It always reports that a rewrite happened.
//
// match: (MaskedPairDotProdInt16x8 x y mask)
// result: (VPMADDWDMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
func rewriteValueAMD64_OpMaskedPairDotProdInt16x8(v *Value) bool {
	// Read the operands and the owning block up front, before v is reset.
	vecMask := v.Args[2]
	rhs := v.Args[1]
	lhs := v.Args[0]
	blk := v.Block

	v.reset(OpAMD64VPMADDWDMasked128)
	kmask := blk.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
	kmask.AddArg(vecMask)
	v.AddArg3(lhs, rhs, kmask)
	return true
}
func rewriteValueAMD64_OpMaskedPopCountInt16x16(v *Value) bool { func rewriteValueAMD64_OpMaskedPopCountInt16x16(v *Value) bool {
v_1 := v.Args[1] v_1 := v.Args[1]
v_0 := v.Args[0] v_0 := v.Args[0]
@ -43752,6 +43858,60 @@ func rewriteValueAMD64_OpMaskedSaturatedSubUint8x64(v *Value) bool {
return true return true
} }
} }
// rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint16x16
// rewrites v in place into a VPMADDUBSWMasked256 value. The third operand is
// wrapped in a new VPMOVVec16x16ToM value of type types.TypeMask created in
// v's block. It always reports that a rewrite happened.
//
// match: (MaskedSaturatedUnsignedSignedPairDotProdUint16x16 x y mask)
// result: (VPMADDUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint16x16(v *Value) bool {
	// Read the operands and the owning block up front, before v is reset.
	vecMask := v.Args[2]
	rhs := v.Args[1]
	lhs := v.Args[0]
	blk := v.Block

	v.reset(OpAMD64VPMADDUBSWMasked256)
	kmask := blk.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
	kmask.AddArg(vecMask)
	v.AddArg3(lhs, rhs, kmask)
	return true
}
// rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint16x32
// rewrites v in place into a VPMADDUBSWMasked512 value. The third operand is
// wrapped in a new VPMOVVec16x32ToM value of type types.TypeMask created in
// v's block. It always reports that a rewrite happened.
//
// match: (MaskedSaturatedUnsignedSignedPairDotProdUint16x32 x y mask)
// result: (VPMADDUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint16x32(v *Value) bool {
	// Read the operands and the owning block up front, before v is reset.
	vecMask := v.Args[2]
	rhs := v.Args[1]
	lhs := v.Args[0]
	blk := v.Block

	v.reset(OpAMD64VPMADDUBSWMasked512)
	kmask := blk.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
	kmask.AddArg(vecMask)
	v.AddArg3(lhs, rhs, kmask)
	return true
}
// rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint16x8
// rewrites v in place into a VPMADDUBSWMasked128 value. The third operand is
// wrapped in a new VPMOVVec16x8ToM value of type types.TypeMask created in
// v's block. It always reports that a rewrite happened.
//
// match: (MaskedSaturatedUnsignedSignedPairDotProdUint16x8 x y mask)
// result: (VPMADDUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint16x8(v *Value) bool {
	// Read the operands and the owning block up front, before v is reset.
	vecMask := v.Args[2]
	rhs := v.Args[1]
	lhs := v.Args[0]
	blk := v.Block

	v.reset(OpAMD64VPMADDUBSWMasked128)
	kmask := blk.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
	kmask.AddArg(vecMask)
	v.AddArg3(lhs, rhs, kmask)
	return true
}
func rewriteValueAMD64_OpMaskedSqrtFloat32x16(v *Value) bool { func rewriteValueAMD64_OpMaskedSqrtFloat32x16(v *Value) bool {
v_1 := v.Args[1] v_1 := v.Args[1]
v_0 := v.Args[0] v_0 := v.Args[0]

View file

@ -155,6 +155,7 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Float64x2.And", opLen2(ssa.OpAndFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x2.And", opLen2(ssa.OpAndFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x2.AndNot", opLen2(ssa.OpAndNotFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x2.AndNot", opLen2(ssa.OpAndNotFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x2.Div", opLen2(ssa.OpDivFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x2.Div", opLen2(ssa.OpDivFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x2.DotProdBroadcast", opLen2(ssa.OpDotProdBroadcastFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x2.Equal", opLen2(ssa.OpEqualFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x2.Equal", opLen2(ssa.OpEqualFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x2.Greater", opLen2(ssa.OpGreaterFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x2.Greater", opLen2(ssa.OpGreaterFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x2.GreaterEqual", opLen2(ssa.OpGreaterEqualFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x2.GreaterEqual", opLen2(ssa.OpGreaterEqualFloat64x2, types.TypeVec128), sys.AMD64)
@ -235,6 +236,7 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Int16x16.MulLow", opLen2(ssa.OpMulLowInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x16.MulLow", opLen2(ssa.OpMulLowInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x16.NotEqual", opLen2(ssa.OpNotEqualInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x16.NotEqual", opLen2(ssa.OpNotEqualInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x16.Or", opLen2(ssa.OpOrInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x16.Or", opLen2(ssa.OpOrInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x16.PairDotProd", opLen2(ssa.OpPairDotProdInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x16.PairwiseAdd", opLen2(ssa.OpPairwiseAddInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x16.PairwiseAdd", opLen2(ssa.OpPairwiseAddInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x16.PairwiseSub", opLen2(ssa.OpPairwiseSubInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x16.PairwiseSub", opLen2(ssa.OpPairwiseSubInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x16.SaturatedAdd", opLen2(ssa.OpSaturatedAddInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x16.SaturatedAdd", opLen2(ssa.OpSaturatedAddInt16x16, types.TypeVec256), sys.AMD64)
@ -257,6 +259,7 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Int16x32.MulHigh", opLen2(ssa.OpMulHighInt16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x32.MulHigh", opLen2(ssa.OpMulHighInt16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x32.MulLow", opLen2(ssa.OpMulLowInt16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x32.MulLow", opLen2(ssa.OpMulLowInt16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x32.NotEqual", opLen2(ssa.OpNotEqualInt16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x32.NotEqual", opLen2(ssa.OpNotEqualInt16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x32.PairDotProd", opLen2(ssa.OpPairDotProdInt16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x32.SaturatedAdd", opLen2(ssa.OpSaturatedAddInt16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x32.SaturatedAdd", opLen2(ssa.OpSaturatedAddInt16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x32.SaturatedSub", opLen2(ssa.OpSaturatedSubInt16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x32.SaturatedSub", opLen2(ssa.OpSaturatedSubInt16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x32.Sub", opLen2(ssa.OpSubInt16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x32.Sub", opLen2(ssa.OpSubInt16x32, types.TypeVec512), sys.AMD64)
@ -276,6 +279,7 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Int16x8.MulLow", opLen2(ssa.OpMulLowInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x8.MulLow", opLen2(ssa.OpMulLowInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x8.NotEqual", opLen2(ssa.OpNotEqualInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x8.NotEqual", opLen2(ssa.OpNotEqualInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x8.Or", opLen2(ssa.OpOrInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x8.Or", opLen2(ssa.OpOrInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x8.PairDotProd", opLen2(ssa.OpPairDotProdInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x8.PairwiseAdd", opLen2(ssa.OpPairwiseAddInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x8.PairwiseAdd", opLen2(ssa.OpPairwiseAddInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x8.PairwiseSub", opLen2(ssa.OpPairwiseSubInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x8.PairwiseSub", opLen2(ssa.OpPairwiseSubInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x8.SaturatedAdd", opLen2(ssa.OpSaturatedAddInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x8.SaturatedAdd", opLen2(ssa.OpSaturatedAddInt16x8, types.TypeVec128), sys.AMD64)
@ -469,6 +473,7 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Uint16x16.PairwiseSub", opLen2(ssa.OpPairwiseSubUint16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint16x16.PairwiseSub", opLen2(ssa.OpPairwiseSubUint16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x16.SaturatedAdd", opLen2(ssa.OpSaturatedAddUint16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint16x16.SaturatedAdd", opLen2(ssa.OpSaturatedAddUint16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x16.SaturatedSub", opLen2(ssa.OpSaturatedSubUint16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint16x16.SaturatedSub", opLen2(ssa.OpSaturatedSubUint16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x16.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x16.Sub", opLen2(ssa.OpSubUint16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint16x16.Sub", opLen2(ssa.OpSubUint16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x16.Xor", opLen2(ssa.OpXorUint16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint16x16.Xor", opLen2(ssa.OpXorUint16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x32.Add", opLen2(ssa.OpAddUint16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint16x32.Add", opLen2(ssa.OpAddUint16x32, types.TypeVec512), sys.AMD64)
@ -485,6 +490,7 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Uint16x32.NotEqual", opLen2(ssa.OpNotEqualUint16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint16x32.NotEqual", opLen2(ssa.OpNotEqualUint16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint16x32.SaturatedAdd", opLen2(ssa.OpSaturatedAddUint16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint16x32.SaturatedAdd", opLen2(ssa.OpSaturatedAddUint16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint16x32.SaturatedSub", opLen2(ssa.OpSaturatedSubUint16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint16x32.SaturatedSub", opLen2(ssa.OpSaturatedSubUint16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint16x32.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint16x32.Sub", opLen2(ssa.OpSubUint16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint16x32.Sub", opLen2(ssa.OpSubUint16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint16x8.Add", opLen2(ssa.OpAddUint16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint16x8.Add", opLen2(ssa.OpAddUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x8.And", opLen2(ssa.OpAndUint16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint16x8.And", opLen2(ssa.OpAndUint16x8, types.TypeVec128), sys.AMD64)
@ -505,6 +511,7 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Uint16x8.PairwiseSub", opLen2(ssa.OpPairwiseSubUint16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint16x8.PairwiseSub", opLen2(ssa.OpPairwiseSubUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x8.SaturatedAdd", opLen2(ssa.OpSaturatedAddUint16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint16x8.SaturatedAdd", opLen2(ssa.OpSaturatedAddUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x8.SaturatedSub", opLen2(ssa.OpSaturatedSubUint16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint16x8.SaturatedSub", opLen2(ssa.OpSaturatedSubUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x8.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x8.Sub", opLen2(ssa.OpSubUint16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint16x8.Sub", opLen2(ssa.OpSubUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x8.Xor", opLen2(ssa.OpXorUint16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint16x8.Xor", opLen2(ssa.OpXorUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint32x16.Add", opLen2(ssa.OpAddUint32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint32x16.Add", opLen2(ssa.OpAddUint32x16, types.TypeVec512), sys.AMD64)
@ -622,6 +629,7 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Uint8x16.Or", opLen2(ssa.OpOrUint8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint8x16.Or", opLen2(ssa.OpOrUint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint8x16.SaturatedAdd", opLen2(ssa.OpSaturatedAddUint8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint8x16.SaturatedAdd", opLen2(ssa.OpSaturatedAddUint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint8x16.SaturatedSub", opLen2(ssa.OpSaturatedSubUint8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint8x16.SaturatedSub", opLen2(ssa.OpSaturatedSubUint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint8x16.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint8x16.Sub", opLen2(ssa.OpSubUint8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint8x16.Sub", opLen2(ssa.OpSubUint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint8x16.Xor", opLen2(ssa.OpXorUint8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint8x16.Xor", opLen2(ssa.OpXorUint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint8x32.Add", opLen2(ssa.OpAddUint8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint8x32.Add", opLen2(ssa.OpAddUint8x32, types.TypeVec256), sys.AMD64)
@ -640,6 +648,7 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Uint8x32.Or", opLen2(ssa.OpOrUint8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint8x32.Or", opLen2(ssa.OpOrUint8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint8x32.SaturatedAdd", opLen2(ssa.OpSaturatedAddUint8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint8x32.SaturatedAdd", opLen2(ssa.OpSaturatedAddUint8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint8x32.SaturatedSub", opLen2(ssa.OpSaturatedSubUint8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint8x32.SaturatedSub", opLen2(ssa.OpSaturatedSubUint8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint8x32.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint8x32.Sub", opLen2(ssa.OpSubUint8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint8x32.Sub", opLen2(ssa.OpSubUint8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint8x32.Xor", opLen2(ssa.OpXorUint8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint8x32.Xor", opLen2(ssa.OpXorUint8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint8x64.Add", opLen2(ssa.OpAddUint8x64, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint8x64.Add", opLen2(ssa.OpAddUint8x64, types.TypeVec512), sys.AMD64)
@ -775,6 +784,7 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Int16x16.MaskedMulHigh", opLen3(ssa.OpMaskedMulHighInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x16.MaskedMulHigh", opLen3(ssa.OpMaskedMulHighInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x16.MaskedMulLow", opLen3(ssa.OpMaskedMulLowInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x16.MaskedMulLow", opLen3(ssa.OpMaskedMulLowInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x16.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x16.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x16.MaskedPairDotProd", opLen3(ssa.OpMaskedPairDotProdInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x16.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x16.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x16.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x16.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x16.MaskedSub", opLen3(ssa.OpMaskedSubInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x16.MaskedSub", opLen3(ssa.OpMaskedSubInt16x16, types.TypeVec256), sys.AMD64)
@ -789,6 +799,7 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Int16x32.MaskedMulHigh", opLen3(ssa.OpMaskedMulHighInt16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x32.MaskedMulHigh", opLen3(ssa.OpMaskedMulHighInt16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x32.MaskedMulLow", opLen3(ssa.OpMaskedMulLowInt16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x32.MaskedMulLow", opLen3(ssa.OpMaskedMulLowInt16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x32.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualInt16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x32.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualInt16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x32.MaskedPairDotProd", opLen3(ssa.OpMaskedPairDotProdInt16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x32.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddInt16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x32.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddInt16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x32.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubInt16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x32.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubInt16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x32.MaskedSub", opLen3(ssa.OpMaskedSubInt16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x32.MaskedSub", opLen3(ssa.OpMaskedSubInt16x32, types.TypeVec512), sys.AMD64)
@ -803,6 +814,7 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Int16x8.MaskedMulHigh", opLen3(ssa.OpMaskedMulHighInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x8.MaskedMulHigh", opLen3(ssa.OpMaskedMulHighInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x8.MaskedMulLow", opLen3(ssa.OpMaskedMulLowInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x8.MaskedMulLow", opLen3(ssa.OpMaskedMulLowInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x8.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x8.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x8.MaskedPairDotProd", opLen3(ssa.OpMaskedPairDotProdInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x8.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x8.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x8.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x8.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x8.MaskedSub", opLen3(ssa.OpMaskedSubInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x8.MaskedSub", opLen3(ssa.OpMaskedSubInt16x8, types.TypeVec128), sys.AMD64)
@ -948,6 +960,7 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Uint16x16.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualUint16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint16x16.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualUint16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x16.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddUint16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint16x16.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddUint16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x16.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubUint16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint16x16.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubUint16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x16.MaskedSaturatedUnsignedSignedPairDotProd", opLen3(ssa.OpMaskedSaturatedUnsignedSignedPairDotProdUint16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x16.MaskedSub", opLen3(ssa.OpMaskedSubUint16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint16x16.MaskedSub", opLen3(ssa.OpMaskedSubUint16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x32.MaskedAdd", opLen3(ssa.OpMaskedAddUint16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint16x32.MaskedAdd", opLen3(ssa.OpMaskedAddUint16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint16x32.MaskedAverage", opLen3(ssa.OpMaskedAverageUint16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint16x32.MaskedAverage", opLen3(ssa.OpMaskedAverageUint16x32, types.TypeVec512), sys.AMD64)
@ -962,6 +975,7 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Uint16x32.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualUint16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint16x32.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualUint16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint16x32.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddUint16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint16x32.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddUint16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint16x32.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubUint16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint16x32.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubUint16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint16x32.MaskedSaturatedUnsignedSignedPairDotProd", opLen3(ssa.OpMaskedSaturatedUnsignedSignedPairDotProdUint16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint16x32.MaskedSub", opLen3(ssa.OpMaskedSubUint16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint16x32.MaskedSub", opLen3(ssa.OpMaskedSubUint16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint16x8.MaskedAdd", opLen3(ssa.OpMaskedAddUint16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint16x8.MaskedAdd", opLen3(ssa.OpMaskedAddUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x8.MaskedAverage", opLen3(ssa.OpMaskedAverageUint16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint16x8.MaskedAverage", opLen3(ssa.OpMaskedAverageUint16x8, types.TypeVec128), sys.AMD64)
@ -976,6 +990,7 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Uint16x8.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualUint16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint16x8.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x8.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddUint16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint16x8.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x8.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubUint16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint16x8.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x8.MaskedSaturatedUnsignedSignedPairDotProd", opLen3(ssa.OpMaskedSaturatedUnsignedSignedPairDotProdUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x8.MaskedSub", opLen3(ssa.OpMaskedSubUint16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint16x8.MaskedSub", opLen3(ssa.OpMaskedSubUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint32x16.MaskedAdd", opLen3(ssa.OpMaskedAddUint32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint32x16.MaskedAdd", opLen3(ssa.OpMaskedAddUint32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint32x16.MaskedAnd", opLen3(ssa.OpMaskedAndUint32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint32x16.MaskedAnd", opLen3(ssa.OpMaskedAndUint32x16, types.TypeVec512), sys.AMD64)

View file

@ -486,6 +486,11 @@ func (x Float64x2) AndNot(y Float64x2) Float64x2
// Asm: VDIVPD, CPU Feature: AVX // Asm: VDIVPD, CPU Feature: AVX
func (x Float64x2) Div(y Float64x2) Float64x2 func (x Float64x2) Div(y Float64x2) Float64x2
// DotProdBroadcast multiplies the elements of x and y and adds the products together;
// the result is a broadcast of that dot product.
//
// Asm: VDPPD, CPU Feature: AVX
func (x Float64x2) DotProdBroadcast(y Float64x2) Float64x2
// Predicate immediate is 0 if it has; // Predicate immediate is 0 if it has;
// //
// Asm: VCMPPD, CPU Feature: AVX // Asm: VCMPPD, CPU Feature: AVX
@ -792,6 +797,11 @@ func (x Int16x16) NotEqual(y Int16x16) Mask16x16
// Asm: VPOR, CPU Feature: AVX2 // Asm: VPOR, CPU Feature: AVX2
func (x Int16x16) Or(y Int16x16) Int16x16 func (x Int16x16) Or(y Int16x16) Int16x16
// PairDotProd multiplies the elements of x and y and adds the products together in pairs,
// yielding a vector with half as many elements, each twice the size of an input element
// (Int16x16 operands, Int32x8 result).
//
// Asm: VPMADDWD, CPU Feature: AVX2
func (x Int16x16) PairDotProd(y Int16x16) Int32x8
// Add pairs of elements in vector x and store them in higher half of the target; Add pairs of elements in vector y and store them in lower half of the target // Add pairs of elements in vector x and store them in higher half of the target; Add pairs of elements in vector y and store them in lower half of the target
// //
// Asm: VPHADDW, CPU Feature: AVX2 // Asm: VPHADDW, CPU Feature: AVX2
@ -882,6 +892,11 @@ func (x Int16x32) MulLow(y Int16x32) Int16x32
// Asm: VPCMPW, CPU Feature: AVX512EVEX // Asm: VPCMPW, CPU Feature: AVX512EVEX
func (x Int16x32) NotEqual(y Int16x32) Mask16x32 func (x Int16x32) NotEqual(y Int16x32) Mask16x32
// PairDotProd multiplies the elements of x and y and adds the products together in pairs,
// yielding a vector with half as many elements, each twice the size of an input element
// (Int16x32 operands, Int32x16 result).
//
// Asm: VPMADDWD, CPU Feature: AVX512EVEX
func (x Int16x32) PairDotProd(y Int16x32) Int32x16
// Asm: VPADDSW, CPU Feature: AVX512EVEX // Asm: VPADDSW, CPU Feature: AVX512EVEX
func (x Int16x32) SaturatedAdd(y Int16x32) Int16x32 func (x Int16x32) SaturatedAdd(y Int16x32) Int16x32
@ -955,6 +970,11 @@ func (x Int16x8) NotEqual(y Int16x8) Mask16x8
// Asm: VPOR, CPU Feature: AVX // Asm: VPOR, CPU Feature: AVX
func (x Int16x8) Or(y Int16x8) Int16x8 func (x Int16x8) Or(y Int16x8) Int16x8
// PairDotProd multiplies the elements of x and y and adds the products together in pairs,
// yielding a vector with half as many elements, each twice the size of an input element
// (Int16x8 operands, Int32x4 result).
//
// Asm: VPMADDWD, CPU Feature: AVX
func (x Int16x8) PairDotProd(y Int16x8) Int32x4
// Add pairs of elements in vector x and store them in higher half of the target; Add pairs of elements in vector y and store them in lower half of the target // Add pairs of elements in vector x and store them in higher half of the target; Add pairs of elements in vector y and store them in lower half of the target
// //
// Asm: VPHADDW, CPU Feature: AVX // Asm: VPHADDW, CPU Feature: AVX
@ -1698,6 +1718,11 @@ func (x Uint16x16) SaturatedAdd(y Uint16x16) Uint16x16
// Asm: VPSUBSW, CPU Feature: AVX2 // Asm: VPSUBSW, CPU Feature: AVX2
func (x Uint16x16) SaturatedSub(y Uint16x16) Uint16x16 func (x Uint16x16) SaturatedSub(y Uint16x16) Uint16x16
// SaturatedUnsignedSignedPairDotProd multiplies the unsigned elements of x by the signed
// elements of y and adds the products together in pairs with saturation. The operation is
// described as yielding a vector of half as many elements with twice the input element size.
//
// NOTE(review): the declared types (Uint16x16 and Int16x16 operands, Int16x16 result) keep
// the same element count and size, which does not match that description. Confirm the
// intended operand types against the VPMADDUBSW definition.
//
// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
func (x Uint16x16) SaturatedUnsignedSignedPairDotProd(y Int16x16) Int16x16
// Asm: VPSUBW, CPU Feature: AVX2 // Asm: VPSUBW, CPU Feature: AVX2
func (x Uint16x16) Sub(y Uint16x16) Uint16x16 func (x Uint16x16) Sub(y Uint16x16) Uint16x16
@ -1760,6 +1785,11 @@ func (x Uint16x32) SaturatedAdd(y Uint16x32) Uint16x32
// Asm: VPSUBSW, CPU Feature: AVX512EVEX // Asm: VPSUBSW, CPU Feature: AVX512EVEX
func (x Uint16x32) SaturatedSub(y Uint16x32) Uint16x32 func (x Uint16x32) SaturatedSub(y Uint16x32) Uint16x32
// SaturatedUnsignedSignedPairDotProd multiplies the elements of x and y and
// adds the pairs together with saturation, yielding a vector of half as many
// elements with twice the input element size.
//
// NOTE(review): the declared result type (Int16x32) appears to have the same
// element count and size as the receiver (Uint16x32), which does not match the
// "half as many elements, twice the size" description above. Intel documents
// VPMADDUBSW as taking byte operands — confirm the intended operand types.
//
// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
func (x Uint16x32) SaturatedUnsignedSignedPairDotProd(y Int16x32) Int16x32
// Asm: VPSUBW, CPU Feature: AVX512EVEX // Asm: VPSUBW, CPU Feature: AVX512EVEX
func (x Uint16x32) Sub(y Uint16x32) Uint16x32 func (x Uint16x32) Sub(y Uint16x32) Uint16x32
@ -1838,6 +1868,11 @@ func (x Uint16x8) SaturatedAdd(y Uint16x8) Uint16x8
// Asm: VPSUBSW, CPU Feature: AVX // Asm: VPSUBSW, CPU Feature: AVX
func (x Uint16x8) SaturatedSub(y Uint16x8) Uint16x8 func (x Uint16x8) SaturatedSub(y Uint16x8) Uint16x8
// SaturatedUnsignedSignedPairDotProd multiplies the elements of x and y and
// adds the pairs together with saturation, yielding a vector of half as many
// elements with twice the input element size.
//
// NOTE(review): the declared result type (Int16x8) appears to have the same
// element count and size as the receiver (Uint16x8), which does not match the
// "half as many elements, twice the size" description above. Intel documents
// VPMADDUBSW as taking byte operands — confirm the intended operand types.
//
// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
func (x Uint16x8) SaturatedUnsignedSignedPairDotProd(y Int16x8) Int16x8
// Asm: VPSUBW, CPU Feature: AVX // Asm: VPSUBW, CPU Feature: AVX
func (x Uint16x8) Sub(y Uint16x8) Uint16x8 func (x Uint16x8) Sub(y Uint16x8) Uint16x8
@ -2291,6 +2326,11 @@ func (x Uint8x16) SaturatedAdd(y Uint8x16) Uint8x16
// Asm: VPSUBSB, CPU Feature: AVX // Asm: VPSUBSB, CPU Feature: AVX
func (x Uint8x16) SaturatedSub(y Uint8x16) Uint8x16 func (x Uint8x16) SaturatedSub(y Uint8x16) Uint8x16
// SaturatedUnsignedSignedPairDotProd multiplies the unsigned elements of x by
// the signed elements of y and adds the pairs together with saturation,
// yielding a vector of half as many elements with twice the input element size
// (Uint8x16 and Int8x16 operands produce an Int16x8 result).
//
// Asm: VPMADDUBSW, CPU Feature: AVX
func (x Uint8x16) SaturatedUnsignedSignedPairDotProd(y Int8x16) Int16x8
// Asm: VPSUBB, CPU Feature: AVX // Asm: VPSUBB, CPU Feature: AVX
func (x Uint8x16) Sub(y Uint8x16) Uint8x16 func (x Uint8x16) Sub(y Uint8x16) Uint8x16
@ -2357,6 +2397,11 @@ func (x Uint8x32) SaturatedAdd(y Uint8x32) Uint8x32
// Asm: VPSUBSB, CPU Feature: AVX2 // Asm: VPSUBSB, CPU Feature: AVX2
func (x Uint8x32) SaturatedSub(y Uint8x32) Uint8x32 func (x Uint8x32) SaturatedSub(y Uint8x32) Uint8x32
// SaturatedUnsignedSignedPairDotProd multiplies the unsigned elements of x by
// the signed elements of y and adds the pairs together with saturation,
// yielding a vector of half as many elements with twice the input element size
// (Uint8x32 and Int8x32 operands produce an Int16x16 result).
//
// Asm: VPMADDUBSW, CPU Feature: AVX2
func (x Uint8x32) SaturatedUnsignedSignedPairDotProd(y Int8x32) Int16x16
// Asm: VPSUBB, CPU Feature: AVX2 // Asm: VPSUBB, CPU Feature: AVX2
func (x Uint8x32) Sub(y Uint8x32) Uint8x32 func (x Uint8x32) Sub(y Uint8x32) Uint8x32
@ -2874,6 +2919,11 @@ func (x Int16x16) MaskedMulLow(y Int16x16, z Mask16x16) Int16x16
// Asm: VPCMPW, CPU Feature: AVX512EVEX // Asm: VPCMPW, CPU Feature: AVX512EVEX
func (x Int16x16) MaskedNotEqual(y Int16x16, z Mask16x16) Mask16x16 func (x Int16x16) MaskedNotEqual(y Int16x16, z Mask16x16) Mask16x16
// MaskedPairDotProd multiplies the elements of x and y and adds the pairs
// together, yielding a vector of half as many elements with twice the input
// element size (Int16x16 operands produce an Int32x8 result).
//
// NOTE(review): z presumably selects which result elements are computed; the
// treatment of unselected elements is not visible here — confirm.
//
// Asm: VPMADDWD, CPU Feature: AVX512EVEX
func (x Int16x16) MaskedPairDotProd(y Int16x16, z Mask16x16) Int32x8
// Asm: VPADDSW, CPU Feature: AVX512EVEX // Asm: VPADDSW, CPU Feature: AVX512EVEX
func (x Int16x16) MaskedSaturatedAdd(y Int16x16, z Mask16x16) Int16x16 func (x Int16x16) MaskedSaturatedAdd(y Int16x16, z Mask16x16) Int16x16
@ -2932,6 +2982,11 @@ func (x Int16x32) MaskedMulLow(y Int16x32, z Mask16x32) Int16x32
// Asm: VPCMPW, CPU Feature: AVX512EVEX // Asm: VPCMPW, CPU Feature: AVX512EVEX
func (x Int16x32) MaskedNotEqual(y Int16x32, z Mask16x32) Mask16x32 func (x Int16x32) MaskedNotEqual(y Int16x32, z Mask16x32) Mask16x32
// MaskedPairDotProd multiplies the elements of x and y and adds the pairs
// together, yielding a vector of half as many elements with twice the input
// element size (Int16x32 operands produce an Int32x16 result).
//
// NOTE(review): z presumably selects which result elements are computed; the
// treatment of unselected elements is not visible here — confirm.
//
// Asm: VPMADDWD, CPU Feature: AVX512EVEX
func (x Int16x32) MaskedPairDotProd(y Int16x32, z Mask16x32) Int32x16
// Asm: VPADDSW, CPU Feature: AVX512EVEX // Asm: VPADDSW, CPU Feature: AVX512EVEX
func (x Int16x32) MaskedSaturatedAdd(y Int16x32, z Mask16x32) Int16x32 func (x Int16x32) MaskedSaturatedAdd(y Int16x32, z Mask16x32) Int16x32
@ -2990,6 +3045,11 @@ func (x Int16x8) MaskedMulLow(y Int16x8, z Mask16x8) Int16x8
// Asm: VPCMPW, CPU Feature: AVX512EVEX // Asm: VPCMPW, CPU Feature: AVX512EVEX
func (x Int16x8) MaskedNotEqual(y Int16x8, z Mask16x8) Mask16x8 func (x Int16x8) MaskedNotEqual(y Int16x8, z Mask16x8) Mask16x8
// MaskedPairDotProd multiplies the elements of x and y and adds the pairs
// together, yielding a vector of half as many elements with twice the input
// element size (Int16x8 operands produce an Int32x4 result).
//
// NOTE(review): z presumably selects which result elements are computed; the
// treatment of unselected elements is not visible here — confirm.
//
// Asm: VPMADDWD, CPU Feature: AVX512EVEX
func (x Int16x8) MaskedPairDotProd(y Int16x8, z Mask16x8) Int32x4
// Asm: VPADDSW, CPU Feature: AVX512EVEX // Asm: VPADDSW, CPU Feature: AVX512EVEX
func (x Int16x8) MaskedSaturatedAdd(y Int16x8, z Mask16x8) Int16x8 func (x Int16x8) MaskedSaturatedAdd(y Int16x8, z Mask16x8) Int16x8
@ -3565,6 +3625,11 @@ func (x Uint16x16) MaskedSaturatedAdd(y Uint16x16, z Mask16x16) Uint16x16
// Asm: VPSUBSW, CPU Feature: AVX512EVEX // Asm: VPSUBSW, CPU Feature: AVX512EVEX
func (x Uint16x16) MaskedSaturatedSub(y Uint16x16, z Mask16x16) Uint16x16 func (x Uint16x16) MaskedSaturatedSub(y Uint16x16, z Mask16x16) Uint16x16
// MaskedSaturatedUnsignedSignedPairDotProd multiplies the elements of x and y
// and adds the pairs together with saturation, yielding a vector of half as
// many elements with twice the input element size.
//
// NOTE(review): the declared result type (Int16x16) appears to have the same
// element count and size as the receiver (Uint16x16), which does not match the
// "half as many elements, twice the size" description above. Intel documents
// VPMADDUBSW as taking byte operands — confirm the intended operand types.
// The semantics of mask z are likewise not visible here — confirm.
//
// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
func (x Uint16x16) MaskedSaturatedUnsignedSignedPairDotProd(y Int16x16, z Mask16x16) Int16x16
// Asm: VPSUBW, CPU Feature: AVX512EVEX // Asm: VPSUBW, CPU Feature: AVX512EVEX
func (x Uint16x16) MaskedSub(y Uint16x16, z Mask16x16) Uint16x16 func (x Uint16x16) MaskedSub(y Uint16x16, z Mask16x16) Uint16x16
@ -3621,6 +3686,11 @@ func (x Uint16x32) MaskedSaturatedAdd(y Uint16x32, z Mask16x32) Uint16x32
// Asm: VPSUBSW, CPU Feature: AVX512EVEX // Asm: VPSUBSW, CPU Feature: AVX512EVEX
func (x Uint16x32) MaskedSaturatedSub(y Uint16x32, z Mask16x32) Uint16x32 func (x Uint16x32) MaskedSaturatedSub(y Uint16x32, z Mask16x32) Uint16x32
// MaskedSaturatedUnsignedSignedPairDotProd multiplies the elements of x and y
// and adds the pairs together with saturation, yielding a vector of half as
// many elements with twice the input element size.
//
// NOTE(review): the declared result type (Int16x32) appears to have the same
// element count and size as the receiver (Uint16x32), which does not match the
// "half as many elements, twice the size" description above. Intel documents
// VPMADDUBSW as taking byte operands — confirm the intended operand types.
// The semantics of mask z are likewise not visible here — confirm.
//
// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
func (x Uint16x32) MaskedSaturatedUnsignedSignedPairDotProd(y Int16x32, z Mask16x32) Int16x32
// Asm: VPSUBW, CPU Feature: AVX512EVEX // Asm: VPSUBW, CPU Feature: AVX512EVEX
func (x Uint16x32) MaskedSub(y Uint16x32, z Mask16x32) Uint16x32 func (x Uint16x32) MaskedSub(y Uint16x32, z Mask16x32) Uint16x32
@ -3677,6 +3747,11 @@ func (x Uint16x8) MaskedSaturatedAdd(y Uint16x8, z Mask16x8) Uint16x8
// Asm: VPSUBSW, CPU Feature: AVX512EVEX // Asm: VPSUBSW, CPU Feature: AVX512EVEX
func (x Uint16x8) MaskedSaturatedSub(y Uint16x8, z Mask16x8) Uint16x8 func (x Uint16x8) MaskedSaturatedSub(y Uint16x8, z Mask16x8) Uint16x8
// MaskedSaturatedUnsignedSignedPairDotProd multiplies the elements of x and y
// and adds the pairs together with saturation, yielding a vector of half as
// many elements with twice the input element size.
//
// NOTE(review): the declared result type (Int16x8) appears to have the same
// element count and size as the receiver (Uint16x8), which does not match the
// "half as many elements, twice the size" description above. Intel documents
// VPMADDUBSW as taking byte operands — confirm the intended operand types.
// The semantics of mask z are likewise not visible here — confirm.
//
// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
func (x Uint16x8) MaskedSaturatedUnsignedSignedPairDotProd(y Int16x8, z Mask16x8) Int16x8
// Asm: VPSUBW, CPU Feature: AVX512EVEX // Asm: VPSUBW, CPU Feature: AVX512EVEX
func (x Uint16x8) MaskedSub(y Uint16x8, z Mask16x8) Uint16x8 func (x Uint16x8) MaskedSub(y Uint16x8, z Mask16x8) Uint16x8