[dev.simd] cmd/compile: fix signature error of PairDotProdAccumulate.

This CL was generated by CL 682135.

Change-Id: I6f004b2eca6323f1ff22555c85db993386f24c6c
Reviewed-on: https://go-review.googlesource.com/c/go/+/682155
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: David Chase <drchase@google.com>
This commit is contained in:
Junyang Shao 2025-06-16 22:53:36 +00:00
parent 3a4d10bfca
commit 1be5eb2686
7 changed files with 174 additions and 210 deletions

View file

@ -1115,9 +1115,9 @@
(MaskedSaturatedSubUint8x16 x y mask) => (VPSUBSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask)) (MaskedSaturatedSubUint8x16 x y mask) => (VPSUBSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
(MaskedSaturatedSubUint8x32 x y mask) => (VPSUBSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask)) (MaskedSaturatedSubUint8x32 x y mask) => (VPSUBSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
(MaskedSaturatedSubUint8x64 x y mask) => (VPSUBSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask)) (MaskedSaturatedSubUint8x64 x y mask) => (VPSUBSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
(MaskedSaturatedUnsignedSignedPairDotProdUint16x16 x y mask) => (VPMADDUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask)) (MaskedSaturatedUnsignedSignedPairDotProdUint8x16 x y mask) => (VPMADDUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
(MaskedSaturatedUnsignedSignedPairDotProdUint16x32 x y mask) => (VPMADDUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask)) (MaskedSaturatedUnsignedSignedPairDotProdUint8x32 x y mask) => (VPMADDUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
(MaskedSaturatedUnsignedSignedPairDotProdUint16x8 x y mask) => (VPMADDUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask)) (MaskedSaturatedUnsignedSignedPairDotProdUint8x64 x y mask) => (VPMADDUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
(MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16 x y z mask) => (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask)) (MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16 x y z mask) => (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
(MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4 x y z mask) => (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask)) (MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4 x y z mask) => (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
(MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8 x y z mask) => (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask)) (MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8 x y z mask) => (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
@ -1450,11 +1450,9 @@
(SaturatedSubUint8x16 ...) => (VPSUBSB128 ...) (SaturatedSubUint8x16 ...) => (VPSUBSB128 ...)
(SaturatedSubUint8x32 ...) => (VPSUBSB256 ...) (SaturatedSubUint8x32 ...) => (VPSUBSB256 ...)
(SaturatedSubUint8x64 ...) => (VPSUBSB512 ...) (SaturatedSubUint8x64 ...) => (VPSUBSB512 ...)
(SaturatedUnsignedSignedPairDotProdUint16x16 ...) => (VPMADDUBSW256 ...)
(SaturatedUnsignedSignedPairDotProdUint16x32 ...) => (VPMADDUBSW512 ...)
(SaturatedUnsignedSignedPairDotProdUint16x8 ...) => (VPMADDUBSW128 ...)
(SaturatedUnsignedSignedPairDotProdUint8x16 ...) => (VPMADDUBSW128 ...) (SaturatedUnsignedSignedPairDotProdUint8x16 ...) => (VPMADDUBSW128 ...)
(SaturatedUnsignedSignedPairDotProdUint8x32 ...) => (VPMADDUBSW256 ...) (SaturatedUnsignedSignedPairDotProdUint8x32 ...) => (VPMADDUBSW256 ...)
(SaturatedUnsignedSignedPairDotProdUint8x64 ...) => (VPMADDUBSW512 ...)
(SaturatedUnsignedSignedQuadDotProdAccumulateInt32x16 ...) => (VPDPBUSDS512 ...) (SaturatedUnsignedSignedQuadDotProdAccumulateInt32x16 ...) => (VPDPBUSDS512 ...)
(SaturatedUnsignedSignedQuadDotProdAccumulateInt32x4 ...) => (VPDPBUSDS128 ...) (SaturatedUnsignedSignedQuadDotProdAccumulateInt32x4 ...) => (VPDPBUSDS128 ...)
(SaturatedUnsignedSignedQuadDotProdAccumulateInt32x8 ...) => (VPDPBUSDS256 ...) (SaturatedUnsignedSignedQuadDotProdAccumulateInt32x8 ...) => (VPDPBUSDS256 ...)

View file

@ -705,7 +705,6 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
{name: "VPMAXUWMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMAXUW", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMAXUWMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMAXUW", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMINUWMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMINUW", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMINUWMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMINUW", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMULHUWMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMULHUW", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMULHUWMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMULHUW", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMADDUBSWMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMADDUBSW", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPMAXUW256", argLength: 2, reg: fp21, asm: "VPMAXUW", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMAXUW256", argLength: 2, reg: fp21, asm: "VPMAXUW", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMINUW256", argLength: 2, reg: fp21, asm: "VPMINUW", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMINUW256", argLength: 2, reg: fp21, asm: "VPMINUW", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMULHUW256", argLength: 2, reg: fp21, asm: "VPMULHUW", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMULHUW256", argLength: 2, reg: fp21, asm: "VPMULHUW", commutative: true, typ: "Vec256", resultInArg0: false},
@ -714,17 +713,14 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
{name: "VPMAXUWMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMAXUW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMAXUWMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMAXUW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMINUWMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMINUW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMINUWMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMINUW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMULHUWMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMULHUW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMULHUWMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMULHUW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMADDUBSWMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMADDUBSW", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPMAXUW512", argLength: 2, reg: fp21, asm: "VPMAXUW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMAXUW512", argLength: 2, reg: fp21, asm: "VPMAXUW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMINUW512", argLength: 2, reg: fp21, asm: "VPMINUW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMINUW512", argLength: 2, reg: fp21, asm: "VPMINUW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMULHUW512", argLength: 2, reg: fp21, asm: "VPMULHUW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMULHUW512", argLength: 2, reg: fp21, asm: "VPMULHUW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMADDUBSW512", argLength: 2, reg: fp21, asm: "VPMADDUBSW", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPAVGW128", argLength: 2, reg: fp21, asm: "VPAVGW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPAVGW128", argLength: 2, reg: fp21, asm: "VPAVGW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPAVGWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPAVGW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPAVGWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPAVGW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMAXUWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMAXUW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMAXUWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMAXUW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMINUWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMINUW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMINUWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMINUW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMULHUWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMULHUW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMULHUWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMULHUW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMADDUBSWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMADDUBSW", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMAXUW128", argLength: 2, reg: fp21, asm: "VPMAXUW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMAXUW128", argLength: 2, reg: fp21, asm: "VPMAXUW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMINUW128", argLength: 2, reg: fp21, asm: "VPMINUW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMINUW128", argLength: 2, reg: fp21, asm: "VPMINUW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMULHUW128", argLength: 2, reg: fp21, asm: "VPMULHUW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMULHUW128", argLength: 2, reg: fp21, asm: "VPMULHUW", commutative: true, typ: "Vec128", resultInArg0: false},
@ -762,6 +758,7 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
{name: "VPAVGBMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPAVGB", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPAVGBMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPAVGB", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMAXUBMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMAXUB", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMAXUBMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMAXUB", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMINUBMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMINUB", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMINUBMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMINUB", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMADDUBSWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMADDUBSW", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMAXUB128", argLength: 2, reg: fp21, asm: "VPMAXUB", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMAXUB128", argLength: 2, reg: fp21, asm: "VPMAXUB", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMINUB128", argLength: 2, reg: fp21, asm: "VPMINUB", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMINUB128", argLength: 2, reg: fp21, asm: "VPMINUB", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMADDUBSW128", argLength: 2, reg: fp21, asm: "VPMADDUBSW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMADDUBSW128", argLength: 2, reg: fp21, asm: "VPMADDUBSW", commutative: false, typ: "Vec128", resultInArg0: false},
@ -769,6 +766,7 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
{name: "VPAVGBMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPAVGB", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPAVGBMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPAVGB", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMAXUBMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMAXUB", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMAXUBMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMAXUB", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMINUBMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMINUB", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMINUBMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMINUB", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMADDUBSWMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMADDUBSW", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPMAXUB256", argLength: 2, reg: fp21, asm: "VPMAXUB", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMAXUB256", argLength: 2, reg: fp21, asm: "VPMAXUB", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMINUB256", argLength: 2, reg: fp21, asm: "VPMINUB", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMINUB256", argLength: 2, reg: fp21, asm: "VPMINUB", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMADDUBSW256", argLength: 2, reg: fp21, asm: "VPMADDUBSW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPMADDUBSW256", argLength: 2, reg: fp21, asm: "VPMADDUBSW", commutative: false, typ: "Vec256", resultInArg0: false},
@ -776,8 +774,10 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
{name: "VPAVGBMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPAVGB", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPAVGBMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPAVGB", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMAXUBMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMAXUB", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMAXUBMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMAXUB", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMINUBMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMINUB", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMINUBMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMINUB", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMADDUBSWMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMADDUBSW", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPMAXUB512", argLength: 2, reg: fp21, asm: "VPMAXUB", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMAXUB512", argLength: 2, reg: fp21, asm: "VPMAXUB", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMINUB512", argLength: 2, reg: fp21, asm: "VPMINUB", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMINUB512", argLength: 2, reg: fp21, asm: "VPMINUB", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMADDUBSW512", argLength: 2, reg: fp21, asm: "VPMADDUBSW", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VRNDSCALEPS512", argLength: 1, reg: fp11, asm: "VRNDSCALEPS", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VRNDSCALEPS512", argLength: 1, reg: fp11, asm: "VRNDSCALEPS", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VREDUCEPS512", argLength: 1, reg: fp11, asm: "VREDUCEPS", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VREDUCEPS512", argLength: 1, reg: fp11, asm: "VREDUCEPS", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VCMPPS512", argLength: 2, reg: fp2k1, asm: "VCMPPS", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VCMPPS512", argLength: 2, reg: fp2k1, asm: "VCMPPS", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},

View file

@ -979,7 +979,6 @@ func simdGenericOps() []opData {
{name: "MaskedPopCountUint16x16", argLength: 2, commutative: false}, {name: "MaskedPopCountUint16x16", argLength: 2, commutative: false},
{name: "MaskedSaturatedAddUint16x16", argLength: 3, commutative: true}, {name: "MaskedSaturatedAddUint16x16", argLength: 3, commutative: true},
{name: "MaskedSaturatedSubUint16x16", argLength: 3, commutative: false}, {name: "MaskedSaturatedSubUint16x16", argLength: 3, commutative: false},
{name: "MaskedSaturatedUnsignedSignedPairDotProdUint16x16", argLength: 3, commutative: false},
{name: "MaskedSubUint16x16", argLength: 3, commutative: false}, {name: "MaskedSubUint16x16", argLength: 3, commutative: false},
{name: "MaxUint16x16", argLength: 2, commutative: true}, {name: "MaxUint16x16", argLength: 2, commutative: true},
{name: "MinUint16x16", argLength: 2, commutative: true}, {name: "MinUint16x16", argLength: 2, commutative: true},
@ -991,7 +990,6 @@ func simdGenericOps() []opData {
{name: "PopCountUint16x16", argLength: 1, commutative: false}, {name: "PopCountUint16x16", argLength: 1, commutative: false},
{name: "SaturatedAddUint16x16", argLength: 2, commutative: true}, {name: "SaturatedAddUint16x16", argLength: 2, commutative: true},
{name: "SaturatedSubUint16x16", argLength: 2, commutative: false}, {name: "SaturatedSubUint16x16", argLength: 2, commutative: false},
{name: "SaturatedUnsignedSignedPairDotProdUint16x16", argLength: 2, commutative: false},
{name: "SubUint16x16", argLength: 2, commutative: false}, {name: "SubUint16x16", argLength: 2, commutative: false},
{name: "XorUint16x16", argLength: 2, commutative: true}, {name: "XorUint16x16", argLength: 2, commutative: true},
{name: "AddUint16x32", argLength: 2, commutative: true}, {name: "AddUint16x32", argLength: 2, commutative: true},
@ -1015,7 +1013,6 @@ func simdGenericOps() []opData {
{name: "MaskedPopCountUint16x32", argLength: 2, commutative: false}, {name: "MaskedPopCountUint16x32", argLength: 2, commutative: false},
{name: "MaskedSaturatedAddUint16x32", argLength: 3, commutative: true}, {name: "MaskedSaturatedAddUint16x32", argLength: 3, commutative: true},
{name: "MaskedSaturatedSubUint16x32", argLength: 3, commutative: false}, {name: "MaskedSaturatedSubUint16x32", argLength: 3, commutative: false},
{name: "MaskedSaturatedUnsignedSignedPairDotProdUint16x32", argLength: 3, commutative: false},
{name: "MaskedSubUint16x32", argLength: 3, commutative: false}, {name: "MaskedSubUint16x32", argLength: 3, commutative: false},
{name: "MaxUint16x32", argLength: 2, commutative: true}, {name: "MaxUint16x32", argLength: 2, commutative: true},
{name: "MinUint16x32", argLength: 2, commutative: true}, {name: "MinUint16x32", argLength: 2, commutative: true},
@ -1024,7 +1021,6 @@ func simdGenericOps() []opData {
{name: "PopCountUint16x32", argLength: 1, commutative: false}, {name: "PopCountUint16x32", argLength: 1, commutative: false},
{name: "SaturatedAddUint16x32", argLength: 2, commutative: true}, {name: "SaturatedAddUint16x32", argLength: 2, commutative: true},
{name: "SaturatedSubUint16x32", argLength: 2, commutative: false}, {name: "SaturatedSubUint16x32", argLength: 2, commutative: false},
{name: "SaturatedUnsignedSignedPairDotProdUint16x32", argLength: 2, commutative: false},
{name: "SubUint16x32", argLength: 2, commutative: false}, {name: "SubUint16x32", argLength: 2, commutative: false},
{name: "AddUint16x8", argLength: 2, commutative: true}, {name: "AddUint16x8", argLength: 2, commutative: true},
{name: "AndUint16x8", argLength: 2, commutative: true}, {name: "AndUint16x8", argLength: 2, commutative: true},
@ -1049,7 +1045,6 @@ func simdGenericOps() []opData {
{name: "MaskedPopCountUint16x8", argLength: 2, commutative: false}, {name: "MaskedPopCountUint16x8", argLength: 2, commutative: false},
{name: "MaskedSaturatedAddUint16x8", argLength: 3, commutative: true}, {name: "MaskedSaturatedAddUint16x8", argLength: 3, commutative: true},
{name: "MaskedSaturatedSubUint16x8", argLength: 3, commutative: false}, {name: "MaskedSaturatedSubUint16x8", argLength: 3, commutative: false},
{name: "MaskedSaturatedUnsignedSignedPairDotProdUint16x8", argLength: 3, commutative: false},
{name: "MaskedSubUint16x8", argLength: 3, commutative: false}, {name: "MaskedSubUint16x8", argLength: 3, commutative: false},
{name: "MaxUint16x8", argLength: 2, commutative: true}, {name: "MaxUint16x8", argLength: 2, commutative: true},
{name: "MinUint16x8", argLength: 2, commutative: true}, {name: "MinUint16x8", argLength: 2, commutative: true},
@ -1061,7 +1056,6 @@ func simdGenericOps() []opData {
{name: "PopCountUint16x8", argLength: 1, commutative: false}, {name: "PopCountUint16x8", argLength: 1, commutative: false},
{name: "SaturatedAddUint16x8", argLength: 2, commutative: true}, {name: "SaturatedAddUint16x8", argLength: 2, commutative: true},
{name: "SaturatedSubUint16x8", argLength: 2, commutative: false}, {name: "SaturatedSubUint16x8", argLength: 2, commutative: false},
{name: "SaturatedUnsignedSignedPairDotProdUint16x8", argLength: 2, commutative: false},
{name: "SubUint16x8", argLength: 2, commutative: false}, {name: "SubUint16x8", argLength: 2, commutative: false},
{name: "XorUint16x8", argLength: 2, commutative: true}, {name: "XorUint16x8", argLength: 2, commutative: true},
{name: "AddUint32x16", argLength: 2, commutative: true}, {name: "AddUint32x16", argLength: 2, commutative: true},
@ -1290,6 +1284,7 @@ func simdGenericOps() []opData {
{name: "MaskedPopCountUint8x16", argLength: 2, commutative: false}, {name: "MaskedPopCountUint8x16", argLength: 2, commutative: false},
{name: "MaskedSaturatedAddUint8x16", argLength: 3, commutative: true}, {name: "MaskedSaturatedAddUint8x16", argLength: 3, commutative: true},
{name: "MaskedSaturatedSubUint8x16", argLength: 3, commutative: false}, {name: "MaskedSaturatedSubUint8x16", argLength: 3, commutative: false},
{name: "MaskedSaturatedUnsignedSignedPairDotProdUint8x16", argLength: 3, commutative: false},
{name: "MaskedSubUint8x16", argLength: 3, commutative: false}, {name: "MaskedSubUint8x16", argLength: 3, commutative: false},
{name: "MaxUint8x16", argLength: 2, commutative: true}, {name: "MaxUint8x16", argLength: 2, commutative: true},
{name: "MinUint8x16", argLength: 2, commutative: true}, {name: "MinUint8x16", argLength: 2, commutative: true},
@ -1323,6 +1318,7 @@ func simdGenericOps() []opData {
{name: "MaskedPopCountUint8x32", argLength: 2, commutative: false}, {name: "MaskedPopCountUint8x32", argLength: 2, commutative: false},
{name: "MaskedSaturatedAddUint8x32", argLength: 3, commutative: true}, {name: "MaskedSaturatedAddUint8x32", argLength: 3, commutative: true},
{name: "MaskedSaturatedSubUint8x32", argLength: 3, commutative: false}, {name: "MaskedSaturatedSubUint8x32", argLength: 3, commutative: false},
{name: "MaskedSaturatedUnsignedSignedPairDotProdUint8x32", argLength: 3, commutative: false},
{name: "MaskedSubUint8x32", argLength: 3, commutative: false}, {name: "MaskedSubUint8x32", argLength: 3, commutative: false},
{name: "MaxUint8x32", argLength: 2, commutative: true}, {name: "MaxUint8x32", argLength: 2, commutative: true},
{name: "MinUint8x32", argLength: 2, commutative: true}, {name: "MinUint8x32", argLength: 2, commutative: true},
@ -1354,6 +1350,7 @@ func simdGenericOps() []opData {
{name: "MaskedPopCountUint8x64", argLength: 2, commutative: false}, {name: "MaskedPopCountUint8x64", argLength: 2, commutative: false},
{name: "MaskedSaturatedAddUint8x64", argLength: 3, commutative: true}, {name: "MaskedSaturatedAddUint8x64", argLength: 3, commutative: true},
{name: "MaskedSaturatedSubUint8x64", argLength: 3, commutative: false}, {name: "MaskedSaturatedSubUint8x64", argLength: 3, commutative: false},
{name: "MaskedSaturatedUnsignedSignedPairDotProdUint8x64", argLength: 3, commutative: false},
{name: "MaskedSubUint8x64", argLength: 3, commutative: false}, {name: "MaskedSubUint8x64", argLength: 3, commutative: false},
{name: "MaxUint8x64", argLength: 2, commutative: true}, {name: "MaxUint8x64", argLength: 2, commutative: true},
{name: "MinUint8x64", argLength: 2, commutative: true}, {name: "MinUint8x64", argLength: 2, commutative: true},
@ -1361,6 +1358,7 @@ func simdGenericOps() []opData {
{name: "PopCountUint8x64", argLength: 1, commutative: false}, {name: "PopCountUint8x64", argLength: 1, commutative: false},
{name: "SaturatedAddUint8x64", argLength: 2, commutative: true}, {name: "SaturatedAddUint8x64", argLength: 2, commutative: true},
{name: "SaturatedSubUint8x64", argLength: 2, commutative: false}, {name: "SaturatedSubUint8x64", argLength: 2, commutative: false},
{name: "SaturatedUnsignedSignedPairDotProdUint8x64", argLength: 2, commutative: false},
{name: "SubUint8x64", argLength: 2, commutative: false}, {name: "SubUint8x64", argLength: 2, commutative: false},
{name: "CeilSuppressExceptionWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"}, {name: "CeilSuppressExceptionWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
{name: "CeilWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"}, {name: "CeilWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"},

View file

@ -1898,7 +1898,6 @@ const (
OpAMD64VPMAXUWMasked256 OpAMD64VPMAXUWMasked256
OpAMD64VPMINUWMasked256 OpAMD64VPMINUWMasked256
OpAMD64VPMULHUWMasked256 OpAMD64VPMULHUWMasked256
OpAMD64VPMADDUBSWMasked256
OpAMD64VPMAXUW256 OpAMD64VPMAXUW256
OpAMD64VPMINUW256 OpAMD64VPMINUW256
OpAMD64VPMULHUW256 OpAMD64VPMULHUW256
@ -1907,17 +1906,14 @@ const (
OpAMD64VPMAXUWMasked512 OpAMD64VPMAXUWMasked512
OpAMD64VPMINUWMasked512 OpAMD64VPMINUWMasked512
OpAMD64VPMULHUWMasked512 OpAMD64VPMULHUWMasked512
OpAMD64VPMADDUBSWMasked512
OpAMD64VPMAXUW512 OpAMD64VPMAXUW512
OpAMD64VPMINUW512 OpAMD64VPMINUW512
OpAMD64VPMULHUW512 OpAMD64VPMULHUW512
OpAMD64VPMADDUBSW512
OpAMD64VPAVGW128 OpAMD64VPAVGW128
OpAMD64VPAVGWMasked128 OpAMD64VPAVGWMasked128
OpAMD64VPMAXUWMasked128 OpAMD64VPMAXUWMasked128
OpAMD64VPMINUWMasked128 OpAMD64VPMINUWMasked128
OpAMD64VPMULHUWMasked128 OpAMD64VPMULHUWMasked128
OpAMD64VPMADDUBSWMasked128
OpAMD64VPMAXUW128 OpAMD64VPMAXUW128
OpAMD64VPMINUW128 OpAMD64VPMINUW128
OpAMD64VPMULHUW128 OpAMD64VPMULHUW128
@ -1955,6 +1951,7 @@ const (
OpAMD64VPAVGBMasked128 OpAMD64VPAVGBMasked128
OpAMD64VPMAXUBMasked128 OpAMD64VPMAXUBMasked128
OpAMD64VPMINUBMasked128 OpAMD64VPMINUBMasked128
OpAMD64VPMADDUBSWMasked128
OpAMD64VPMAXUB128 OpAMD64VPMAXUB128
OpAMD64VPMINUB128 OpAMD64VPMINUB128
OpAMD64VPMADDUBSW128 OpAMD64VPMADDUBSW128
@ -1962,6 +1959,7 @@ const (
OpAMD64VPAVGBMasked256 OpAMD64VPAVGBMasked256
OpAMD64VPMAXUBMasked256 OpAMD64VPMAXUBMasked256
OpAMD64VPMINUBMasked256 OpAMD64VPMINUBMasked256
OpAMD64VPMADDUBSWMasked256
OpAMD64VPMAXUB256 OpAMD64VPMAXUB256
OpAMD64VPMINUB256 OpAMD64VPMINUB256
OpAMD64VPMADDUBSW256 OpAMD64VPMADDUBSW256
@ -1969,8 +1967,10 @@ const (
OpAMD64VPAVGBMasked512 OpAMD64VPAVGBMasked512
OpAMD64VPMAXUBMasked512 OpAMD64VPMAXUBMasked512
OpAMD64VPMINUBMasked512 OpAMD64VPMINUBMasked512
OpAMD64VPMADDUBSWMasked512
OpAMD64VPMAXUB512 OpAMD64VPMAXUB512
OpAMD64VPMINUB512 OpAMD64VPMINUB512
OpAMD64VPMADDUBSW512
OpAMD64VRNDSCALEPS512 OpAMD64VRNDSCALEPS512
OpAMD64VREDUCEPS512 OpAMD64VREDUCEPS512
OpAMD64VCMPPS512 OpAMD64VCMPPS512
@ -5262,7 +5262,6 @@ const (
OpMaskedPopCountUint16x16 OpMaskedPopCountUint16x16
OpMaskedSaturatedAddUint16x16 OpMaskedSaturatedAddUint16x16
OpMaskedSaturatedSubUint16x16 OpMaskedSaturatedSubUint16x16
OpMaskedSaturatedUnsignedSignedPairDotProdUint16x16
OpMaskedSubUint16x16 OpMaskedSubUint16x16
OpMaxUint16x16 OpMaxUint16x16
OpMinUint16x16 OpMinUint16x16
@ -5274,7 +5273,6 @@ const (
OpPopCountUint16x16 OpPopCountUint16x16
OpSaturatedAddUint16x16 OpSaturatedAddUint16x16
OpSaturatedSubUint16x16 OpSaturatedSubUint16x16
OpSaturatedUnsignedSignedPairDotProdUint16x16
OpSubUint16x16 OpSubUint16x16
OpXorUint16x16 OpXorUint16x16
OpAddUint16x32 OpAddUint16x32
@ -5298,7 +5296,6 @@ const (
OpMaskedPopCountUint16x32 OpMaskedPopCountUint16x32
OpMaskedSaturatedAddUint16x32 OpMaskedSaturatedAddUint16x32
OpMaskedSaturatedSubUint16x32 OpMaskedSaturatedSubUint16x32
OpMaskedSaturatedUnsignedSignedPairDotProdUint16x32
OpMaskedSubUint16x32 OpMaskedSubUint16x32
OpMaxUint16x32 OpMaxUint16x32
OpMinUint16x32 OpMinUint16x32
@ -5307,7 +5304,6 @@ const (
OpPopCountUint16x32 OpPopCountUint16x32
OpSaturatedAddUint16x32 OpSaturatedAddUint16x32
OpSaturatedSubUint16x32 OpSaturatedSubUint16x32
OpSaturatedUnsignedSignedPairDotProdUint16x32
OpSubUint16x32 OpSubUint16x32
OpAddUint16x8 OpAddUint16x8
OpAndUint16x8 OpAndUint16x8
@ -5332,7 +5328,6 @@ const (
OpMaskedPopCountUint16x8 OpMaskedPopCountUint16x8
OpMaskedSaturatedAddUint16x8 OpMaskedSaturatedAddUint16x8
OpMaskedSaturatedSubUint16x8 OpMaskedSaturatedSubUint16x8
OpMaskedSaturatedUnsignedSignedPairDotProdUint16x8
OpMaskedSubUint16x8 OpMaskedSubUint16x8
OpMaxUint16x8 OpMaxUint16x8
OpMinUint16x8 OpMinUint16x8
@ -5344,7 +5339,6 @@ const (
OpPopCountUint16x8 OpPopCountUint16x8
OpSaturatedAddUint16x8 OpSaturatedAddUint16x8
OpSaturatedSubUint16x8 OpSaturatedSubUint16x8
OpSaturatedUnsignedSignedPairDotProdUint16x8
OpSubUint16x8 OpSubUint16x8
OpXorUint16x8 OpXorUint16x8
OpAddUint32x16 OpAddUint32x16
@ -5573,6 +5567,7 @@ const (
OpMaskedPopCountUint8x16 OpMaskedPopCountUint8x16
OpMaskedSaturatedAddUint8x16 OpMaskedSaturatedAddUint8x16
OpMaskedSaturatedSubUint8x16 OpMaskedSaturatedSubUint8x16
OpMaskedSaturatedUnsignedSignedPairDotProdUint8x16
OpMaskedSubUint8x16 OpMaskedSubUint8x16
OpMaxUint8x16 OpMaxUint8x16
OpMinUint8x16 OpMinUint8x16
@ -5606,6 +5601,7 @@ const (
OpMaskedPopCountUint8x32 OpMaskedPopCountUint8x32
OpMaskedSaturatedAddUint8x32 OpMaskedSaturatedAddUint8x32
OpMaskedSaturatedSubUint8x32 OpMaskedSaturatedSubUint8x32
OpMaskedSaturatedUnsignedSignedPairDotProdUint8x32
OpMaskedSubUint8x32 OpMaskedSubUint8x32
OpMaxUint8x32 OpMaxUint8x32
OpMinUint8x32 OpMinUint8x32
@ -5637,6 +5633,7 @@ const (
OpMaskedPopCountUint8x64 OpMaskedPopCountUint8x64
OpMaskedSaturatedAddUint8x64 OpMaskedSaturatedAddUint8x64
OpMaskedSaturatedSubUint8x64 OpMaskedSaturatedSubUint8x64
OpMaskedSaturatedUnsignedSignedPairDotProdUint8x64
OpMaskedSubUint8x64 OpMaskedSubUint8x64
OpMaxUint8x64 OpMaxUint8x64
OpMinUint8x64 OpMinUint8x64
@ -5644,6 +5641,7 @@ const (
OpPopCountUint8x64 OpPopCountUint8x64
OpSaturatedAddUint8x64 OpSaturatedAddUint8x64
OpSaturatedSubUint8x64 OpSaturatedSubUint8x64
OpSaturatedUnsignedSignedPairDotProdUint8x64
OpSubUint8x64 OpSubUint8x64
OpCeilSuppressExceptionWithPrecisionFloat32x16 OpCeilSuppressExceptionWithPrecisionFloat32x16
OpCeilWithPrecisionFloat32x16 OpCeilWithPrecisionFloat32x16
@ -29231,21 +29229,6 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "VPMADDUBSWMasked256",
argLen: 3,
asm: x86.AVPMADDUBSW,
reg: regInfo{
inputs: []inputInfo{
{2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{ {
name: "VPMAXUW256", name: "VPMAXUW256",
argLen: 2, argLen: 2,
@ -29370,21 +29353,6 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "VPMADDUBSWMasked512",
argLen: 3,
asm: x86.AVPMADDUBSW,
reg: regInfo{
inputs: []inputInfo{
{2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{ {
name: "VPMAXUW512", name: "VPMAXUW512",
argLen: 2, argLen: 2,
@ -29430,20 +29398,6 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "VPMADDUBSW512",
argLen: 2,
asm: x86.AVPMADDUBSW,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{ {
name: "VPAVGW128", name: "VPAVGW128",
argLen: 2, argLen: 2,
@ -29523,21 +29477,6 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "VPMADDUBSWMasked128",
argLen: 3,
asm: x86.AVPMADDUBSW,
reg: regInfo{
inputs: []inputInfo{
{2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{ {
name: "VPMAXUW128", name: "VPMAXUW128",
argLen: 2, argLen: 2,
@ -30111,6 +30050,21 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "VPMADDUBSWMasked128",
argLen: 3,
asm: x86.AVPMADDUBSW,
reg: regInfo{
inputs: []inputInfo{
{2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{ {
name: "VPMAXUB128", name: "VPMAXUB128",
argLen: 2, argLen: 2,
@ -30218,6 +30172,21 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "VPMADDUBSWMasked256",
argLen: 3,
asm: x86.AVPMADDUBSW,
reg: regInfo{
inputs: []inputInfo{
{2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{ {
name: "VPMAXUB256", name: "VPMAXUB256",
argLen: 2, argLen: 2,
@ -30325,6 +30294,21 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "VPMADDUBSWMasked512",
argLen: 3,
asm: x86.AVPMADDUBSW,
reg: regInfo{
inputs: []inputInfo{
{2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{ {
name: "VPMAXUB512", name: "VPMAXUB512",
argLen: 2, argLen: 2,
@ -30355,6 +30339,20 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "VPMADDUBSW512",
argLen: 2,
asm: x86.AVPMADDUBSW,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{ {
name: "VRNDSCALEPS512", name: "VRNDSCALEPS512",
auxType: auxInt8, auxType: auxInt8,
@ -64134,11 +64132,6 @@ var opcodeTable = [...]opInfo{
argLen: 3, argLen: 3,
generic: true, generic: true,
}, },
{
name: "MaskedSaturatedUnsignedSignedPairDotProdUint16x16",
argLen: 3,
generic: true,
},
{ {
name: "MaskedSubUint16x16", name: "MaskedSubUint16x16",
argLen: 3, argLen: 3,
@ -64200,11 +64193,6 @@ var opcodeTable = [...]opInfo{
argLen: 2, argLen: 2,
generic: true, generic: true,
}, },
{
name: "SaturatedUnsignedSignedPairDotProdUint16x16",
argLen: 2,
generic: true,
},
{ {
name: "SubUint16x16", name: "SubUint16x16",
argLen: 2, argLen: 2,
@ -64332,11 +64320,6 @@ var opcodeTable = [...]opInfo{
argLen: 3, argLen: 3,
generic: true, generic: true,
}, },
{
name: "MaskedSaturatedUnsignedSignedPairDotProdUint16x32",
argLen: 3,
generic: true,
},
{ {
name: "MaskedSubUint16x32", name: "MaskedSubUint16x32",
argLen: 3, argLen: 3,
@ -64382,11 +64365,6 @@ var opcodeTable = [...]opInfo{
argLen: 2, argLen: 2,
generic: true, generic: true,
}, },
{
name: "SaturatedUnsignedSignedPairDotProdUint16x32",
argLen: 2,
generic: true,
},
{ {
name: "SubUint16x32", name: "SubUint16x32",
argLen: 2, argLen: 2,
@ -64519,11 +64497,6 @@ var opcodeTable = [...]opInfo{
argLen: 3, argLen: 3,
generic: true, generic: true,
}, },
{
name: "MaskedSaturatedUnsignedSignedPairDotProdUint16x8",
argLen: 3,
generic: true,
},
{ {
name: "MaskedSubUint16x8", name: "MaskedSubUint16x8",
argLen: 3, argLen: 3,
@ -64585,11 +64558,6 @@ var opcodeTable = [...]opInfo{
argLen: 2, argLen: 2,
generic: true, generic: true,
}, },
{
name: "SaturatedUnsignedSignedPairDotProdUint16x8",
argLen: 2,
generic: true,
},
{ {
name: "SubUint16x8", name: "SubUint16x8",
argLen: 2, argLen: 2,
@ -65846,6 +65814,11 @@ var opcodeTable = [...]opInfo{
argLen: 3, argLen: 3,
generic: true, generic: true,
}, },
{
name: "MaskedSaturatedUnsignedSignedPairDotProdUint8x16",
argLen: 3,
generic: true,
},
{ {
name: "MaskedSubUint8x16", name: "MaskedSubUint8x16",
argLen: 3, argLen: 3,
@ -66028,6 +66001,11 @@ var opcodeTable = [...]opInfo{
argLen: 3, argLen: 3,
generic: true, generic: true,
}, },
{
name: "MaskedSaturatedUnsignedSignedPairDotProdUint8x32",
argLen: 3,
generic: true,
},
{ {
name: "MaskedSubUint8x32", name: "MaskedSubUint8x32",
argLen: 3, argLen: 3,
@ -66199,6 +66177,11 @@ var opcodeTable = [...]opInfo{
argLen: 3, argLen: 3,
generic: true, generic: true,
}, },
{
name: "MaskedSaturatedUnsignedSignedPairDotProdUint8x64",
argLen: 3,
generic: true,
},
{ {
name: "MaskedSubUint8x64", name: "MaskedSubUint8x64",
argLen: 3, argLen: 3,
@ -66238,6 +66221,11 @@ var opcodeTable = [...]opInfo{
argLen: 2, argLen: 2,
generic: true, generic: true,
}, },
{
name: "SaturatedUnsignedSignedPairDotProdUint8x64",
argLen: 2,
generic: true,
},
{ {
name: "SubUint8x64", name: "SubUint8x64",
argLen: 2, argLen: 2,

View file

@ -3374,12 +3374,12 @@ func rewriteValueAMD64(v *Value) bool {
return rewriteValueAMD64_OpMaskedSaturatedSubUint8x32(v) return rewriteValueAMD64_OpMaskedSaturatedSubUint8x32(v)
case OpMaskedSaturatedSubUint8x64: case OpMaskedSaturatedSubUint8x64:
return rewriteValueAMD64_OpMaskedSaturatedSubUint8x64(v) return rewriteValueAMD64_OpMaskedSaturatedSubUint8x64(v)
case OpMaskedSaturatedUnsignedSignedPairDotProdUint16x16: case OpMaskedSaturatedUnsignedSignedPairDotProdUint8x16:
return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint16x16(v) return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint8x16(v)
case OpMaskedSaturatedUnsignedSignedPairDotProdUint16x32: case OpMaskedSaturatedUnsignedSignedPairDotProdUint8x32:
return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint16x32(v) return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint8x32(v)
case OpMaskedSaturatedUnsignedSignedPairDotProdUint16x8: case OpMaskedSaturatedUnsignedSignedPairDotProdUint8x64:
return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint16x8(v) return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint8x64(v)
case OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16: case OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16:
return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16(v) return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16(v)
case OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4: case OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4:
@ -4455,21 +4455,15 @@ func rewriteValueAMD64(v *Value) bool {
case OpSaturatedSubUint8x64: case OpSaturatedSubUint8x64:
v.Op = OpAMD64VPSUBSB512 v.Op = OpAMD64VPSUBSB512
return true return true
case OpSaturatedUnsignedSignedPairDotProdUint16x16:
v.Op = OpAMD64VPMADDUBSW256
return true
case OpSaturatedUnsignedSignedPairDotProdUint16x32:
v.Op = OpAMD64VPMADDUBSW512
return true
case OpSaturatedUnsignedSignedPairDotProdUint16x8:
v.Op = OpAMD64VPMADDUBSW128
return true
case OpSaturatedUnsignedSignedPairDotProdUint8x16: case OpSaturatedUnsignedSignedPairDotProdUint8x16:
v.Op = OpAMD64VPMADDUBSW128 v.Op = OpAMD64VPMADDUBSW128
return true return true
case OpSaturatedUnsignedSignedPairDotProdUint8x32: case OpSaturatedUnsignedSignedPairDotProdUint8x32:
v.Op = OpAMD64VPMADDUBSW256 v.Op = OpAMD64VPMADDUBSW256
return true return true
case OpSaturatedUnsignedSignedPairDotProdUint8x64:
v.Op = OpAMD64VPMADDUBSW512
return true
case OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16: case OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16:
v.Op = OpAMD64VPDPBUSDS512 v.Op = OpAMD64VPDPBUSDS512
return true return true
@ -46801,12 +46795,30 @@ func rewriteValueAMD64_OpMaskedSaturatedSubUint8x64(v *Value) bool {
return true return true
} }
} }
func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint16x16(v *Value) bool { func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint8x16(v *Value) bool {
v_2 := v.Args[2] v_2 := v.Args[2]
v_1 := v.Args[1] v_1 := v.Args[1]
v_0 := v.Args[0] v_0 := v.Args[0]
b := v.Block b := v.Block
// match: (MaskedSaturatedUnsignedSignedPairDotProdUint16x16 x y mask) // match: (MaskedSaturatedUnsignedSignedPairDotProdUint8x16 x y mask)
// result: (VPMADDUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
mask := v_2
v.reset(OpAMD64VPMADDUBSWMasked128)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg3(x, y, v0)
return true
}
}
func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint8x32(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
// match: (MaskedSaturatedUnsignedSignedPairDotProdUint8x32 x y mask)
// result: (VPMADDUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask)) // result: (VPMADDUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
for { for {
x := v_0 x := v_0
@ -46819,12 +46831,12 @@ func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint16x16(v *Va
return true return true
} }
} }
func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint16x32(v *Value) bool { func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint8x64(v *Value) bool {
v_2 := v.Args[2] v_2 := v.Args[2]
v_1 := v.Args[1] v_1 := v.Args[1]
v_0 := v.Args[0] v_0 := v.Args[0]
b := v.Block b := v.Block
// match: (MaskedSaturatedUnsignedSignedPairDotProdUint16x32 x y mask) // match: (MaskedSaturatedUnsignedSignedPairDotProdUint8x64 x y mask)
// result: (VPMADDUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask)) // result: (VPMADDUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
for { for {
x := v_0 x := v_0
@ -46837,24 +46849,6 @@ func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint16x32(v *Va
return true return true
} }
} }
func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint16x8(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
// match: (MaskedSaturatedUnsignedSignedPairDotProdUint16x8 x y mask)
// result: (VPMADDUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
mask := v_2
v.reset(OpAMD64VPMADDUBSWMasked128)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg3(x, y, v0)
return true
}
}
func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16(v *Value) bool { func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16(v *Value) bool {
v_3 := v.Args[3] v_3 := v.Args[3]
v_2 := v.Args[2] v_2 := v.Args[2]

View file

@ -1126,9 +1126,9 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Uint16x8.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubUint16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint16x8.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x16.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubUint16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint16x16.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubUint16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x32.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubUint16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint16x32.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubUint16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint16x8.MaskedSaturatedUnsignedSignedPairDotProd", opLen3(ssa.OpMaskedSaturatedUnsignedSignedPairDotProdUint16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint8x16.MaskedSaturatedUnsignedSignedPairDotProd", opLen3(ssa.OpMaskedSaturatedUnsignedSignedPairDotProdUint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x16.MaskedSaturatedUnsignedSignedPairDotProd", opLen3(ssa.OpMaskedSaturatedUnsignedSignedPairDotProdUint16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint8x32.MaskedSaturatedUnsignedSignedPairDotProd", opLen3(ssa.OpMaskedSaturatedUnsignedSignedPairDotProdUint8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x32.MaskedSaturatedUnsignedSignedPairDotProd", opLen3(ssa.OpMaskedSaturatedUnsignedSignedPairDotProdUint16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint8x64.MaskedSaturatedUnsignedSignedPairDotProd", opLen3(ssa.OpMaskedSaturatedUnsignedSignedPairDotProdUint8x64, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int32x4.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int32x4.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x8.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int32x8.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int32x16.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int32x16.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
@ -1463,9 +1463,7 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Uint16x32.SaturatedSub", opLen2(ssa.OpSaturatedSubUint16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint16x32.SaturatedSub", opLen2(ssa.OpSaturatedSubUint16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint8x16.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint8x16.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint8x32.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint8x32.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x8.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint8x64.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint8x64, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint16x16.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x32.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int32x4.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int32x4.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x8.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int32x8.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int32x16.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int32x16.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)

View file

@ -5962,17 +5962,17 @@ func (x Int16x32) MaskedPairDotProd(y Int16x32, z Mask16x32) Int32x16
// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. // PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
// //
// Asm: VPDPWSSD, CPU Feature: AVX512EVEX // Asm: VPDPWSSD, CPU Feature: AVX512EVEX
func (x Int32x4) MaskedPairDotProdAccumulate(y Int16x8, z Int32x4, u Mask32x4) Int32x4 func (x Int32x4) MaskedPairDotProdAccumulate(y Int16x8, z Int16x8, u Mask32x4) Int32x4
// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. // PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
// //
// Asm: VPDPWSSD, CPU Feature: AVX512EVEX // Asm: VPDPWSSD, CPU Feature: AVX512EVEX
func (x Int32x8) MaskedPairDotProdAccumulate(y Int16x16, z Int32x8, u Mask32x8) Int32x8 func (x Int32x8) MaskedPairDotProdAccumulate(y Int16x16, z Int16x16, u Mask32x8) Int32x8
// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. // PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
// //
// Asm: VPDPWSSD, CPU Feature: AVX512EVEX // Asm: VPDPWSSD, CPU Feature: AVX512EVEX
func (x Int32x16) MaskedPairDotProdAccumulate(y Int16x32, z Int32x16, u Mask32x16) Int32x16 func (x Int32x16) MaskedPairDotProdAccumulate(y Int16x32, z Int16x32, u Mask32x16) Int32x16
/* MaskedPopCount */ /* MaskedPopCount */
@ -6239,17 +6239,17 @@ func (x Uint16x32) MaskedSaturatedAdd(y Uint16x32, z Mask16x32) Uint16x32
// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. // SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
// //
// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX // Asm: VPDPWSSDS, CPU Feature: AVX512EVEX
func (x Int32x4) MaskedSaturatedPairDotProdAccumulate(y Int16x8, z Int32x4, u Mask32x4) Int32x4 func (x Int32x4) MaskedSaturatedPairDotProdAccumulate(y Int16x8, z Int16x8, u Mask32x4) Int32x4
// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. // SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
// //
// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX // Asm: VPDPWSSDS, CPU Feature: AVX512EVEX
func (x Int32x8) MaskedSaturatedPairDotProdAccumulate(y Int16x16, z Int32x8, u Mask32x8) Int32x8 func (x Int32x8) MaskedSaturatedPairDotProdAccumulate(y Int16x16, z Int16x16, u Mask32x8) Int32x8
// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. // SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
// //
// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX // Asm: VPDPWSSDS, CPU Feature: AVX512EVEX
func (x Int32x16) MaskedSaturatedPairDotProdAccumulate(y Int16x32, z Int32x16, u Mask32x16) Int32x16 func (x Int32x16) MaskedSaturatedPairDotProdAccumulate(y Int16x32, z Int16x32, u Mask32x16) Int32x16
/* MaskedSaturatedSub */ /* MaskedSaturatedSub */
@ -6319,51 +6319,51 @@ func (x Uint16x32) MaskedSaturatedSub(y Uint16x32, z Mask16x32) Uint16x32
// yielding a vector of half as many elements with twice the input element size. // yielding a vector of half as many elements with twice the input element size.
// //
// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX // Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
func (x Uint16x8) MaskedSaturatedUnsignedSignedPairDotProd(y Int16x8, z Mask16x8) Int16x8 func (x Uint8x16) MaskedSaturatedUnsignedSignedPairDotProd(y Int8x16, z Mask16x8) Int16x8
// SaturatedPairDotProd multiplies the elements and adds the pairs together with saturation, // SaturatedPairDotProd multiplies the elements and adds the pairs together with saturation,
// yielding a vector of half as many elements with twice the input element size. // yielding a vector of half as many elements with twice the input element size.
// //
// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX // Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
func (x Uint16x16) MaskedSaturatedUnsignedSignedPairDotProd(y Int16x16, z Mask16x16) Int16x16 func (x Uint8x32) MaskedSaturatedUnsignedSignedPairDotProd(y Int8x32, z Mask16x16) Int16x16
// SaturatedPairDotProd multiplies the elements and adds the pairs together with saturation, // SaturatedPairDotProd multiplies the elements and adds the pairs together with saturation,
// yielding a vector of half as many elements with twice the input element size. // yielding a vector of half as many elements with twice the input element size.
// //
// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX // Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
func (x Uint16x32) MaskedSaturatedUnsignedSignedPairDotProd(y Int16x32, z Mask16x32) Int16x32 func (x Uint8x64) MaskedSaturatedUnsignedSignedPairDotProd(y Int8x64, z Mask16x32) Int16x32
/* MaskedSaturatedUnsignedSignedQuadDotProdAccumulate */ /* MaskedSaturatedUnsignedSignedQuadDotProdAccumulate */
// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. // SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
// //
// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX // Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
func (x Int32x4) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int32x4, u Mask32x4) Int32x4 func (x Int32x4) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int8x16, u Mask32x4) Int32x4
// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. // SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
// //
// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX // Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
func (x Int32x8) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int32x8, u Mask32x8) Int32x8 func (x Int32x8) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int8x32, u Mask32x8) Int32x8
// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. // SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
// //
// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX // Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
func (x Int32x16) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16, u Mask32x16) Int32x16 func (x Int32x16) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int8x64, u Mask32x16) Int32x16
// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. // SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
// //
// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX // Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
func (x Uint32x4) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int32x4, u Mask32x4) Uint32x4 func (x Uint32x4) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int8x16, u Mask32x4) Uint32x4
// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. // SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
// //
// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX // Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
func (x Uint32x8) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int32x8, u Mask32x8) Uint32x8 func (x Uint32x8) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int8x32, u Mask32x8) Uint32x8
// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. // SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
// //
// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX // Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
func (x Uint32x16) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16, u Mask32x16) Uint32x16 func (x Uint32x16) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int8x64, u Mask32x16) Uint32x16
/* MaskedSqrt */ /* MaskedSqrt */
@ -6630,32 +6630,32 @@ func (x Float64x8) MaskedTruncWithPrecision(imm uint8, y Mask64x8) Float64x8
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
// //
// Asm: VPDPBUSD, CPU Feature: AVX512EVEX // Asm: VPDPBUSD, CPU Feature: AVX512EVEX
func (x Int32x4) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int32x4, u Mask32x4) Int32x4 func (x Int32x4) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int8x16, u Mask32x4) Int32x4
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
// //
// Asm: VPDPBUSD, CPU Feature: AVX512EVEX // Asm: VPDPBUSD, CPU Feature: AVX512EVEX
func (x Int32x8) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int32x8, u Mask32x8) Int32x8 func (x Int32x8) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int8x32, u Mask32x8) Int32x8
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
// //
// Asm: VPDPBUSD, CPU Feature: AVX512EVEX // Asm: VPDPBUSD, CPU Feature: AVX512EVEX
func (x Int32x16) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16, u Mask32x16) Int32x16 func (x Int32x16) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int8x64, u Mask32x16) Int32x16
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
// //
// Asm: VPDPBUSD, CPU Feature: AVX512EVEX // Asm: VPDPBUSD, CPU Feature: AVX512EVEX
func (x Uint32x4) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int32x4, u Mask32x4) Uint32x4 func (x Uint32x4) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int8x16, u Mask32x4) Uint32x4
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
// //
// Asm: VPDPBUSD, CPU Feature: AVX512EVEX // Asm: VPDPBUSD, CPU Feature: AVX512EVEX
func (x Uint32x8) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int32x8, u Mask32x8) Uint32x8 func (x Uint32x8) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int8x32, u Mask32x8) Uint32x8
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
// //
// Asm: VPDPBUSD, CPU Feature: AVX512EVEX // Asm: VPDPBUSD, CPU Feature: AVX512EVEX
func (x Uint32x16) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16, u Mask32x16) Uint32x16 func (x Uint32x16) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int8x64, u Mask32x16) Uint32x16
/* MaskedXor */ /* MaskedXor */
@ -7597,17 +7597,17 @@ func (x Int16x32) PairDotProd(y Int16x32) Int32x16
// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. // PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
// //
// Asm: VPDPWSSD, CPU Feature: AVX_VNNI // Asm: VPDPWSSD, CPU Feature: AVX_VNNI
func (x Int32x4) PairDotProdAccumulate(y Int32x4, z Int32x4) Int32x4 func (x Int32x4) PairDotProdAccumulate(y Int16x8, z Int16x8) Int32x4
// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. // PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
// //
// Asm: VPDPWSSD, CPU Feature: AVX_VNNI // Asm: VPDPWSSD, CPU Feature: AVX_VNNI
func (x Int32x8) PairDotProdAccumulate(y Int32x8, z Int32x8) Int32x8 func (x Int32x8) PairDotProdAccumulate(y Int16x16, z Int16x16) Int32x8
// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. // PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
// //
// Asm: VPDPWSSD, CPU Feature: AVX512EVEX // Asm: VPDPWSSD, CPU Feature: AVX512EVEX
func (x Int32x16) PairDotProdAccumulate(y Int16x32, z Int32x16) Int32x16 func (x Int32x16) PairDotProdAccumulate(y Int16x32, z Int16x32) Int32x16
/* PairwiseAdd */ /* PairwiseAdd */
@ -8048,17 +8048,17 @@ func (x Uint16x32) SaturatedAdd(y Uint16x32) Uint16x32
// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. // SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
// //
// Asm: VPDPWSSDS, CPU Feature: AVX_VNNI // Asm: VPDPWSSDS, CPU Feature: AVX_VNNI
func (x Int32x4) SaturatedPairDotProdAccumulate(y Int32x4, z Int32x4) Int32x4 func (x Int32x4) SaturatedPairDotProdAccumulate(y Int16x8, z Int16x8) Int32x4
// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. // SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
// //
// Asm: VPDPWSSDS, CPU Feature: AVX_VNNI // Asm: VPDPWSSDS, CPU Feature: AVX_VNNI
func (x Int32x8) SaturatedPairDotProdAccumulate(y Int32x8, z Int32x8) Int32x8 func (x Int32x8) SaturatedPairDotProdAccumulate(y Int16x16, z Int16x16) Int32x8
// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. // SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
// //
// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX // Asm: VPDPWSSDS, CPU Feature: AVX512EVEX
func (x Int32x16) SaturatedPairDotProdAccumulate(y Int16x32, z Int32x16) Int32x16 func (x Int32x16) SaturatedPairDotProdAccumulate(y Int16x32, z Int16x32) Int32x16
/* SaturatedPairwiseAdd */ /* SaturatedPairwiseAdd */
@ -8168,51 +8168,39 @@ func (x Uint8x32) SaturatedUnsignedSignedPairDotProd(y Int8x32) Int16x16
// yielding a vector of half as many elements with twice the input element size. // yielding a vector of half as many elements with twice the input element size.
// //
// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX // Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
func (x Uint16x8) SaturatedUnsignedSignedPairDotProd(y Int16x8) Int16x8 func (x Uint8x64) SaturatedUnsignedSignedPairDotProd(y Int8x64) Int16x32
// SaturatedPairDotProd multiplies the elements and adds the pairs together with saturation,
// yielding a vector of half as many elements with twice the input element size.
//
// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
func (x Uint16x16) SaturatedUnsignedSignedPairDotProd(y Int16x16) Int16x16
// SaturatedPairDotProd multiplies the elements and adds the pairs together with saturation,
// yielding a vector of half as many elements with twice the input element size.
//
// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
func (x Uint16x32) SaturatedUnsignedSignedPairDotProd(y Int16x32) Int16x32
/* SaturatedUnsignedSignedQuadDotProdAccumulate */ /* SaturatedUnsignedSignedQuadDotProdAccumulate */
// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. // SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
// //
// Asm: VPDPBUSDS, CPU Feature: AVX_VNNI // Asm: VPDPBUSDS, CPU Feature: AVX_VNNI
func (x Int32x4) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint32x4, z Int32x4) Int32x4 func (x Int32x4) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int8x16) Int32x4
// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. // SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
// //
// Asm: VPDPBUSDS, CPU Feature: AVX_VNNI // Asm: VPDPBUSDS, CPU Feature: AVX_VNNI
func (x Int32x8) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint32x8, z Int32x8) Int32x8 func (x Int32x8) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int8x32) Int32x8
// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. // SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
// //
// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX // Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
func (x Int32x16) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16) Int32x16 func (x Int32x16) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int8x64) Int32x16
// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. // SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
// //
// Asm: VPDPBUSDS, CPU Feature: AVX_VNNI // Asm: VPDPBUSDS, CPU Feature: AVX_VNNI
func (x Uint32x4) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint32x4, z Int32x4) Uint32x4 func (x Uint32x4) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int8x16) Uint32x4
// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. // SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
// //
// Asm: VPDPBUSDS, CPU Feature: AVX_VNNI // Asm: VPDPBUSDS, CPU Feature: AVX_VNNI
func (x Uint32x8) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint32x8, z Int32x8) Uint32x8 func (x Uint32x8) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int8x32) Uint32x8
// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. // SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
// //
// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX // Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
func (x Uint32x16) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16) Uint32x16 func (x Uint32x16) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int8x64) Uint32x16
/* Sign */ /* Sign */
@ -8543,32 +8531,32 @@ func (x Float64x8) TruncWithPrecision(imm8 uint8) Float64x8
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
// //
// Asm: VPDPBUSD, CPU Feature: AVX_VNNI // Asm: VPDPBUSD, CPU Feature: AVX_VNNI
func (x Int32x4) UnsignedSignedQuadDotProdAccumulate(y Uint32x4, z Int32x4) Int32x4 func (x Int32x4) UnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int8x16) Int32x4
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
// //
// Asm: VPDPBUSD, CPU Feature: AVX_VNNI // Asm: VPDPBUSD, CPU Feature: AVX_VNNI
func (x Int32x8) UnsignedSignedQuadDotProdAccumulate(y Uint32x8, z Int32x8) Int32x8 func (x Int32x8) UnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int8x32) Int32x8
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
// //
// Asm: VPDPBUSD, CPU Feature: AVX512EVEX // Asm: VPDPBUSD, CPU Feature: AVX512EVEX
func (x Int32x16) UnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16) Int32x16 func (x Int32x16) UnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int8x64) Int32x16
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
// //
// Asm: VPDPBUSD, CPU Feature: AVX_VNNI // Asm: VPDPBUSD, CPU Feature: AVX_VNNI
func (x Uint32x4) UnsignedSignedQuadDotProdAccumulate(y Uint32x4, z Int32x4) Uint32x4 func (x Uint32x4) UnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int8x16) Uint32x4
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
// //
// Asm: VPDPBUSD, CPU Feature: AVX_VNNI // Asm: VPDPBUSD, CPU Feature: AVX_VNNI
func (x Uint32x8) UnsignedSignedQuadDotProdAccumulate(y Uint32x8, z Int32x8) Uint32x8 func (x Uint32x8) UnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int8x32) Uint32x8
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
// //
// Asm: VPDPBUSD, CPU Feature: AVX512EVEX // Asm: VPDPBUSD, CPU Feature: AVX512EVEX
func (x Uint32x16) UnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16) Uint32x16 func (x Uint32x16) UnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int8x64) Uint32x16
/* Xor */ /* Xor */