mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
[dev.simd] cmd/compile: fix signature error of PairDotProdAccumulate.
This CL is generated by CL 682135.

Change-Id: I6f004b2eca6323f1ff22555c85db993386f24c6c
Reviewed-on: https://go-review.googlesource.com/c/go/+/682155
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: David Chase <drchase@google.com>
This commit is contained in:
parent
3a4d10bfca
commit
1be5eb2686
7 changed files with 174 additions and 210 deletions
|
|
@ -1115,9 +1115,9 @@
|
||||||
(MaskedSaturatedSubUint8x16 x y mask) => (VPSUBSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
|
(MaskedSaturatedSubUint8x16 x y mask) => (VPSUBSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||||
(MaskedSaturatedSubUint8x32 x y mask) => (VPSUBSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
|
(MaskedSaturatedSubUint8x32 x y mask) => (VPSUBSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
|
||||||
(MaskedSaturatedSubUint8x64 x y mask) => (VPSUBSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
|
(MaskedSaturatedSubUint8x64 x y mask) => (VPSUBSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
|
||||||
(MaskedSaturatedUnsignedSignedPairDotProdUint16x16 x y mask) => (VPMADDUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
|
(MaskedSaturatedUnsignedSignedPairDotProdUint8x16 x y mask) => (VPMADDUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||||
(MaskedSaturatedUnsignedSignedPairDotProdUint16x32 x y mask) => (VPMADDUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
|
(MaskedSaturatedUnsignedSignedPairDotProdUint8x32 x y mask) => (VPMADDUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||||
(MaskedSaturatedUnsignedSignedPairDotProdUint16x8 x y mask) => (VPMADDUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
|
(MaskedSaturatedUnsignedSignedPairDotProdUint8x64 x y mask) => (VPMADDUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
|
||||||
(MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16 x y z mask) => (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
|
(MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16 x y z mask) => (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
|
||||||
(MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4 x y z mask) => (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
|
(MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4 x y z mask) => (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||||
(MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8 x y z mask) => (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
|
(MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8 x y z mask) => (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||||
|
|
@ -1450,11 +1450,9 @@
|
||||||
(SaturatedSubUint8x16 ...) => (VPSUBSB128 ...)
|
(SaturatedSubUint8x16 ...) => (VPSUBSB128 ...)
|
||||||
(SaturatedSubUint8x32 ...) => (VPSUBSB256 ...)
|
(SaturatedSubUint8x32 ...) => (VPSUBSB256 ...)
|
||||||
(SaturatedSubUint8x64 ...) => (VPSUBSB512 ...)
|
(SaturatedSubUint8x64 ...) => (VPSUBSB512 ...)
|
||||||
(SaturatedUnsignedSignedPairDotProdUint16x16 ...) => (VPMADDUBSW256 ...)
|
|
||||||
(SaturatedUnsignedSignedPairDotProdUint16x32 ...) => (VPMADDUBSW512 ...)
|
|
||||||
(SaturatedUnsignedSignedPairDotProdUint16x8 ...) => (VPMADDUBSW128 ...)
|
|
||||||
(SaturatedUnsignedSignedPairDotProdUint8x16 ...) => (VPMADDUBSW128 ...)
|
(SaturatedUnsignedSignedPairDotProdUint8x16 ...) => (VPMADDUBSW128 ...)
|
||||||
(SaturatedUnsignedSignedPairDotProdUint8x32 ...) => (VPMADDUBSW256 ...)
|
(SaturatedUnsignedSignedPairDotProdUint8x32 ...) => (VPMADDUBSW256 ...)
|
||||||
|
(SaturatedUnsignedSignedPairDotProdUint8x64 ...) => (VPMADDUBSW512 ...)
|
||||||
(SaturatedUnsignedSignedQuadDotProdAccumulateInt32x16 ...) => (VPDPBUSDS512 ...)
|
(SaturatedUnsignedSignedQuadDotProdAccumulateInt32x16 ...) => (VPDPBUSDS512 ...)
|
||||||
(SaturatedUnsignedSignedQuadDotProdAccumulateInt32x4 ...) => (VPDPBUSDS128 ...)
|
(SaturatedUnsignedSignedQuadDotProdAccumulateInt32x4 ...) => (VPDPBUSDS128 ...)
|
||||||
(SaturatedUnsignedSignedQuadDotProdAccumulateInt32x8 ...) => (VPDPBUSDS256 ...)
|
(SaturatedUnsignedSignedQuadDotProdAccumulateInt32x8 ...) => (VPDPBUSDS256 ...)
|
||||||
|
|
|
||||||
|
|
@ -705,7 +705,6 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
|
||||||
{name: "VPMAXUWMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMAXUW", commutative: true, typ: "Vec256", resultInArg0: false},
|
{name: "VPMAXUWMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMAXUW", commutative: true, typ: "Vec256", resultInArg0: false},
|
||||||
{name: "VPMINUWMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMINUW", commutative: true, typ: "Vec256", resultInArg0: false},
|
{name: "VPMINUWMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMINUW", commutative: true, typ: "Vec256", resultInArg0: false},
|
||||||
{name: "VPMULHUWMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMULHUW", commutative: true, typ: "Vec256", resultInArg0: false},
|
{name: "VPMULHUWMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMULHUW", commutative: true, typ: "Vec256", resultInArg0: false},
|
||||||
{name: "VPMADDUBSWMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMADDUBSW", commutative: false, typ: "Vec256", resultInArg0: false},
|
|
||||||
{name: "VPMAXUW256", argLength: 2, reg: fp21, asm: "VPMAXUW", commutative: true, typ: "Vec256", resultInArg0: false},
|
{name: "VPMAXUW256", argLength: 2, reg: fp21, asm: "VPMAXUW", commutative: true, typ: "Vec256", resultInArg0: false},
|
||||||
{name: "VPMINUW256", argLength: 2, reg: fp21, asm: "VPMINUW", commutative: true, typ: "Vec256", resultInArg0: false},
|
{name: "VPMINUW256", argLength: 2, reg: fp21, asm: "VPMINUW", commutative: true, typ: "Vec256", resultInArg0: false},
|
||||||
{name: "VPMULHUW256", argLength: 2, reg: fp21, asm: "VPMULHUW", commutative: true, typ: "Vec256", resultInArg0: false},
|
{name: "VPMULHUW256", argLength: 2, reg: fp21, asm: "VPMULHUW", commutative: true, typ: "Vec256", resultInArg0: false},
|
||||||
|
|
@ -714,17 +713,14 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
|
||||||
{name: "VPMAXUWMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMAXUW", commutative: true, typ: "Vec512", resultInArg0: false},
|
{name: "VPMAXUWMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMAXUW", commutative: true, typ: "Vec512", resultInArg0: false},
|
||||||
{name: "VPMINUWMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMINUW", commutative: true, typ: "Vec512", resultInArg0: false},
|
{name: "VPMINUWMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMINUW", commutative: true, typ: "Vec512", resultInArg0: false},
|
||||||
{name: "VPMULHUWMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMULHUW", commutative: true, typ: "Vec512", resultInArg0: false},
|
{name: "VPMULHUWMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMULHUW", commutative: true, typ: "Vec512", resultInArg0: false},
|
||||||
{name: "VPMADDUBSWMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMADDUBSW", commutative: false, typ: "Vec512", resultInArg0: false},
|
|
||||||
{name: "VPMAXUW512", argLength: 2, reg: fp21, asm: "VPMAXUW", commutative: true, typ: "Vec512", resultInArg0: false},
|
{name: "VPMAXUW512", argLength: 2, reg: fp21, asm: "VPMAXUW", commutative: true, typ: "Vec512", resultInArg0: false},
|
||||||
{name: "VPMINUW512", argLength: 2, reg: fp21, asm: "VPMINUW", commutative: true, typ: "Vec512", resultInArg0: false},
|
{name: "VPMINUW512", argLength: 2, reg: fp21, asm: "VPMINUW", commutative: true, typ: "Vec512", resultInArg0: false},
|
||||||
{name: "VPMULHUW512", argLength: 2, reg: fp21, asm: "VPMULHUW", commutative: true, typ: "Vec512", resultInArg0: false},
|
{name: "VPMULHUW512", argLength: 2, reg: fp21, asm: "VPMULHUW", commutative: true, typ: "Vec512", resultInArg0: false},
|
||||||
{name: "VPMADDUBSW512", argLength: 2, reg: fp21, asm: "VPMADDUBSW", commutative: false, typ: "Vec512", resultInArg0: false},
|
|
||||||
{name: "VPAVGW128", argLength: 2, reg: fp21, asm: "VPAVGW", commutative: true, typ: "Vec128", resultInArg0: false},
|
{name: "VPAVGW128", argLength: 2, reg: fp21, asm: "VPAVGW", commutative: true, typ: "Vec128", resultInArg0: false},
|
||||||
{name: "VPAVGWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPAVGW", commutative: true, typ: "Vec128", resultInArg0: false},
|
{name: "VPAVGWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPAVGW", commutative: true, typ: "Vec128", resultInArg0: false},
|
||||||
{name: "VPMAXUWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMAXUW", commutative: true, typ: "Vec128", resultInArg0: false},
|
{name: "VPMAXUWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMAXUW", commutative: true, typ: "Vec128", resultInArg0: false},
|
||||||
{name: "VPMINUWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMINUW", commutative: true, typ: "Vec128", resultInArg0: false},
|
{name: "VPMINUWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMINUW", commutative: true, typ: "Vec128", resultInArg0: false},
|
||||||
{name: "VPMULHUWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMULHUW", commutative: true, typ: "Vec128", resultInArg0: false},
|
{name: "VPMULHUWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMULHUW", commutative: true, typ: "Vec128", resultInArg0: false},
|
||||||
{name: "VPMADDUBSWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMADDUBSW", commutative: false, typ: "Vec128", resultInArg0: false},
|
|
||||||
{name: "VPMAXUW128", argLength: 2, reg: fp21, asm: "VPMAXUW", commutative: true, typ: "Vec128", resultInArg0: false},
|
{name: "VPMAXUW128", argLength: 2, reg: fp21, asm: "VPMAXUW", commutative: true, typ: "Vec128", resultInArg0: false},
|
||||||
{name: "VPMINUW128", argLength: 2, reg: fp21, asm: "VPMINUW", commutative: true, typ: "Vec128", resultInArg0: false},
|
{name: "VPMINUW128", argLength: 2, reg: fp21, asm: "VPMINUW", commutative: true, typ: "Vec128", resultInArg0: false},
|
||||||
{name: "VPMULHUW128", argLength: 2, reg: fp21, asm: "VPMULHUW", commutative: true, typ: "Vec128", resultInArg0: false},
|
{name: "VPMULHUW128", argLength: 2, reg: fp21, asm: "VPMULHUW", commutative: true, typ: "Vec128", resultInArg0: false},
|
||||||
|
|
@ -762,6 +758,7 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
|
||||||
{name: "VPAVGBMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPAVGB", commutative: true, typ: "Vec128", resultInArg0: false},
|
{name: "VPAVGBMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPAVGB", commutative: true, typ: "Vec128", resultInArg0: false},
|
||||||
{name: "VPMAXUBMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMAXUB", commutative: true, typ: "Vec128", resultInArg0: false},
|
{name: "VPMAXUBMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMAXUB", commutative: true, typ: "Vec128", resultInArg0: false},
|
||||||
{name: "VPMINUBMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMINUB", commutative: true, typ: "Vec128", resultInArg0: false},
|
{name: "VPMINUBMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMINUB", commutative: true, typ: "Vec128", resultInArg0: false},
|
||||||
|
{name: "VPMADDUBSWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMADDUBSW", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||||
{name: "VPMAXUB128", argLength: 2, reg: fp21, asm: "VPMAXUB", commutative: true, typ: "Vec128", resultInArg0: false},
|
{name: "VPMAXUB128", argLength: 2, reg: fp21, asm: "VPMAXUB", commutative: true, typ: "Vec128", resultInArg0: false},
|
||||||
{name: "VPMINUB128", argLength: 2, reg: fp21, asm: "VPMINUB", commutative: true, typ: "Vec128", resultInArg0: false},
|
{name: "VPMINUB128", argLength: 2, reg: fp21, asm: "VPMINUB", commutative: true, typ: "Vec128", resultInArg0: false},
|
||||||
{name: "VPMADDUBSW128", argLength: 2, reg: fp21, asm: "VPMADDUBSW", commutative: false, typ: "Vec128", resultInArg0: false},
|
{name: "VPMADDUBSW128", argLength: 2, reg: fp21, asm: "VPMADDUBSW", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||||
|
|
@ -769,6 +766,7 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
|
||||||
{name: "VPAVGBMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPAVGB", commutative: true, typ: "Vec256", resultInArg0: false},
|
{name: "VPAVGBMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPAVGB", commutative: true, typ: "Vec256", resultInArg0: false},
|
||||||
{name: "VPMAXUBMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMAXUB", commutative: true, typ: "Vec256", resultInArg0: false},
|
{name: "VPMAXUBMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMAXUB", commutative: true, typ: "Vec256", resultInArg0: false},
|
||||||
{name: "VPMINUBMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMINUB", commutative: true, typ: "Vec256", resultInArg0: false},
|
{name: "VPMINUBMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMINUB", commutative: true, typ: "Vec256", resultInArg0: false},
|
||||||
|
{name: "VPMADDUBSWMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMADDUBSW", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||||
{name: "VPMAXUB256", argLength: 2, reg: fp21, asm: "VPMAXUB", commutative: true, typ: "Vec256", resultInArg0: false},
|
{name: "VPMAXUB256", argLength: 2, reg: fp21, asm: "VPMAXUB", commutative: true, typ: "Vec256", resultInArg0: false},
|
||||||
{name: "VPMINUB256", argLength: 2, reg: fp21, asm: "VPMINUB", commutative: true, typ: "Vec256", resultInArg0: false},
|
{name: "VPMINUB256", argLength: 2, reg: fp21, asm: "VPMINUB", commutative: true, typ: "Vec256", resultInArg0: false},
|
||||||
{name: "VPMADDUBSW256", argLength: 2, reg: fp21, asm: "VPMADDUBSW", commutative: false, typ: "Vec256", resultInArg0: false},
|
{name: "VPMADDUBSW256", argLength: 2, reg: fp21, asm: "VPMADDUBSW", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||||
|
|
@ -776,8 +774,10 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
|
||||||
{name: "VPAVGBMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPAVGB", commutative: true, typ: "Vec512", resultInArg0: false},
|
{name: "VPAVGBMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPAVGB", commutative: true, typ: "Vec512", resultInArg0: false},
|
||||||
{name: "VPMAXUBMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMAXUB", commutative: true, typ: "Vec512", resultInArg0: false},
|
{name: "VPMAXUBMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMAXUB", commutative: true, typ: "Vec512", resultInArg0: false},
|
||||||
{name: "VPMINUBMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMINUB", commutative: true, typ: "Vec512", resultInArg0: false},
|
{name: "VPMINUBMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMINUB", commutative: true, typ: "Vec512", resultInArg0: false},
|
||||||
|
{name: "VPMADDUBSWMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMADDUBSW", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||||
{name: "VPMAXUB512", argLength: 2, reg: fp21, asm: "VPMAXUB", commutative: true, typ: "Vec512", resultInArg0: false},
|
{name: "VPMAXUB512", argLength: 2, reg: fp21, asm: "VPMAXUB", commutative: true, typ: "Vec512", resultInArg0: false},
|
||||||
{name: "VPMINUB512", argLength: 2, reg: fp21, asm: "VPMINUB", commutative: true, typ: "Vec512", resultInArg0: false},
|
{name: "VPMINUB512", argLength: 2, reg: fp21, asm: "VPMINUB", commutative: true, typ: "Vec512", resultInArg0: false},
|
||||||
|
{name: "VPMADDUBSW512", argLength: 2, reg: fp21, asm: "VPMADDUBSW", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||||
{name: "VRNDSCALEPS512", argLength: 1, reg: fp11, asm: "VRNDSCALEPS", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
|
{name: "VRNDSCALEPS512", argLength: 1, reg: fp11, asm: "VRNDSCALEPS", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||||
{name: "VREDUCEPS512", argLength: 1, reg: fp11, asm: "VREDUCEPS", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
|
{name: "VREDUCEPS512", argLength: 1, reg: fp11, asm: "VREDUCEPS", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||||
{name: "VCMPPS512", argLength: 2, reg: fp2k1, asm: "VCMPPS", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
|
{name: "VCMPPS512", argLength: 2, reg: fp2k1, asm: "VCMPPS", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
|
||||||
|
|
|
||||||
|
|
@ -979,7 +979,6 @@ func simdGenericOps() []opData {
|
||||||
{name: "MaskedPopCountUint16x16", argLength: 2, commutative: false},
|
{name: "MaskedPopCountUint16x16", argLength: 2, commutative: false},
|
||||||
{name: "MaskedSaturatedAddUint16x16", argLength: 3, commutative: true},
|
{name: "MaskedSaturatedAddUint16x16", argLength: 3, commutative: true},
|
||||||
{name: "MaskedSaturatedSubUint16x16", argLength: 3, commutative: false},
|
{name: "MaskedSaturatedSubUint16x16", argLength: 3, commutative: false},
|
||||||
{name: "MaskedSaturatedUnsignedSignedPairDotProdUint16x16", argLength: 3, commutative: false},
|
|
||||||
{name: "MaskedSubUint16x16", argLength: 3, commutative: false},
|
{name: "MaskedSubUint16x16", argLength: 3, commutative: false},
|
||||||
{name: "MaxUint16x16", argLength: 2, commutative: true},
|
{name: "MaxUint16x16", argLength: 2, commutative: true},
|
||||||
{name: "MinUint16x16", argLength: 2, commutative: true},
|
{name: "MinUint16x16", argLength: 2, commutative: true},
|
||||||
|
|
@ -991,7 +990,6 @@ func simdGenericOps() []opData {
|
||||||
{name: "PopCountUint16x16", argLength: 1, commutative: false},
|
{name: "PopCountUint16x16", argLength: 1, commutative: false},
|
||||||
{name: "SaturatedAddUint16x16", argLength: 2, commutative: true},
|
{name: "SaturatedAddUint16x16", argLength: 2, commutative: true},
|
||||||
{name: "SaturatedSubUint16x16", argLength: 2, commutative: false},
|
{name: "SaturatedSubUint16x16", argLength: 2, commutative: false},
|
||||||
{name: "SaturatedUnsignedSignedPairDotProdUint16x16", argLength: 2, commutative: false},
|
|
||||||
{name: "SubUint16x16", argLength: 2, commutative: false},
|
{name: "SubUint16x16", argLength: 2, commutative: false},
|
||||||
{name: "XorUint16x16", argLength: 2, commutative: true},
|
{name: "XorUint16x16", argLength: 2, commutative: true},
|
||||||
{name: "AddUint16x32", argLength: 2, commutative: true},
|
{name: "AddUint16x32", argLength: 2, commutative: true},
|
||||||
|
|
@ -1015,7 +1013,6 @@ func simdGenericOps() []opData {
|
||||||
{name: "MaskedPopCountUint16x32", argLength: 2, commutative: false},
|
{name: "MaskedPopCountUint16x32", argLength: 2, commutative: false},
|
||||||
{name: "MaskedSaturatedAddUint16x32", argLength: 3, commutative: true},
|
{name: "MaskedSaturatedAddUint16x32", argLength: 3, commutative: true},
|
||||||
{name: "MaskedSaturatedSubUint16x32", argLength: 3, commutative: false},
|
{name: "MaskedSaturatedSubUint16x32", argLength: 3, commutative: false},
|
||||||
{name: "MaskedSaturatedUnsignedSignedPairDotProdUint16x32", argLength: 3, commutative: false},
|
|
||||||
{name: "MaskedSubUint16x32", argLength: 3, commutative: false},
|
{name: "MaskedSubUint16x32", argLength: 3, commutative: false},
|
||||||
{name: "MaxUint16x32", argLength: 2, commutative: true},
|
{name: "MaxUint16x32", argLength: 2, commutative: true},
|
||||||
{name: "MinUint16x32", argLength: 2, commutative: true},
|
{name: "MinUint16x32", argLength: 2, commutative: true},
|
||||||
|
|
@ -1024,7 +1021,6 @@ func simdGenericOps() []opData {
|
||||||
{name: "PopCountUint16x32", argLength: 1, commutative: false},
|
{name: "PopCountUint16x32", argLength: 1, commutative: false},
|
||||||
{name: "SaturatedAddUint16x32", argLength: 2, commutative: true},
|
{name: "SaturatedAddUint16x32", argLength: 2, commutative: true},
|
||||||
{name: "SaturatedSubUint16x32", argLength: 2, commutative: false},
|
{name: "SaturatedSubUint16x32", argLength: 2, commutative: false},
|
||||||
{name: "SaturatedUnsignedSignedPairDotProdUint16x32", argLength: 2, commutative: false},
|
|
||||||
{name: "SubUint16x32", argLength: 2, commutative: false},
|
{name: "SubUint16x32", argLength: 2, commutative: false},
|
||||||
{name: "AddUint16x8", argLength: 2, commutative: true},
|
{name: "AddUint16x8", argLength: 2, commutative: true},
|
||||||
{name: "AndUint16x8", argLength: 2, commutative: true},
|
{name: "AndUint16x8", argLength: 2, commutative: true},
|
||||||
|
|
@ -1049,7 +1045,6 @@ func simdGenericOps() []opData {
|
||||||
{name: "MaskedPopCountUint16x8", argLength: 2, commutative: false},
|
{name: "MaskedPopCountUint16x8", argLength: 2, commutative: false},
|
||||||
{name: "MaskedSaturatedAddUint16x8", argLength: 3, commutative: true},
|
{name: "MaskedSaturatedAddUint16x8", argLength: 3, commutative: true},
|
||||||
{name: "MaskedSaturatedSubUint16x8", argLength: 3, commutative: false},
|
{name: "MaskedSaturatedSubUint16x8", argLength: 3, commutative: false},
|
||||||
{name: "MaskedSaturatedUnsignedSignedPairDotProdUint16x8", argLength: 3, commutative: false},
|
|
||||||
{name: "MaskedSubUint16x8", argLength: 3, commutative: false},
|
{name: "MaskedSubUint16x8", argLength: 3, commutative: false},
|
||||||
{name: "MaxUint16x8", argLength: 2, commutative: true},
|
{name: "MaxUint16x8", argLength: 2, commutative: true},
|
||||||
{name: "MinUint16x8", argLength: 2, commutative: true},
|
{name: "MinUint16x8", argLength: 2, commutative: true},
|
||||||
|
|
@ -1061,7 +1056,6 @@ func simdGenericOps() []opData {
|
||||||
{name: "PopCountUint16x8", argLength: 1, commutative: false},
|
{name: "PopCountUint16x8", argLength: 1, commutative: false},
|
||||||
{name: "SaturatedAddUint16x8", argLength: 2, commutative: true},
|
{name: "SaturatedAddUint16x8", argLength: 2, commutative: true},
|
||||||
{name: "SaturatedSubUint16x8", argLength: 2, commutative: false},
|
{name: "SaturatedSubUint16x8", argLength: 2, commutative: false},
|
||||||
{name: "SaturatedUnsignedSignedPairDotProdUint16x8", argLength: 2, commutative: false},
|
|
||||||
{name: "SubUint16x8", argLength: 2, commutative: false},
|
{name: "SubUint16x8", argLength: 2, commutative: false},
|
||||||
{name: "XorUint16x8", argLength: 2, commutative: true},
|
{name: "XorUint16x8", argLength: 2, commutative: true},
|
||||||
{name: "AddUint32x16", argLength: 2, commutative: true},
|
{name: "AddUint32x16", argLength: 2, commutative: true},
|
||||||
|
|
@ -1290,6 +1284,7 @@ func simdGenericOps() []opData {
|
||||||
{name: "MaskedPopCountUint8x16", argLength: 2, commutative: false},
|
{name: "MaskedPopCountUint8x16", argLength: 2, commutative: false},
|
||||||
{name: "MaskedSaturatedAddUint8x16", argLength: 3, commutative: true},
|
{name: "MaskedSaturatedAddUint8x16", argLength: 3, commutative: true},
|
||||||
{name: "MaskedSaturatedSubUint8x16", argLength: 3, commutative: false},
|
{name: "MaskedSaturatedSubUint8x16", argLength: 3, commutative: false},
|
||||||
|
{name: "MaskedSaturatedUnsignedSignedPairDotProdUint8x16", argLength: 3, commutative: false},
|
||||||
{name: "MaskedSubUint8x16", argLength: 3, commutative: false},
|
{name: "MaskedSubUint8x16", argLength: 3, commutative: false},
|
||||||
{name: "MaxUint8x16", argLength: 2, commutative: true},
|
{name: "MaxUint8x16", argLength: 2, commutative: true},
|
||||||
{name: "MinUint8x16", argLength: 2, commutative: true},
|
{name: "MinUint8x16", argLength: 2, commutative: true},
|
||||||
|
|
@ -1323,6 +1318,7 @@ func simdGenericOps() []opData {
|
||||||
{name: "MaskedPopCountUint8x32", argLength: 2, commutative: false},
|
{name: "MaskedPopCountUint8x32", argLength: 2, commutative: false},
|
||||||
{name: "MaskedSaturatedAddUint8x32", argLength: 3, commutative: true},
|
{name: "MaskedSaturatedAddUint8x32", argLength: 3, commutative: true},
|
||||||
{name: "MaskedSaturatedSubUint8x32", argLength: 3, commutative: false},
|
{name: "MaskedSaturatedSubUint8x32", argLength: 3, commutative: false},
|
||||||
|
{name: "MaskedSaturatedUnsignedSignedPairDotProdUint8x32", argLength: 3, commutative: false},
|
||||||
{name: "MaskedSubUint8x32", argLength: 3, commutative: false},
|
{name: "MaskedSubUint8x32", argLength: 3, commutative: false},
|
||||||
{name: "MaxUint8x32", argLength: 2, commutative: true},
|
{name: "MaxUint8x32", argLength: 2, commutative: true},
|
||||||
{name: "MinUint8x32", argLength: 2, commutative: true},
|
{name: "MinUint8x32", argLength: 2, commutative: true},
|
||||||
|
|
@ -1354,6 +1350,7 @@ func simdGenericOps() []opData {
|
||||||
{name: "MaskedPopCountUint8x64", argLength: 2, commutative: false},
|
{name: "MaskedPopCountUint8x64", argLength: 2, commutative: false},
|
||||||
{name: "MaskedSaturatedAddUint8x64", argLength: 3, commutative: true},
|
{name: "MaskedSaturatedAddUint8x64", argLength: 3, commutative: true},
|
||||||
{name: "MaskedSaturatedSubUint8x64", argLength: 3, commutative: false},
|
{name: "MaskedSaturatedSubUint8x64", argLength: 3, commutative: false},
|
||||||
|
{name: "MaskedSaturatedUnsignedSignedPairDotProdUint8x64", argLength: 3, commutative: false},
|
||||||
{name: "MaskedSubUint8x64", argLength: 3, commutative: false},
|
{name: "MaskedSubUint8x64", argLength: 3, commutative: false},
|
||||||
{name: "MaxUint8x64", argLength: 2, commutative: true},
|
{name: "MaxUint8x64", argLength: 2, commutative: true},
|
||||||
{name: "MinUint8x64", argLength: 2, commutative: true},
|
{name: "MinUint8x64", argLength: 2, commutative: true},
|
||||||
|
|
@ -1361,6 +1358,7 @@ func simdGenericOps() []opData {
|
||||||
{name: "PopCountUint8x64", argLength: 1, commutative: false},
|
{name: "PopCountUint8x64", argLength: 1, commutative: false},
|
||||||
{name: "SaturatedAddUint8x64", argLength: 2, commutative: true},
|
{name: "SaturatedAddUint8x64", argLength: 2, commutative: true},
|
||||||
{name: "SaturatedSubUint8x64", argLength: 2, commutative: false},
|
{name: "SaturatedSubUint8x64", argLength: 2, commutative: false},
|
||||||
|
{name: "SaturatedUnsignedSignedPairDotProdUint8x64", argLength: 2, commutative: false},
|
||||||
{name: "SubUint8x64", argLength: 2, commutative: false},
|
{name: "SubUint8x64", argLength: 2, commutative: false},
|
||||||
{name: "CeilSuppressExceptionWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
|
{name: "CeilSuppressExceptionWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
|
||||||
{name: "CeilWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
|
{name: "CeilWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
|
||||||
|
|
|
||||||
|
|
@ -1898,7 +1898,6 @@ const (
|
||||||
OpAMD64VPMAXUWMasked256
|
OpAMD64VPMAXUWMasked256
|
||||||
OpAMD64VPMINUWMasked256
|
OpAMD64VPMINUWMasked256
|
||||||
OpAMD64VPMULHUWMasked256
|
OpAMD64VPMULHUWMasked256
|
||||||
OpAMD64VPMADDUBSWMasked256
|
|
||||||
OpAMD64VPMAXUW256
|
OpAMD64VPMAXUW256
|
||||||
OpAMD64VPMINUW256
|
OpAMD64VPMINUW256
|
||||||
OpAMD64VPMULHUW256
|
OpAMD64VPMULHUW256
|
||||||
|
|
@ -1907,17 +1906,14 @@ const (
|
||||||
OpAMD64VPMAXUWMasked512
|
OpAMD64VPMAXUWMasked512
|
||||||
OpAMD64VPMINUWMasked512
|
OpAMD64VPMINUWMasked512
|
||||||
OpAMD64VPMULHUWMasked512
|
OpAMD64VPMULHUWMasked512
|
||||||
OpAMD64VPMADDUBSWMasked512
|
|
||||||
OpAMD64VPMAXUW512
|
OpAMD64VPMAXUW512
|
||||||
OpAMD64VPMINUW512
|
OpAMD64VPMINUW512
|
||||||
OpAMD64VPMULHUW512
|
OpAMD64VPMULHUW512
|
||||||
OpAMD64VPMADDUBSW512
|
|
||||||
OpAMD64VPAVGW128
|
OpAMD64VPAVGW128
|
||||||
OpAMD64VPAVGWMasked128
|
OpAMD64VPAVGWMasked128
|
||||||
OpAMD64VPMAXUWMasked128
|
OpAMD64VPMAXUWMasked128
|
||||||
OpAMD64VPMINUWMasked128
|
OpAMD64VPMINUWMasked128
|
||||||
OpAMD64VPMULHUWMasked128
|
OpAMD64VPMULHUWMasked128
|
||||||
OpAMD64VPMADDUBSWMasked128
|
|
||||||
OpAMD64VPMAXUW128
|
OpAMD64VPMAXUW128
|
||||||
OpAMD64VPMINUW128
|
OpAMD64VPMINUW128
|
||||||
OpAMD64VPMULHUW128
|
OpAMD64VPMULHUW128
|
||||||
|
|
@ -1955,6 +1951,7 @@ const (
|
||||||
OpAMD64VPAVGBMasked128
|
OpAMD64VPAVGBMasked128
|
||||||
OpAMD64VPMAXUBMasked128
|
OpAMD64VPMAXUBMasked128
|
||||||
OpAMD64VPMINUBMasked128
|
OpAMD64VPMINUBMasked128
|
||||||
|
OpAMD64VPMADDUBSWMasked128
|
||||||
OpAMD64VPMAXUB128
|
OpAMD64VPMAXUB128
|
||||||
OpAMD64VPMINUB128
|
OpAMD64VPMINUB128
|
||||||
OpAMD64VPMADDUBSW128
|
OpAMD64VPMADDUBSW128
|
||||||
|
|
@ -1962,6 +1959,7 @@ const (
|
||||||
OpAMD64VPAVGBMasked256
|
OpAMD64VPAVGBMasked256
|
||||||
OpAMD64VPMAXUBMasked256
|
OpAMD64VPMAXUBMasked256
|
||||||
OpAMD64VPMINUBMasked256
|
OpAMD64VPMINUBMasked256
|
||||||
|
OpAMD64VPMADDUBSWMasked256
|
||||||
OpAMD64VPMAXUB256
|
OpAMD64VPMAXUB256
|
||||||
OpAMD64VPMINUB256
|
OpAMD64VPMINUB256
|
||||||
OpAMD64VPMADDUBSW256
|
OpAMD64VPMADDUBSW256
|
||||||
|
|
@ -1969,8 +1967,10 @@ const (
|
||||||
OpAMD64VPAVGBMasked512
|
OpAMD64VPAVGBMasked512
|
||||||
OpAMD64VPMAXUBMasked512
|
OpAMD64VPMAXUBMasked512
|
||||||
OpAMD64VPMINUBMasked512
|
OpAMD64VPMINUBMasked512
|
||||||
|
OpAMD64VPMADDUBSWMasked512
|
||||||
OpAMD64VPMAXUB512
|
OpAMD64VPMAXUB512
|
||||||
OpAMD64VPMINUB512
|
OpAMD64VPMINUB512
|
||||||
|
OpAMD64VPMADDUBSW512
|
||||||
OpAMD64VRNDSCALEPS512
|
OpAMD64VRNDSCALEPS512
|
||||||
OpAMD64VREDUCEPS512
|
OpAMD64VREDUCEPS512
|
||||||
OpAMD64VCMPPS512
|
OpAMD64VCMPPS512
|
||||||
|
|
@ -5262,7 +5262,6 @@ const (
|
||||||
OpMaskedPopCountUint16x16
|
OpMaskedPopCountUint16x16
|
||||||
OpMaskedSaturatedAddUint16x16
|
OpMaskedSaturatedAddUint16x16
|
||||||
OpMaskedSaturatedSubUint16x16
|
OpMaskedSaturatedSubUint16x16
|
||||||
OpMaskedSaturatedUnsignedSignedPairDotProdUint16x16
|
|
||||||
OpMaskedSubUint16x16
|
OpMaskedSubUint16x16
|
||||||
OpMaxUint16x16
|
OpMaxUint16x16
|
||||||
OpMinUint16x16
|
OpMinUint16x16
|
||||||
|
|
@ -5274,7 +5273,6 @@ const (
|
||||||
OpPopCountUint16x16
|
OpPopCountUint16x16
|
||||||
OpSaturatedAddUint16x16
|
OpSaturatedAddUint16x16
|
||||||
OpSaturatedSubUint16x16
|
OpSaturatedSubUint16x16
|
||||||
OpSaturatedUnsignedSignedPairDotProdUint16x16
|
|
||||||
OpSubUint16x16
|
OpSubUint16x16
|
||||||
OpXorUint16x16
|
OpXorUint16x16
|
||||||
OpAddUint16x32
|
OpAddUint16x32
|
||||||
|
|
@ -5298,7 +5296,6 @@ const (
|
||||||
OpMaskedPopCountUint16x32
|
OpMaskedPopCountUint16x32
|
||||||
OpMaskedSaturatedAddUint16x32
|
OpMaskedSaturatedAddUint16x32
|
||||||
OpMaskedSaturatedSubUint16x32
|
OpMaskedSaturatedSubUint16x32
|
||||||
OpMaskedSaturatedUnsignedSignedPairDotProdUint16x32
|
|
||||||
OpMaskedSubUint16x32
|
OpMaskedSubUint16x32
|
||||||
OpMaxUint16x32
|
OpMaxUint16x32
|
||||||
OpMinUint16x32
|
OpMinUint16x32
|
||||||
|
|
@ -5307,7 +5304,6 @@ const (
|
||||||
OpPopCountUint16x32
|
OpPopCountUint16x32
|
||||||
OpSaturatedAddUint16x32
|
OpSaturatedAddUint16x32
|
||||||
OpSaturatedSubUint16x32
|
OpSaturatedSubUint16x32
|
||||||
OpSaturatedUnsignedSignedPairDotProdUint16x32
|
|
||||||
OpSubUint16x32
|
OpSubUint16x32
|
||||||
OpAddUint16x8
|
OpAddUint16x8
|
||||||
OpAndUint16x8
|
OpAndUint16x8
|
||||||
|
|
@ -5332,7 +5328,6 @@ const (
|
||||||
OpMaskedPopCountUint16x8
|
OpMaskedPopCountUint16x8
|
||||||
OpMaskedSaturatedAddUint16x8
|
OpMaskedSaturatedAddUint16x8
|
||||||
OpMaskedSaturatedSubUint16x8
|
OpMaskedSaturatedSubUint16x8
|
||||||
OpMaskedSaturatedUnsignedSignedPairDotProdUint16x8
|
|
||||||
OpMaskedSubUint16x8
|
OpMaskedSubUint16x8
|
||||||
OpMaxUint16x8
|
OpMaxUint16x8
|
||||||
OpMinUint16x8
|
OpMinUint16x8
|
||||||
|
|
@ -5344,7 +5339,6 @@ const (
|
||||||
OpPopCountUint16x8
|
OpPopCountUint16x8
|
||||||
OpSaturatedAddUint16x8
|
OpSaturatedAddUint16x8
|
||||||
OpSaturatedSubUint16x8
|
OpSaturatedSubUint16x8
|
||||||
OpSaturatedUnsignedSignedPairDotProdUint16x8
|
|
||||||
OpSubUint16x8
|
OpSubUint16x8
|
||||||
OpXorUint16x8
|
OpXorUint16x8
|
||||||
OpAddUint32x16
|
OpAddUint32x16
|
||||||
|
|
@ -5573,6 +5567,7 @@ const (
|
||||||
OpMaskedPopCountUint8x16
|
OpMaskedPopCountUint8x16
|
||||||
OpMaskedSaturatedAddUint8x16
|
OpMaskedSaturatedAddUint8x16
|
||||||
OpMaskedSaturatedSubUint8x16
|
OpMaskedSaturatedSubUint8x16
|
||||||
|
OpMaskedSaturatedUnsignedSignedPairDotProdUint8x16
|
||||||
OpMaskedSubUint8x16
|
OpMaskedSubUint8x16
|
||||||
OpMaxUint8x16
|
OpMaxUint8x16
|
||||||
OpMinUint8x16
|
OpMinUint8x16
|
||||||
|
|
@ -5606,6 +5601,7 @@ const (
|
||||||
OpMaskedPopCountUint8x32
|
OpMaskedPopCountUint8x32
|
||||||
OpMaskedSaturatedAddUint8x32
|
OpMaskedSaturatedAddUint8x32
|
||||||
OpMaskedSaturatedSubUint8x32
|
OpMaskedSaturatedSubUint8x32
|
||||||
|
OpMaskedSaturatedUnsignedSignedPairDotProdUint8x32
|
||||||
OpMaskedSubUint8x32
|
OpMaskedSubUint8x32
|
||||||
OpMaxUint8x32
|
OpMaxUint8x32
|
||||||
OpMinUint8x32
|
OpMinUint8x32
|
||||||
|
|
@ -5637,6 +5633,7 @@ const (
|
||||||
OpMaskedPopCountUint8x64
|
OpMaskedPopCountUint8x64
|
||||||
OpMaskedSaturatedAddUint8x64
|
OpMaskedSaturatedAddUint8x64
|
||||||
OpMaskedSaturatedSubUint8x64
|
OpMaskedSaturatedSubUint8x64
|
||||||
|
OpMaskedSaturatedUnsignedSignedPairDotProdUint8x64
|
||||||
OpMaskedSubUint8x64
|
OpMaskedSubUint8x64
|
||||||
OpMaxUint8x64
|
OpMaxUint8x64
|
||||||
OpMinUint8x64
|
OpMinUint8x64
|
||||||
|
|
@ -5644,6 +5641,7 @@ const (
|
||||||
OpPopCountUint8x64
|
OpPopCountUint8x64
|
||||||
OpSaturatedAddUint8x64
|
OpSaturatedAddUint8x64
|
||||||
OpSaturatedSubUint8x64
|
OpSaturatedSubUint8x64
|
||||||
|
OpSaturatedUnsignedSignedPairDotProdUint8x64
|
||||||
OpSubUint8x64
|
OpSubUint8x64
|
||||||
OpCeilSuppressExceptionWithPrecisionFloat32x16
|
OpCeilSuppressExceptionWithPrecisionFloat32x16
|
||||||
OpCeilWithPrecisionFloat32x16
|
OpCeilWithPrecisionFloat32x16
|
||||||
|
|
@ -29231,21 +29229,6 @@ var opcodeTable = [...]opInfo{
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "VPMADDUBSWMasked256",
|
|
||||||
argLen: 3,
|
|
||||||
asm: x86.AVPMADDUBSW,
|
|
||||||
reg: regInfo{
|
|
||||||
inputs: []inputInfo{
|
|
||||||
{2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
|
|
||||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
|
||||||
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
|
||||||
},
|
|
||||||
outputs: []outputInfo{
|
|
||||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
name: "VPMAXUW256",
|
name: "VPMAXUW256",
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
|
|
@ -29370,21 +29353,6 @@ var opcodeTable = [...]opInfo{
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "VPMADDUBSWMasked512",
|
|
||||||
argLen: 3,
|
|
||||||
asm: x86.AVPMADDUBSW,
|
|
||||||
reg: regInfo{
|
|
||||||
inputs: []inputInfo{
|
|
||||||
{2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
|
|
||||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
|
||||||
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
|
||||||
},
|
|
||||||
outputs: []outputInfo{
|
|
||||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
name: "VPMAXUW512",
|
name: "VPMAXUW512",
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
|
|
@ -29430,20 +29398,6 @@ var opcodeTable = [...]opInfo{
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "VPMADDUBSW512",
|
|
||||||
argLen: 2,
|
|
||||||
asm: x86.AVPMADDUBSW,
|
|
||||||
reg: regInfo{
|
|
||||||
inputs: []inputInfo{
|
|
||||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
|
||||||
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
|
||||||
},
|
|
||||||
outputs: []outputInfo{
|
|
||||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
name: "VPAVGW128",
|
name: "VPAVGW128",
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
|
|
@ -29523,21 +29477,6 @@ var opcodeTable = [...]opInfo{
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "VPMADDUBSWMasked128",
|
|
||||||
argLen: 3,
|
|
||||||
asm: x86.AVPMADDUBSW,
|
|
||||||
reg: regInfo{
|
|
||||||
inputs: []inputInfo{
|
|
||||||
{2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
|
|
||||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
|
||||||
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
|
||||||
},
|
|
||||||
outputs: []outputInfo{
|
|
||||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
name: "VPMAXUW128",
|
name: "VPMAXUW128",
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
|
|
@ -30111,6 +30050,21 @@ var opcodeTable = [...]opInfo{
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "VPMADDUBSWMasked128",
|
||||||
|
argLen: 3,
|
||||||
|
asm: x86.AVPMADDUBSW,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "VPMAXUB128",
|
name: "VPMAXUB128",
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
|
|
@ -30218,6 +30172,21 @@ var opcodeTable = [...]opInfo{
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "VPMADDUBSWMasked256",
|
||||||
|
argLen: 3,
|
||||||
|
asm: x86.AVPMADDUBSW,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "VPMAXUB256",
|
name: "VPMAXUB256",
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
|
|
@ -30325,6 +30294,21 @@ var opcodeTable = [...]opInfo{
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "VPMADDUBSWMasked512",
|
||||||
|
argLen: 3,
|
||||||
|
asm: x86.AVPMADDUBSW,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "VPMAXUB512",
|
name: "VPMAXUB512",
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
|
|
@ -30355,6 +30339,20 @@ var opcodeTable = [...]opInfo{
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "VPMADDUBSW512",
|
||||||
|
argLen: 2,
|
||||||
|
asm: x86.AVPMADDUBSW,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "VRNDSCALEPS512",
|
name: "VRNDSCALEPS512",
|
||||||
auxType: auxInt8,
|
auxType: auxInt8,
|
||||||
|
|
@ -64134,11 +64132,6 @@ var opcodeTable = [...]opInfo{
|
||||||
argLen: 3,
|
argLen: 3,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "MaskedSaturatedUnsignedSignedPairDotProdUint16x16",
|
|
||||||
argLen: 3,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
name: "MaskedSubUint16x16",
|
name: "MaskedSubUint16x16",
|
||||||
argLen: 3,
|
argLen: 3,
|
||||||
|
|
@ -64200,11 +64193,6 @@ var opcodeTable = [...]opInfo{
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "SaturatedUnsignedSignedPairDotProdUint16x16",
|
|
||||||
argLen: 2,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
name: "SubUint16x16",
|
name: "SubUint16x16",
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
|
|
@ -64332,11 +64320,6 @@ var opcodeTable = [...]opInfo{
|
||||||
argLen: 3,
|
argLen: 3,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "MaskedSaturatedUnsignedSignedPairDotProdUint16x32",
|
|
||||||
argLen: 3,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
name: "MaskedSubUint16x32",
|
name: "MaskedSubUint16x32",
|
||||||
argLen: 3,
|
argLen: 3,
|
||||||
|
|
@ -64382,11 +64365,6 @@ var opcodeTable = [...]opInfo{
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "SaturatedUnsignedSignedPairDotProdUint16x32",
|
|
||||||
argLen: 2,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
name: "SubUint16x32",
|
name: "SubUint16x32",
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
|
|
@ -64519,11 +64497,6 @@ var opcodeTable = [...]opInfo{
|
||||||
argLen: 3,
|
argLen: 3,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "MaskedSaturatedUnsignedSignedPairDotProdUint16x8",
|
|
||||||
argLen: 3,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
name: "MaskedSubUint16x8",
|
name: "MaskedSubUint16x8",
|
||||||
argLen: 3,
|
argLen: 3,
|
||||||
|
|
@ -64585,11 +64558,6 @@ var opcodeTable = [...]opInfo{
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "SaturatedUnsignedSignedPairDotProdUint16x8",
|
|
||||||
argLen: 2,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
name: "SubUint16x8",
|
name: "SubUint16x8",
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
|
|
@ -65846,6 +65814,11 @@ var opcodeTable = [...]opInfo{
|
||||||
argLen: 3,
|
argLen: 3,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "MaskedSaturatedUnsignedSignedPairDotProdUint8x16",
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "MaskedSubUint8x16",
|
name: "MaskedSubUint8x16",
|
||||||
argLen: 3,
|
argLen: 3,
|
||||||
|
|
@ -66028,6 +66001,11 @@ var opcodeTable = [...]opInfo{
|
||||||
argLen: 3,
|
argLen: 3,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "MaskedSaturatedUnsignedSignedPairDotProdUint8x32",
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "MaskedSubUint8x32",
|
name: "MaskedSubUint8x32",
|
||||||
argLen: 3,
|
argLen: 3,
|
||||||
|
|
@ -66199,6 +66177,11 @@ var opcodeTable = [...]opInfo{
|
||||||
argLen: 3,
|
argLen: 3,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "MaskedSaturatedUnsignedSignedPairDotProdUint8x64",
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "MaskedSubUint8x64",
|
name: "MaskedSubUint8x64",
|
||||||
argLen: 3,
|
argLen: 3,
|
||||||
|
|
@ -66238,6 +66221,11 @@ var opcodeTable = [...]opInfo{
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "SaturatedUnsignedSignedPairDotProdUint8x64",
|
||||||
|
argLen: 2,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "SubUint8x64",
|
name: "SubUint8x64",
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
|
|
|
||||||
|
|
@ -3374,12 +3374,12 @@ func rewriteValueAMD64(v *Value) bool {
|
||||||
return rewriteValueAMD64_OpMaskedSaturatedSubUint8x32(v)
|
return rewriteValueAMD64_OpMaskedSaturatedSubUint8x32(v)
|
||||||
case OpMaskedSaturatedSubUint8x64:
|
case OpMaskedSaturatedSubUint8x64:
|
||||||
return rewriteValueAMD64_OpMaskedSaturatedSubUint8x64(v)
|
return rewriteValueAMD64_OpMaskedSaturatedSubUint8x64(v)
|
||||||
case OpMaskedSaturatedUnsignedSignedPairDotProdUint16x16:
|
case OpMaskedSaturatedUnsignedSignedPairDotProdUint8x16:
|
||||||
return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint16x16(v)
|
return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint8x16(v)
|
||||||
case OpMaskedSaturatedUnsignedSignedPairDotProdUint16x32:
|
case OpMaskedSaturatedUnsignedSignedPairDotProdUint8x32:
|
||||||
return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint16x32(v)
|
return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint8x32(v)
|
||||||
case OpMaskedSaturatedUnsignedSignedPairDotProdUint16x8:
|
case OpMaskedSaturatedUnsignedSignedPairDotProdUint8x64:
|
||||||
return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint16x8(v)
|
return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint8x64(v)
|
||||||
case OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16:
|
case OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16:
|
||||||
return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16(v)
|
return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16(v)
|
||||||
case OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4:
|
case OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4:
|
||||||
|
|
@ -4455,21 +4455,15 @@ func rewriteValueAMD64(v *Value) bool {
|
||||||
case OpSaturatedSubUint8x64:
|
case OpSaturatedSubUint8x64:
|
||||||
v.Op = OpAMD64VPSUBSB512
|
v.Op = OpAMD64VPSUBSB512
|
||||||
return true
|
return true
|
||||||
case OpSaturatedUnsignedSignedPairDotProdUint16x16:
|
|
||||||
v.Op = OpAMD64VPMADDUBSW256
|
|
||||||
return true
|
|
||||||
case OpSaturatedUnsignedSignedPairDotProdUint16x32:
|
|
||||||
v.Op = OpAMD64VPMADDUBSW512
|
|
||||||
return true
|
|
||||||
case OpSaturatedUnsignedSignedPairDotProdUint16x8:
|
|
||||||
v.Op = OpAMD64VPMADDUBSW128
|
|
||||||
return true
|
|
||||||
case OpSaturatedUnsignedSignedPairDotProdUint8x16:
|
case OpSaturatedUnsignedSignedPairDotProdUint8x16:
|
||||||
v.Op = OpAMD64VPMADDUBSW128
|
v.Op = OpAMD64VPMADDUBSW128
|
||||||
return true
|
return true
|
||||||
case OpSaturatedUnsignedSignedPairDotProdUint8x32:
|
case OpSaturatedUnsignedSignedPairDotProdUint8x32:
|
||||||
v.Op = OpAMD64VPMADDUBSW256
|
v.Op = OpAMD64VPMADDUBSW256
|
||||||
return true
|
return true
|
||||||
|
case OpSaturatedUnsignedSignedPairDotProdUint8x64:
|
||||||
|
v.Op = OpAMD64VPMADDUBSW512
|
||||||
|
return true
|
||||||
case OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16:
|
case OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16:
|
||||||
v.Op = OpAMD64VPDPBUSDS512
|
v.Op = OpAMD64VPDPBUSDS512
|
||||||
return true
|
return true
|
||||||
|
|
@ -46801,12 +46795,30 @@ func rewriteValueAMD64_OpMaskedSaturatedSubUint8x64(v *Value) bool {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint16x16(v *Value) bool {
|
func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint8x16(v *Value) bool {
|
||||||
v_2 := v.Args[2]
|
v_2 := v.Args[2]
|
||||||
v_1 := v.Args[1]
|
v_1 := v.Args[1]
|
||||||
v_0 := v.Args[0]
|
v_0 := v.Args[0]
|
||||||
b := v.Block
|
b := v.Block
|
||||||
// match: (MaskedSaturatedUnsignedSignedPairDotProdUint16x16 x y mask)
|
// match: (MaskedSaturatedUnsignedSignedPairDotProdUint8x16 x y mask)
|
||||||
|
// result: (VPMADDUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||||
|
for {
|
||||||
|
x := v_0
|
||||||
|
y := v_1
|
||||||
|
mask := v_2
|
||||||
|
v.reset(OpAMD64VPMADDUBSWMasked128)
|
||||||
|
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
|
||||||
|
v0.AddArg(mask)
|
||||||
|
v.AddArg3(x, y, v0)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint8x32(v *Value) bool {
|
||||||
|
v_2 := v.Args[2]
|
||||||
|
v_1 := v.Args[1]
|
||||||
|
v_0 := v.Args[0]
|
||||||
|
b := v.Block
|
||||||
|
// match: (MaskedSaturatedUnsignedSignedPairDotProdUint8x32 x y mask)
|
||||||
// result: (VPMADDUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
|
// result: (VPMADDUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||||
for {
|
for {
|
||||||
x := v_0
|
x := v_0
|
||||||
|
|
@ -46819,12 +46831,12 @@ func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint16x16(v *Va
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint16x32(v *Value) bool {
|
func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint8x64(v *Value) bool {
|
||||||
v_2 := v.Args[2]
|
v_2 := v.Args[2]
|
||||||
v_1 := v.Args[1]
|
v_1 := v.Args[1]
|
||||||
v_0 := v.Args[0]
|
v_0 := v.Args[0]
|
||||||
b := v.Block
|
b := v.Block
|
||||||
// match: (MaskedSaturatedUnsignedSignedPairDotProdUint16x32 x y mask)
|
// match: (MaskedSaturatedUnsignedSignedPairDotProdUint8x64 x y mask)
|
||||||
// result: (VPMADDUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
|
// result: (VPMADDUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
|
||||||
for {
|
for {
|
||||||
x := v_0
|
x := v_0
|
||||||
|
|
@ -46837,24 +46849,6 @@ func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint16x32(v *Va
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint16x8(v *Value) bool {
|
|
||||||
v_2 := v.Args[2]
|
|
||||||
v_1 := v.Args[1]
|
|
||||||
v_0 := v.Args[0]
|
|
||||||
b := v.Block
|
|
||||||
// match: (MaskedSaturatedUnsignedSignedPairDotProdUint16x8 x y mask)
|
|
||||||
// result: (VPMADDUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
|
|
||||||
for {
|
|
||||||
x := v_0
|
|
||||||
y := v_1
|
|
||||||
mask := v_2
|
|
||||||
v.reset(OpAMD64VPMADDUBSWMasked128)
|
|
||||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
|
|
||||||
v0.AddArg(mask)
|
|
||||||
v.AddArg3(x, y, v0)
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16(v *Value) bool {
|
func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16(v *Value) bool {
|
||||||
v_3 := v.Args[3]
|
v_3 := v.Args[3]
|
||||||
v_2 := v.Args[2]
|
v_2 := v.Args[2]
|
||||||
|
|
|
||||||
|
|
@ -1126,9 +1126,9 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
||||||
addF(simdPackage, "Uint16x8.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubUint16x8, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Uint16x8.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubUint16x8, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "Uint16x16.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubUint16x16, types.TypeVec256), sys.AMD64)
|
addF(simdPackage, "Uint16x16.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubUint16x16, types.TypeVec256), sys.AMD64)
|
||||||
addF(simdPackage, "Uint16x32.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubUint16x32, types.TypeVec512), sys.AMD64)
|
addF(simdPackage, "Uint16x32.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubUint16x32, types.TypeVec512), sys.AMD64)
|
||||||
addF(simdPackage, "Uint16x8.MaskedSaturatedUnsignedSignedPairDotProd", opLen3(ssa.OpMaskedSaturatedUnsignedSignedPairDotProdUint16x8, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Uint8x16.MaskedSaturatedUnsignedSignedPairDotProd", opLen3(ssa.OpMaskedSaturatedUnsignedSignedPairDotProdUint8x16, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "Uint16x16.MaskedSaturatedUnsignedSignedPairDotProd", opLen3(ssa.OpMaskedSaturatedUnsignedSignedPairDotProdUint16x16, types.TypeVec256), sys.AMD64)
|
addF(simdPackage, "Uint8x32.MaskedSaturatedUnsignedSignedPairDotProd", opLen3(ssa.OpMaskedSaturatedUnsignedSignedPairDotProdUint8x32, types.TypeVec256), sys.AMD64)
|
||||||
addF(simdPackage, "Uint16x32.MaskedSaturatedUnsignedSignedPairDotProd", opLen3(ssa.OpMaskedSaturatedUnsignedSignedPairDotProdUint16x32, types.TypeVec512), sys.AMD64)
|
addF(simdPackage, "Uint8x64.MaskedSaturatedUnsignedSignedPairDotProd", opLen3(ssa.OpMaskedSaturatedUnsignedSignedPairDotProdUint8x64, types.TypeVec512), sys.AMD64)
|
||||||
addF(simdPackage, "Int32x4.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Int32x4.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "Int32x8.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
|
addF(simdPackage, "Int32x8.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
|
||||||
addF(simdPackage, "Int32x16.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
|
addF(simdPackage, "Int32x16.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
|
||||||
|
|
@ -1463,9 +1463,7 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
||||||
addF(simdPackage, "Uint16x32.SaturatedSub", opLen2(ssa.OpSaturatedSubUint16x32, types.TypeVec512), sys.AMD64)
|
addF(simdPackage, "Uint16x32.SaturatedSub", opLen2(ssa.OpSaturatedSubUint16x32, types.TypeVec512), sys.AMD64)
|
||||||
addF(simdPackage, "Uint8x16.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint8x16, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Uint8x16.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint8x16, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "Uint8x32.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint8x32, types.TypeVec256), sys.AMD64)
|
addF(simdPackage, "Uint8x32.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint8x32, types.TypeVec256), sys.AMD64)
|
||||||
addF(simdPackage, "Uint16x8.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint16x8, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Uint8x64.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint8x64, types.TypeVec512), sys.AMD64)
|
||||||
addF(simdPackage, "Uint16x16.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint16x16, types.TypeVec256), sys.AMD64)
|
|
||||||
addF(simdPackage, "Uint16x32.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint16x32, types.TypeVec512), sys.AMD64)
|
|
||||||
addF(simdPackage, "Int32x4.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Int32x4.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "Int32x8.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
|
addF(simdPackage, "Int32x8.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
|
||||||
addF(simdPackage, "Int32x16.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
|
addF(simdPackage, "Int32x16.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
|
||||||
|
|
|
||||||
|
|
@ -5962,17 +5962,17 @@ func (x Int16x32) MaskedPairDotProd(y Int16x32, z Mask16x32) Int32x16
|
||||||
// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
|
// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
|
||||||
//
|
//
|
||||||
// Asm: VPDPWSSD, CPU Feature: AVX512EVEX
|
// Asm: VPDPWSSD, CPU Feature: AVX512EVEX
|
||||||
func (x Int32x4) MaskedPairDotProdAccumulate(y Int16x8, z Int32x4, u Mask32x4) Int32x4
|
func (x Int32x4) MaskedPairDotProdAccumulate(y Int16x8, z Int16x8, u Mask32x4) Int32x4
|
||||||
|
|
||||||
// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
|
// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
|
||||||
//
|
//
|
||||||
// Asm: VPDPWSSD, CPU Feature: AVX512EVEX
|
// Asm: VPDPWSSD, CPU Feature: AVX512EVEX
|
||||||
func (x Int32x8) MaskedPairDotProdAccumulate(y Int16x16, z Int32x8, u Mask32x8) Int32x8
|
func (x Int32x8) MaskedPairDotProdAccumulate(y Int16x16, z Int16x16, u Mask32x8) Int32x8
|
||||||
|
|
||||||
// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
|
// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
|
||||||
//
|
//
|
||||||
// Asm: VPDPWSSD, CPU Feature: AVX512EVEX
|
// Asm: VPDPWSSD, CPU Feature: AVX512EVEX
|
||||||
func (x Int32x16) MaskedPairDotProdAccumulate(y Int16x32, z Int32x16, u Mask32x16) Int32x16
|
func (x Int32x16) MaskedPairDotProdAccumulate(y Int16x32, z Int16x32, u Mask32x16) Int32x16
|
||||||
|
|
||||||
/* MaskedPopCount */
|
/* MaskedPopCount */
|
||||||
|
|
||||||
|
|
@ -6239,17 +6239,17 @@ func (x Uint16x32) MaskedSaturatedAdd(y Uint16x32, z Mask16x32) Uint16x32
|
||||||
// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
|
// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
|
||||||
//
|
//
|
||||||
// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX
|
// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX
|
||||||
func (x Int32x4) MaskedSaturatedPairDotProdAccumulate(y Int16x8, z Int32x4, u Mask32x4) Int32x4
|
func (x Int32x4) MaskedSaturatedPairDotProdAccumulate(y Int16x8, z Int16x8, u Mask32x4) Int32x4
|
||||||
|
|
||||||
// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
|
// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
|
||||||
//
|
//
|
||||||
// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX
|
// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX
|
||||||
func (x Int32x8) MaskedSaturatedPairDotProdAccumulate(y Int16x16, z Int32x8, u Mask32x8) Int32x8
|
func (x Int32x8) MaskedSaturatedPairDotProdAccumulate(y Int16x16, z Int16x16, u Mask32x8) Int32x8
|
||||||
|
|
||||||
// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
|
// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
|
||||||
//
|
//
|
||||||
// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX
|
// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX
|
||||||
func (x Int32x16) MaskedSaturatedPairDotProdAccumulate(y Int16x32, z Int32x16, u Mask32x16) Int32x16
|
func (x Int32x16) MaskedSaturatedPairDotProdAccumulate(y Int16x32, z Int16x32, u Mask32x16) Int32x16
|
||||||
|
|
||||||
/* MaskedSaturatedSub */
|
/* MaskedSaturatedSub */
|
||||||
|
|
||||||
|
|
@ -6319,51 +6319,51 @@ func (x Uint16x32) MaskedSaturatedSub(y Uint16x32, z Mask16x32) Uint16x32
|
||||||
// yielding a vector of half as many elements with twice the input element size.
|
// yielding a vector of half as many elements with twice the input element size.
|
||||||
//
|
//
|
||||||
// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
|
// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
|
||||||
func (x Uint16x8) MaskedSaturatedUnsignedSignedPairDotProd(y Int16x8, z Mask16x8) Int16x8
|
func (x Uint8x16) MaskedSaturatedUnsignedSignedPairDotProd(y Int8x16, z Mask16x8) Int16x8
|
||||||
|
|
||||||
// SaturatedPairDotProd multiplies the elements and add the pairs together with saturation,
|
// SaturatedPairDotProd multiplies the elements and add the pairs together with saturation,
|
||||||
// yielding a vector of half as many elements with twice the input element size.
|
// yielding a vector of half as many elements with twice the input element size.
|
||||||
//
|
//
|
||||||
// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
|
// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
|
||||||
func (x Uint16x16) MaskedSaturatedUnsignedSignedPairDotProd(y Int16x16, z Mask16x16) Int16x16
|
func (x Uint8x32) MaskedSaturatedUnsignedSignedPairDotProd(y Int8x32, z Mask16x16) Int16x16
|
||||||
|
|
||||||
// SaturatedPairDotProd multiplies the elements and add the pairs together with saturation,
|
// SaturatedPairDotProd multiplies the elements and add the pairs together with saturation,
|
||||||
// yielding a vector of half as many elements with twice the input element size.
|
// yielding a vector of half as many elements with twice the input element size.
|
||||||
//
|
//
|
||||||
// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
|
// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
|
||||||
func (x Uint16x32) MaskedSaturatedUnsignedSignedPairDotProd(y Int16x32, z Mask16x32) Int16x32
|
func (x Uint8x64) MaskedSaturatedUnsignedSignedPairDotProd(y Int8x64, z Mask16x32) Int16x32
|
||||||
|
|
||||||
/* MaskedSaturatedUnsignedSignedQuadDotProdAccumulate */
|
/* MaskedSaturatedUnsignedSignedQuadDotProdAccumulate */
|
||||||
|
|
||||||
// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
||||||
//
|
//
|
||||||
// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
|
// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
|
||||||
func (x Int32x4) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int32x4, u Mask32x4) Int32x4
|
func (x Int32x4) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int8x16, u Mask32x4) Int32x4
|
||||||
|
|
||||||
// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
||||||
//
|
//
|
||||||
// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
|
// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
|
||||||
func (x Int32x8) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int32x8, u Mask32x8) Int32x8
|
func (x Int32x8) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int8x32, u Mask32x8) Int32x8
|
||||||
|
|
||||||
// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
||||||
//
|
//
|
||||||
// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
|
// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
|
||||||
func (x Int32x16) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16, u Mask32x16) Int32x16
|
func (x Int32x16) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int8x64, u Mask32x16) Int32x16
|
||||||
|
|
||||||
// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
||||||
//
|
//
|
||||||
// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
|
// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
|
||||||
func (x Uint32x4) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int32x4, u Mask32x4) Uint32x4
|
func (x Uint32x4) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int8x16, u Mask32x4) Uint32x4
|
||||||
|
|
||||||
// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
||||||
//
|
//
|
||||||
// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
|
// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
|
||||||
func (x Uint32x8) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int32x8, u Mask32x8) Uint32x8
|
func (x Uint32x8) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int8x32, u Mask32x8) Uint32x8
|
||||||
|
|
||||||
// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
||||||
//
|
//
|
||||||
// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
|
// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
|
||||||
func (x Uint32x16) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16, u Mask32x16) Uint32x16
|
func (x Uint32x16) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int8x64, u Mask32x16) Uint32x16
|
||||||
|
|
||||||
/* MaskedSqrt */
|
/* MaskedSqrt */
|
||||||
|
|
||||||
|
|
@ -6630,32 +6630,32 @@ func (x Float64x8) MaskedTruncWithPrecision(imm uint8, y Mask64x8) Float64x8
|
||||||
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
||||||
//
|
//
|
||||||
// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
|
// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
|
||||||
func (x Int32x4) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int32x4, u Mask32x4) Int32x4
|
func (x Int32x4) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int8x16, u Mask32x4) Int32x4
|
||||||
|
|
||||||
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
||||||
//
|
//
|
||||||
// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
|
// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
|
||||||
func (x Int32x8) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int32x8, u Mask32x8) Int32x8
|
func (x Int32x8) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int8x32, u Mask32x8) Int32x8
|
||||||
|
|
||||||
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
||||||
//
|
//
|
||||||
// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
|
// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
|
||||||
func (x Int32x16) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16, u Mask32x16) Int32x16
|
func (x Int32x16) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int8x64, u Mask32x16) Int32x16
|
||||||
|
|
||||||
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
||||||
//
|
//
|
||||||
// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
|
// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
|
||||||
func (x Uint32x4) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int32x4, u Mask32x4) Uint32x4
|
func (x Uint32x4) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int8x16, u Mask32x4) Uint32x4
|
||||||
|
|
||||||
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
||||||
//
|
//
|
||||||
// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
|
// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
|
||||||
func (x Uint32x8) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int32x8, u Mask32x8) Uint32x8
|
func (x Uint32x8) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int8x32, u Mask32x8) Uint32x8
|
||||||
|
|
||||||
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
||||||
//
|
//
|
||||||
// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
|
// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
|
||||||
func (x Uint32x16) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16, u Mask32x16) Uint32x16
|
func (x Uint32x16) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int8x64, u Mask32x16) Uint32x16
|
||||||
|
|
||||||
/* MaskedXor */
|
/* MaskedXor */
|
||||||
|
|
||||||
|
|
@ -7597,17 +7597,17 @@ func (x Int16x32) PairDotProd(y Int16x32) Int32x16
|
||||||
// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
|
// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
|
||||||
//
|
//
|
||||||
// Asm: VPDPWSSD, CPU Feature: AVX_VNNI
|
// Asm: VPDPWSSD, CPU Feature: AVX_VNNI
|
||||||
func (x Int32x4) PairDotProdAccumulate(y Int32x4, z Int32x4) Int32x4
|
func (x Int32x4) PairDotProdAccumulate(y Int16x8, z Int16x8) Int32x4
|
||||||
|
|
||||||
// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
|
// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
|
||||||
//
|
//
|
||||||
// Asm: VPDPWSSD, CPU Feature: AVX_VNNI
|
// Asm: VPDPWSSD, CPU Feature: AVX_VNNI
|
||||||
func (x Int32x8) PairDotProdAccumulate(y Int32x8, z Int32x8) Int32x8
|
func (x Int32x8) PairDotProdAccumulate(y Int16x16, z Int16x16) Int32x8
|
||||||
|
|
||||||
// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
|
// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
|
||||||
//
|
//
|
||||||
// Asm: VPDPWSSD, CPU Feature: AVX512EVEX
|
// Asm: VPDPWSSD, CPU Feature: AVX512EVEX
|
||||||
func (x Int32x16) PairDotProdAccumulate(y Int16x32, z Int32x16) Int32x16
|
func (x Int32x16) PairDotProdAccumulate(y Int16x32, z Int16x32) Int32x16
|
||||||
|
|
||||||
/* PairwiseAdd */
|
/* PairwiseAdd */
|
||||||
|
|
||||||
|
|
@ -8048,17 +8048,17 @@ func (x Uint16x32) SaturatedAdd(y Uint16x32) Uint16x32
|
||||||
// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
|
// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
|
||||||
//
|
//
|
||||||
// Asm: VPDPWSSDS, CPU Feature: AVX_VNNI
|
// Asm: VPDPWSSDS, CPU Feature: AVX_VNNI
|
||||||
func (x Int32x4) SaturatedPairDotProdAccumulate(y Int32x4, z Int32x4) Int32x4
|
func (x Int32x4) SaturatedPairDotProdAccumulate(y Int16x8, z Int16x8) Int32x4
|
||||||
|
|
||||||
// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
|
// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
|
||||||
//
|
//
|
||||||
// Asm: VPDPWSSDS, CPU Feature: AVX_VNNI
|
// Asm: VPDPWSSDS, CPU Feature: AVX_VNNI
|
||||||
func (x Int32x8) SaturatedPairDotProdAccumulate(y Int32x8, z Int32x8) Int32x8
|
func (x Int32x8) SaturatedPairDotProdAccumulate(y Int16x16, z Int16x16) Int32x8
|
||||||
|
|
||||||
// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
|
// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
|
||||||
//
|
//
|
||||||
// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX
|
// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX
|
||||||
func (x Int32x16) SaturatedPairDotProdAccumulate(y Int16x32, z Int32x16) Int32x16
|
func (x Int32x16) SaturatedPairDotProdAccumulate(y Int16x32, z Int16x32) Int32x16
|
||||||
|
|
||||||
/* SaturatedPairwiseAdd */
|
/* SaturatedPairwiseAdd */
|
||||||
|
|
||||||
|
|
@ -8168,51 +8168,39 @@ func (x Uint8x32) SaturatedUnsignedSignedPairDotProd(y Int8x32) Int16x16
|
||||||
// yielding a vector of half as many elements with twice the input element size.
|
// yielding a vector of half as many elements with twice the input element size.
|
||||||
//
|
//
|
||||||
// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
|
// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
|
||||||
func (x Uint16x8) SaturatedUnsignedSignedPairDotProd(y Int16x8) Int16x8
|
func (x Uint8x64) SaturatedUnsignedSignedPairDotProd(y Int8x64) Int16x32
|
||||||
|
|
||||||
// SaturatedPairDotProd multiplies the elements and adds the pairs together with saturation,
|
|
||||||
// yielding a vector of half as many elements with twice the input element size.
|
|
||||||
//
|
|
||||||
// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
|
|
||||||
func (x Uint16x16) SaturatedUnsignedSignedPairDotProd(y Int16x16) Int16x16
|
|
||||||
|
|
||||||
// SaturatedPairDotProd multiplies the elements and adds the pairs together with saturation,
|
|
||||||
// yielding a vector of half as many elements with twice the input element size.
|
|
||||||
//
|
|
||||||
// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
|
|
||||||
func (x Uint16x32) SaturatedUnsignedSignedPairDotProd(y Int16x32) Int16x32
|
|
||||||
|
|
||||||
/* SaturatedUnsignedSignedQuadDotProdAccumulate */
|
/* SaturatedUnsignedSignedQuadDotProdAccumulate */
|
||||||
|
|
||||||
// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
||||||
//
|
//
|
||||||
// Asm: VPDPBUSDS, CPU Feature: AVX_VNNI
|
// Asm: VPDPBUSDS, CPU Feature: AVX_VNNI
|
||||||
func (x Int32x4) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint32x4, z Int32x4) Int32x4
|
func (x Int32x4) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int8x16) Int32x4
|
||||||
|
|
||||||
// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
||||||
//
|
//
|
||||||
// Asm: VPDPBUSDS, CPU Feature: AVX_VNNI
|
// Asm: VPDPBUSDS, CPU Feature: AVX_VNNI
|
||||||
func (x Int32x8) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint32x8, z Int32x8) Int32x8
|
func (x Int32x8) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int8x32) Int32x8
|
||||||
|
|
||||||
// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
||||||
//
|
//
|
||||||
// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
|
// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
|
||||||
func (x Int32x16) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16) Int32x16
|
func (x Int32x16) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int8x64) Int32x16
|
||||||
|
|
||||||
// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
||||||
//
|
//
|
||||||
// Asm: VPDPBUSDS, CPU Feature: AVX_VNNI
|
// Asm: VPDPBUSDS, CPU Feature: AVX_VNNI
|
||||||
func (x Uint32x4) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint32x4, z Int32x4) Uint32x4
|
func (x Uint32x4) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int8x16) Uint32x4
|
||||||
|
|
||||||
// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
||||||
//
|
//
|
||||||
// Asm: VPDPBUSDS, CPU Feature: AVX_VNNI
|
// Asm: VPDPBUSDS, CPU Feature: AVX_VNNI
|
||||||
func (x Uint32x8) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint32x8, z Int32x8) Uint32x8
|
func (x Uint32x8) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int8x32) Uint32x8
|
||||||
|
|
||||||
// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
||||||
//
|
//
|
||||||
// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
|
// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
|
||||||
func (x Uint32x16) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16) Uint32x16
|
func (x Uint32x16) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int8x64) Uint32x16
|
||||||
|
|
||||||
/* Sign */
|
/* Sign */
|
||||||
|
|
||||||
|
|
@ -8543,32 +8531,32 @@ func (x Float64x8) TruncWithPrecision(imm8 uint8) Float64x8
|
||||||
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
||||||
//
|
//
|
||||||
// Asm: VPDPBUSD, CPU Feature: AVX_VNNI
|
// Asm: VPDPBUSD, CPU Feature: AVX_VNNI
|
||||||
func (x Int32x4) UnsignedSignedQuadDotProdAccumulate(y Uint32x4, z Int32x4) Int32x4
|
func (x Int32x4) UnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int8x16) Int32x4
|
||||||
|
|
||||||
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
||||||
//
|
//
|
||||||
// Asm: VPDPBUSD, CPU Feature: AVX_VNNI
|
// Asm: VPDPBUSD, CPU Feature: AVX_VNNI
|
||||||
func (x Int32x8) UnsignedSignedQuadDotProdAccumulate(y Uint32x8, z Int32x8) Int32x8
|
func (x Int32x8) UnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int8x32) Int32x8
|
||||||
|
|
||||||
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
||||||
//
|
//
|
||||||
// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
|
// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
|
||||||
func (x Int32x16) UnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16) Int32x16
|
func (x Int32x16) UnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int8x64) Int32x16
|
||||||
|
|
||||||
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
||||||
//
|
//
|
||||||
// Asm: VPDPBUSD, CPU Feature: AVX_VNNI
|
// Asm: VPDPBUSD, CPU Feature: AVX_VNNI
|
||||||
func (x Uint32x4) UnsignedSignedQuadDotProdAccumulate(y Uint32x4, z Int32x4) Uint32x4
|
func (x Uint32x4) UnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int8x16) Uint32x4
|
||||||
|
|
||||||
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
||||||
//
|
//
|
||||||
// Asm: VPDPBUSD, CPU Feature: AVX_VNNI
|
// Asm: VPDPBUSD, CPU Feature: AVX_VNNI
|
||||||
func (x Uint32x8) UnsignedSignedQuadDotProdAccumulate(y Uint32x8, z Int32x8) Uint32x8
|
func (x Uint32x8) UnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int8x32) Uint32x8
|
||||||
|
|
||||||
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
||||||
//
|
//
|
||||||
// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
|
// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
|
||||||
func (x Uint32x16) UnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16) Uint32x16
|
func (x Uint32x16) UnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int8x64) Uint32x16
|
||||||
|
|
||||||
/* Xor */
|
/* Xor */
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue