[dev.simd] cmd/compile, simd: reorder PairDotProdAccumulate

This CL reorders the param order of the PairDotProdAccumulate family to be
dotprod(x, y) + z instead of the old dotprod(y, z) + x.

This CL also updates some documentation of other ML Ops.

This CL adds a test to verify that the new behavior is correct.

This CL is partially generated by CL 688115.

Change-Id: I76a6ee55a2ad8e3aff388d7e4fa5218ec0e4800d
Reviewed-on: https://go-review.googlesource.com/c/go/+/688095
Reviewed-by: David Chase <drchase@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
Junyang Shao 2025-07-15 05:13:55 +00:00
parent ef5f6cc921
commit c61743e4f0
9 changed files with 288 additions and 1008 deletions

View file

@ -1350,15 +1350,9 @@
(SaturatedUnsignedSignedQuadDotProdAccumulateInt32x4 ...) => (VPDPBUSDS128 ...) (SaturatedUnsignedSignedQuadDotProdAccumulateInt32x4 ...) => (VPDPBUSDS128 ...)
(SaturatedUnsignedSignedQuadDotProdAccumulateInt32x8 ...) => (VPDPBUSDS256 ...) (SaturatedUnsignedSignedQuadDotProdAccumulateInt32x8 ...) => (VPDPBUSDS256 ...)
(SaturatedUnsignedSignedQuadDotProdAccumulateInt32x16 ...) => (VPDPBUSDS512 ...) (SaturatedUnsignedSignedQuadDotProdAccumulateInt32x16 ...) => (VPDPBUSDS512 ...)
(SaturatedUnsignedSignedQuadDotProdAccumulateUint32x4 ...) => (VPDPBUSDS128 ...)
(SaturatedUnsignedSignedQuadDotProdAccumulateUint32x8 ...) => (VPDPBUSDS256 ...)
(SaturatedUnsignedSignedQuadDotProdAccumulateUint32x16 ...) => (VPDPBUSDS512 ...)
(SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4 x y z mask) => (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask)) (SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4 x y z mask) => (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
(SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8 x y z mask) => (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask)) (SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8 x y z mask) => (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
(SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x16 x y z mask) => (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask)) (SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x16 x y z mask) => (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
(SaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x4 x y z mask) => (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
(SaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x8 x y z mask) => (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
(SaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x16 x y z mask) => (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
(Set128Float32x8 ...) => (VINSERTF128256 ...) (Set128Float32x8 ...) => (VINSERTF128256 ...)
(Set128Float64x4 ...) => (VINSERTF128256 ...) (Set128Float64x4 ...) => (VINSERTF128256 ...)
(Set128Int8x32 ...) => (VINSERTI128256 ...) (Set128Int8x32 ...) => (VINSERTI128256 ...)
@ -1762,15 +1756,9 @@
(UnsignedSignedQuadDotProdAccumulateInt32x4 ...) => (VPDPBUSD128 ...) (UnsignedSignedQuadDotProdAccumulateInt32x4 ...) => (VPDPBUSD128 ...)
(UnsignedSignedQuadDotProdAccumulateInt32x8 ...) => (VPDPBUSD256 ...) (UnsignedSignedQuadDotProdAccumulateInt32x8 ...) => (VPDPBUSD256 ...)
(UnsignedSignedQuadDotProdAccumulateInt32x16 ...) => (VPDPBUSD512 ...) (UnsignedSignedQuadDotProdAccumulateInt32x16 ...) => (VPDPBUSD512 ...)
(UnsignedSignedQuadDotProdAccumulateUint32x4 ...) => (VPDPBUSD128 ...)
(UnsignedSignedQuadDotProdAccumulateUint32x8 ...) => (VPDPBUSD256 ...)
(UnsignedSignedQuadDotProdAccumulateUint32x16 ...) => (VPDPBUSD512 ...)
(UnsignedSignedQuadDotProdAccumulateMaskedInt32x4 x y z mask) => (VPDPBUSDMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask)) (UnsignedSignedQuadDotProdAccumulateMaskedInt32x4 x y z mask) => (VPDPBUSDMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
(UnsignedSignedQuadDotProdAccumulateMaskedInt32x8 x y z mask) => (VPDPBUSDMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask)) (UnsignedSignedQuadDotProdAccumulateMaskedInt32x8 x y z mask) => (VPDPBUSDMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
(UnsignedSignedQuadDotProdAccumulateMaskedInt32x16 x y z mask) => (VPDPBUSDMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask)) (UnsignedSignedQuadDotProdAccumulateMaskedInt32x16 x y z mask) => (VPDPBUSDMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
(UnsignedSignedQuadDotProdAccumulateMaskedUint32x4 x y z mask) => (VPDPBUSDMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
(UnsignedSignedQuadDotProdAccumulateMaskedUint32x8 x y z mask) => (VPDPBUSDMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
(UnsignedSignedQuadDotProdAccumulateMaskedUint32x16 x y z mask) => (VPDPBUSDMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
(XorInt8x16 ...) => (VPXOR128 ...) (XorInt8x16 ...) => (VPXOR128 ...)
(XorInt8x32 ...) => (VPXOR256 ...) (XorInt8x32 ...) => (VPXOR256 ...)
(XorInt16x8 ...) => (VPXOR128 ...) (XorInt16x8 ...) => (VPXOR128 ...)

View file

@ -914,8 +914,8 @@ func simdGenericOps() []opData {
{name: "Permute2Int16x16", argLength: 3, commutative: false}, {name: "Permute2Int16x16", argLength: 3, commutative: false},
{name: "Permute2MaskedInt16x16", argLength: 4, commutative: false}, {name: "Permute2MaskedInt16x16", argLength: 4, commutative: false},
{name: "Permute2MaskedUint16x16", argLength: 4, commutative: false}, {name: "Permute2MaskedUint16x16", argLength: 4, commutative: false},
{name: "PermuteMaskedInt16x16", argLength: 3, commutative: false},
{name: "PermuteMaskedUint16x16", argLength: 3, commutative: false}, {name: "PermuteMaskedUint16x16", argLength: 3, commutative: false},
{name: "PermuteMaskedInt16x16", argLength: 3, commutative: false},
{name: "PopCountUint16x16", argLength: 1, commutative: false}, {name: "PopCountUint16x16", argLength: 1, commutative: false},
{name: "PopCountMaskedUint16x16", argLength: 2, commutative: false}, {name: "PopCountMaskedUint16x16", argLength: 2, commutative: false},
{name: "SaturatedAddUint16x16", argLength: 2, commutative: true}, {name: "SaturatedAddUint16x16", argLength: 2, commutative: true},
@ -960,12 +960,12 @@ func simdGenericOps() []opData {
{name: "MulHighMaskedUint16x32", argLength: 3, commutative: true}, {name: "MulHighMaskedUint16x32", argLength: 3, commutative: true},
{name: "NotEqualUint16x32", argLength: 2, commutative: true}, {name: "NotEqualUint16x32", argLength: 2, commutative: true},
{name: "NotEqualMaskedUint16x32", argLength: 3, commutative: true}, {name: "NotEqualMaskedUint16x32", argLength: 3, commutative: true},
{name: "PermuteInt16x32", argLength: 2, commutative: false},
{name: "PermuteUint16x32", argLength: 2, commutative: false}, {name: "PermuteUint16x32", argLength: 2, commutative: false},
{name: "Permute2Int16x32", argLength: 3, commutative: false}, {name: "PermuteInt16x32", argLength: 2, commutative: false},
{name: "Permute2Uint16x32", argLength: 3, commutative: false}, {name: "Permute2Uint16x32", argLength: 3, commutative: false},
{name: "Permute2MaskedInt16x32", argLength: 4, commutative: false}, {name: "Permute2Int16x32", argLength: 3, commutative: false},
{name: "Permute2MaskedUint16x32", argLength: 4, commutative: false}, {name: "Permute2MaskedUint16x32", argLength: 4, commutative: false},
{name: "Permute2MaskedInt16x32", argLength: 4, commutative: false},
{name: "PermuteMaskedUint16x32", argLength: 3, commutative: false}, {name: "PermuteMaskedUint16x32", argLength: 3, commutative: false},
{name: "PermuteMaskedInt16x32", argLength: 3, commutative: false}, {name: "PermuteMaskedInt16x32", argLength: 3, commutative: false},
{name: "PopCountUint16x32", argLength: 1, commutative: false}, {name: "PopCountUint16x32", argLength: 1, commutative: false},
@ -1016,14 +1016,14 @@ func simdGenericOps() []opData {
{name: "OrUint16x8", argLength: 2, commutative: true}, {name: "OrUint16x8", argLength: 2, commutative: true},
{name: "PairwiseAddUint16x8", argLength: 2, commutative: false}, {name: "PairwiseAddUint16x8", argLength: 2, commutative: false},
{name: "PairwiseSubUint16x8", argLength: 2, commutative: false}, {name: "PairwiseSubUint16x8", argLength: 2, commutative: false},
{name: "PermuteUint16x8", argLength: 2, commutative: false},
{name: "PermuteInt16x8", argLength: 2, commutative: false}, {name: "PermuteInt16x8", argLength: 2, commutative: false},
{name: "PermuteUint16x8", argLength: 2, commutative: false},
{name: "Permute2Int16x8", argLength: 3, commutative: false}, {name: "Permute2Int16x8", argLength: 3, commutative: false},
{name: "Permute2Uint16x8", argLength: 3, commutative: false}, {name: "Permute2Uint16x8", argLength: 3, commutative: false},
{name: "Permute2MaskedUint16x8", argLength: 4, commutative: false},
{name: "Permute2MaskedInt16x8", argLength: 4, commutative: false}, {name: "Permute2MaskedInt16x8", argLength: 4, commutative: false},
{name: "PermuteMaskedInt16x8", argLength: 3, commutative: false}, {name: "Permute2MaskedUint16x8", argLength: 4, commutative: false},
{name: "PermuteMaskedUint16x8", argLength: 3, commutative: false}, {name: "PermuteMaskedUint16x8", argLength: 3, commutative: false},
{name: "PermuteMaskedInt16x8", argLength: 3, commutative: false},
{name: "PopCountUint16x8", argLength: 1, commutative: false}, {name: "PopCountUint16x8", argLength: 1, commutative: false},
{name: "PopCountMaskedUint16x8", argLength: 2, commutative: false}, {name: "PopCountMaskedUint16x8", argLength: 2, commutative: false},
{name: "SaturatedAddUint16x8", argLength: 2, commutative: true}, {name: "SaturatedAddUint16x8", argLength: 2, commutative: true},
@ -1070,26 +1070,24 @@ func simdGenericOps() []opData {
{name: "NotEqualMaskedUint32x16", argLength: 3, commutative: true}, {name: "NotEqualMaskedUint32x16", argLength: 3, commutative: true},
{name: "OrUint32x16", argLength: 2, commutative: true}, {name: "OrUint32x16", argLength: 2, commutative: true},
{name: "OrMaskedUint32x16", argLength: 3, commutative: true}, {name: "OrMaskedUint32x16", argLength: 3, commutative: true},
{name: "PermuteInt32x16", argLength: 2, commutative: false},
{name: "PermuteFloat32x16", argLength: 2, commutative: false}, {name: "PermuteFloat32x16", argLength: 2, commutative: false},
{name: "PermuteInt32x16", argLength: 2, commutative: false},
{name: "PermuteUint32x16", argLength: 2, commutative: false}, {name: "PermuteUint32x16", argLength: 2, commutative: false},
{name: "Permute2Uint32x16", argLength: 3, commutative: false}, {name: "Permute2Uint32x16", argLength: 3, commutative: false},
{name: "Permute2Float32x16", argLength: 3, commutative: false}, {name: "Permute2Float32x16", argLength: 3, commutative: false},
{name: "Permute2Int32x16", argLength: 3, commutative: false}, {name: "Permute2Int32x16", argLength: 3, commutative: false},
{name: "Permute2MaskedUint32x16", argLength: 4, commutative: false},
{name: "Permute2MaskedInt32x16", argLength: 4, commutative: false}, {name: "Permute2MaskedInt32x16", argLength: 4, commutative: false},
{name: "Permute2MaskedFloat32x16", argLength: 4, commutative: false}, {name: "Permute2MaskedFloat32x16", argLength: 4, commutative: false},
{name: "Permute2MaskedUint32x16", argLength: 4, commutative: false},
{name: "PermuteMaskedInt32x16", argLength: 3, commutative: false},
{name: "PermuteMaskedFloat32x16", argLength: 3, commutative: false}, {name: "PermuteMaskedFloat32x16", argLength: 3, commutative: false},
{name: "PermuteMaskedUint32x16", argLength: 3, commutative: false}, {name: "PermuteMaskedUint32x16", argLength: 3, commutative: false},
{name: "PermuteMaskedInt32x16", argLength: 3, commutative: false},
{name: "PopCountUint32x16", argLength: 1, commutative: false}, {name: "PopCountUint32x16", argLength: 1, commutative: false},
{name: "PopCountMaskedUint32x16", argLength: 2, commutative: false}, {name: "PopCountMaskedUint32x16", argLength: 2, commutative: false},
{name: "RotateLeftUint32x16", argLength: 2, commutative: false}, {name: "RotateLeftUint32x16", argLength: 2, commutative: false},
{name: "RotateLeftMaskedUint32x16", argLength: 3, commutative: false}, {name: "RotateLeftMaskedUint32x16", argLength: 3, commutative: false},
{name: "RotateRightUint32x16", argLength: 2, commutative: false}, {name: "RotateRightUint32x16", argLength: 2, commutative: false},
{name: "RotateRightMaskedUint32x16", argLength: 3, commutative: false}, {name: "RotateRightMaskedUint32x16", argLength: 3, commutative: false},
{name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x16", argLength: 3, commutative: false},
{name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x16", argLength: 4, commutative: false},
{name: "ShiftAllLeftUint32x16", argLength: 2, commutative: false}, {name: "ShiftAllLeftUint32x16", argLength: 2, commutative: false},
{name: "ShiftAllLeftMaskedUint32x16", argLength: 3, commutative: false}, {name: "ShiftAllLeftMaskedUint32x16", argLength: 3, commutative: false},
{name: "ShiftAllRightUint32x16", argLength: 2, commutative: false}, {name: "ShiftAllRightUint32x16", argLength: 2, commutative: false},
@ -1104,8 +1102,6 @@ func simdGenericOps() []opData {
{name: "ShiftRightMaskedUint32x16", argLength: 3, commutative: false}, {name: "ShiftRightMaskedUint32x16", argLength: 3, commutative: false},
{name: "SubUint32x16", argLength: 2, commutative: false}, {name: "SubUint32x16", argLength: 2, commutative: false},
{name: "SubMaskedUint32x16", argLength: 3, commutative: false}, {name: "SubMaskedUint32x16", argLength: 3, commutative: false},
{name: "UnsignedSignedQuadDotProdAccumulateUint32x16", argLength: 3, commutative: false},
{name: "UnsignedSignedQuadDotProdAccumulateMaskedUint32x16", argLength: 4, commutative: false},
{name: "XorUint32x16", argLength: 2, commutative: true}, {name: "XorUint32x16", argLength: 2, commutative: true},
{name: "XorMaskedUint32x16", argLength: 3, commutative: true}, {name: "XorMaskedUint32x16", argLength: 3, commutative: true},
{name: "AddUint32x4", argLength: 2, commutative: true}, {name: "AddUint32x4", argLength: 2, commutative: true},
@ -1136,20 +1132,18 @@ func simdGenericOps() []opData {
{name: "OrMaskedUint32x4", argLength: 3, commutative: true}, {name: "OrMaskedUint32x4", argLength: 3, commutative: true},
{name: "PairwiseAddUint32x4", argLength: 2, commutative: false}, {name: "PairwiseAddUint32x4", argLength: 2, commutative: false},
{name: "PairwiseSubUint32x4", argLength: 2, commutative: false}, {name: "PairwiseSubUint32x4", argLength: 2, commutative: false},
{name: "Permute2Float32x4", argLength: 3, commutative: false},
{name: "Permute2Uint32x4", argLength: 3, commutative: false}, {name: "Permute2Uint32x4", argLength: 3, commutative: false},
{name: "Permute2Int32x4", argLength: 3, commutative: false}, {name: "Permute2Int32x4", argLength: 3, commutative: false},
{name: "Permute2Float32x4", argLength: 3, commutative: false},
{name: "Permute2MaskedFloat32x4", argLength: 4, commutative: false},
{name: "Permute2MaskedInt32x4", argLength: 4, commutative: false}, {name: "Permute2MaskedInt32x4", argLength: 4, commutative: false},
{name: "Permute2MaskedUint32x4", argLength: 4, commutative: false}, {name: "Permute2MaskedUint32x4", argLength: 4, commutative: false},
{name: "Permute2MaskedFloat32x4", argLength: 4, commutative: false},
{name: "PopCountUint32x4", argLength: 1, commutative: false}, {name: "PopCountUint32x4", argLength: 1, commutative: false},
{name: "PopCountMaskedUint32x4", argLength: 2, commutative: false}, {name: "PopCountMaskedUint32x4", argLength: 2, commutative: false},
{name: "RotateLeftUint32x4", argLength: 2, commutative: false}, {name: "RotateLeftUint32x4", argLength: 2, commutative: false},
{name: "RotateLeftMaskedUint32x4", argLength: 3, commutative: false}, {name: "RotateLeftMaskedUint32x4", argLength: 3, commutative: false},
{name: "RotateRightUint32x4", argLength: 2, commutative: false}, {name: "RotateRightUint32x4", argLength: 2, commutative: false},
{name: "RotateRightMaskedUint32x4", argLength: 3, commutative: false}, {name: "RotateRightMaskedUint32x4", argLength: 3, commutative: false},
{name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x4", argLength: 3, commutative: false},
{name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x4", argLength: 4, commutative: false},
{name: "ShiftAllLeftUint32x4", argLength: 2, commutative: false}, {name: "ShiftAllLeftUint32x4", argLength: 2, commutative: false},
{name: "ShiftAllLeftMaskedUint32x4", argLength: 3, commutative: false}, {name: "ShiftAllLeftMaskedUint32x4", argLength: 3, commutative: false},
{name: "ShiftAllRightUint32x4", argLength: 2, commutative: false}, {name: "ShiftAllRightUint32x4", argLength: 2, commutative: false},
@ -1164,8 +1158,6 @@ func simdGenericOps() []opData {
{name: "ShiftRightMaskedUint32x4", argLength: 3, commutative: false}, {name: "ShiftRightMaskedUint32x4", argLength: 3, commutative: false},
{name: "SubUint32x4", argLength: 2, commutative: false}, {name: "SubUint32x4", argLength: 2, commutative: false},
{name: "SubMaskedUint32x4", argLength: 3, commutative: false}, {name: "SubMaskedUint32x4", argLength: 3, commutative: false},
{name: "UnsignedSignedQuadDotProdAccumulateUint32x4", argLength: 3, commutative: false},
{name: "UnsignedSignedQuadDotProdAccumulateMaskedUint32x4", argLength: 4, commutative: false},
{name: "XorUint32x4", argLength: 2, commutative: true}, {name: "XorUint32x4", argLength: 2, commutative: true},
{name: "XorMaskedUint32x4", argLength: 3, commutative: true}, {name: "XorMaskedUint32x4", argLength: 3, commutative: true},
{name: "AddUint32x8", argLength: 2, commutative: true}, {name: "AddUint32x8", argLength: 2, commutative: true},
@ -1197,14 +1189,14 @@ func simdGenericOps() []opData {
{name: "PairwiseAddUint32x8", argLength: 2, commutative: false}, {name: "PairwiseAddUint32x8", argLength: 2, commutative: false},
{name: "PairwiseSubUint32x8", argLength: 2, commutative: false}, {name: "PairwiseSubUint32x8", argLength: 2, commutative: false},
{name: "PermuteUint32x8", argLength: 2, commutative: false}, {name: "PermuteUint32x8", argLength: 2, commutative: false},
{name: "PermuteInt32x8", argLength: 2, commutative: false},
{name: "PermuteFloat32x8", argLength: 2, commutative: false}, {name: "PermuteFloat32x8", argLength: 2, commutative: false},
{name: "Permute2Uint32x8", argLength: 3, commutative: false}, {name: "PermuteInt32x8", argLength: 2, commutative: false},
{name: "Permute2Float32x8", argLength: 3, commutative: false},
{name: "Permute2Int32x8", argLength: 3, commutative: false}, {name: "Permute2Int32x8", argLength: 3, commutative: false},
{name: "Permute2Float32x8", argLength: 3, commutative: false},
{name: "Permute2Uint32x8", argLength: 3, commutative: false},
{name: "Permute2MaskedFloat32x8", argLength: 4, commutative: false}, {name: "Permute2MaskedFloat32x8", argLength: 4, commutative: false},
{name: "Permute2MaskedInt32x8", argLength: 4, commutative: false},
{name: "Permute2MaskedUint32x8", argLength: 4, commutative: false}, {name: "Permute2MaskedUint32x8", argLength: 4, commutative: false},
{name: "Permute2MaskedInt32x8", argLength: 4, commutative: false},
{name: "PermuteMaskedInt32x8", argLength: 3, commutative: false}, {name: "PermuteMaskedInt32x8", argLength: 3, commutative: false},
{name: "PermuteMaskedUint32x8", argLength: 3, commutative: false}, {name: "PermuteMaskedUint32x8", argLength: 3, commutative: false},
{name: "PermuteMaskedFloat32x8", argLength: 3, commutative: false}, {name: "PermuteMaskedFloat32x8", argLength: 3, commutative: false},
@ -1214,8 +1206,6 @@ func simdGenericOps() []opData {
{name: "RotateLeftMaskedUint32x8", argLength: 3, commutative: false}, {name: "RotateLeftMaskedUint32x8", argLength: 3, commutative: false},
{name: "RotateRightUint32x8", argLength: 2, commutative: false}, {name: "RotateRightUint32x8", argLength: 2, commutative: false},
{name: "RotateRightMaskedUint32x8", argLength: 3, commutative: false}, {name: "RotateRightMaskedUint32x8", argLength: 3, commutative: false},
{name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x8", argLength: 3, commutative: false},
{name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x8", argLength: 4, commutative: false},
{name: "ShiftAllLeftUint32x8", argLength: 2, commutative: false}, {name: "ShiftAllLeftUint32x8", argLength: 2, commutative: false},
{name: "ShiftAllLeftMaskedUint32x8", argLength: 3, commutative: false}, {name: "ShiftAllLeftMaskedUint32x8", argLength: 3, commutative: false},
{name: "ShiftAllRightUint32x8", argLength: 2, commutative: false}, {name: "ShiftAllRightUint32x8", argLength: 2, commutative: false},
@ -1230,8 +1220,6 @@ func simdGenericOps() []opData {
{name: "ShiftRightMaskedUint32x8", argLength: 3, commutative: false}, {name: "ShiftRightMaskedUint32x8", argLength: 3, commutative: false},
{name: "SubUint32x8", argLength: 2, commutative: false}, {name: "SubUint32x8", argLength: 2, commutative: false},
{name: "SubMaskedUint32x8", argLength: 3, commutative: false}, {name: "SubMaskedUint32x8", argLength: 3, commutative: false},
{name: "UnsignedSignedQuadDotProdAccumulateUint32x8", argLength: 3, commutative: false},
{name: "UnsignedSignedQuadDotProdAccumulateMaskedUint32x8", argLength: 4, commutative: false},
{name: "XorUint32x8", argLength: 2, commutative: true}, {name: "XorUint32x8", argLength: 2, commutative: true},
{name: "XorMaskedUint32x8", argLength: 3, commutative: true}, {name: "XorMaskedUint32x8", argLength: 3, commutative: true},
{name: "AddUint64x2", argLength: 2, commutative: true}, {name: "AddUint64x2", argLength: 2, commutative: true},
@ -1265,8 +1253,8 @@ func simdGenericOps() []opData {
{name: "Permute2Uint64x2", argLength: 3, commutative: false}, {name: "Permute2Uint64x2", argLength: 3, commutative: false},
{name: "Permute2Int64x2", argLength: 3, commutative: false}, {name: "Permute2Int64x2", argLength: 3, commutative: false},
{name: "Permute2MaskedInt64x2", argLength: 4, commutative: false}, {name: "Permute2MaskedInt64x2", argLength: 4, commutative: false},
{name: "Permute2MaskedUint64x2", argLength: 4, commutative: false},
{name: "Permute2MaskedFloat64x2", argLength: 4, commutative: false}, {name: "Permute2MaskedFloat64x2", argLength: 4, commutative: false},
{name: "Permute2MaskedUint64x2", argLength: 4, commutative: false},
{name: "PopCountUint64x2", argLength: 1, commutative: false}, {name: "PopCountUint64x2", argLength: 1, commutative: false},
{name: "PopCountMaskedUint64x2", argLength: 2, commutative: false}, {name: "PopCountMaskedUint64x2", argLength: 2, commutative: false},
{name: "RotateLeftUint64x2", argLength: 2, commutative: false}, {name: "RotateLeftUint64x2", argLength: 2, commutative: false},
@ -1316,18 +1304,18 @@ func simdGenericOps() []opData {
{name: "NotEqualMaskedUint64x4", argLength: 3, commutative: true}, {name: "NotEqualMaskedUint64x4", argLength: 3, commutative: true},
{name: "OrUint64x4", argLength: 2, commutative: true}, {name: "OrUint64x4", argLength: 2, commutative: true},
{name: "OrMaskedUint64x4", argLength: 3, commutative: true}, {name: "OrMaskedUint64x4", argLength: 3, commutative: true},
{name: "PermuteFloat64x4", argLength: 2, commutative: false},
{name: "PermuteUint64x4", argLength: 2, commutative: false}, {name: "PermuteUint64x4", argLength: 2, commutative: false},
{name: "PermuteInt64x4", argLength: 2, commutative: false}, {name: "PermuteInt64x4", argLength: 2, commutative: false},
{name: "PermuteFloat64x4", argLength: 2, commutative: false},
{name: "Permute2Float64x4", argLength: 3, commutative: false},
{name: "Permute2Int64x4", argLength: 3, commutative: false}, {name: "Permute2Int64x4", argLength: 3, commutative: false},
{name: "Permute2Uint64x4", argLength: 3, commutative: false}, {name: "Permute2Uint64x4", argLength: 3, commutative: false},
{name: "Permute2Float64x4", argLength: 3, commutative: false},
{name: "Permute2MaskedFloat64x4", argLength: 4, commutative: false}, {name: "Permute2MaskedFloat64x4", argLength: 4, commutative: false},
{name: "Permute2MaskedUint64x4", argLength: 4, commutative: false}, {name: "Permute2MaskedUint64x4", argLength: 4, commutative: false},
{name: "Permute2MaskedInt64x4", argLength: 4, commutative: false}, {name: "Permute2MaskedInt64x4", argLength: 4, commutative: false},
{name: "PermuteMaskedFloat64x4", argLength: 3, commutative: false}, {name: "PermuteMaskedFloat64x4", argLength: 3, commutative: false},
{name: "PermuteMaskedUint64x4", argLength: 3, commutative: false},
{name: "PermuteMaskedInt64x4", argLength: 3, commutative: false}, {name: "PermuteMaskedInt64x4", argLength: 3, commutative: false},
{name: "PermuteMaskedUint64x4", argLength: 3, commutative: false},
{name: "PopCountUint64x4", argLength: 1, commutative: false}, {name: "PopCountUint64x4", argLength: 1, commutative: false},
{name: "PopCountMaskedUint64x4", argLength: 2, commutative: false}, {name: "PopCountMaskedUint64x4", argLength: 2, commutative: false},
{name: "RotateLeftUint64x4", argLength: 2, commutative: false}, {name: "RotateLeftUint64x4", argLength: 2, commutative: false},
@ -1377,18 +1365,18 @@ func simdGenericOps() []opData {
{name: "NotEqualMaskedUint64x8", argLength: 3, commutative: true}, {name: "NotEqualMaskedUint64x8", argLength: 3, commutative: true},
{name: "OrUint64x8", argLength: 2, commutative: true}, {name: "OrUint64x8", argLength: 2, commutative: true},
{name: "OrMaskedUint64x8", argLength: 3, commutative: true}, {name: "OrMaskedUint64x8", argLength: 3, commutative: true},
{name: "PermuteFloat64x8", argLength: 2, commutative: false},
{name: "PermuteInt64x8", argLength: 2, commutative: false}, {name: "PermuteInt64x8", argLength: 2, commutative: false},
{name: "PermuteUint64x8", argLength: 2, commutative: false}, {name: "PermuteUint64x8", argLength: 2, commutative: false},
{name: "PermuteFloat64x8", argLength: 2, commutative: false},
{name: "Permute2Uint64x8", argLength: 3, commutative: false},
{name: "Permute2Float64x8", argLength: 3, commutative: false},
{name: "Permute2Int64x8", argLength: 3, commutative: false}, {name: "Permute2Int64x8", argLength: 3, commutative: false},
{name: "Permute2Float64x8", argLength: 3, commutative: false},
{name: "Permute2Uint64x8", argLength: 3, commutative: false},
{name: "Permute2MaskedUint64x8", argLength: 4, commutative: false}, {name: "Permute2MaskedUint64x8", argLength: 4, commutative: false},
{name: "Permute2MaskedFloat64x8", argLength: 4, commutative: false},
{name: "Permute2MaskedInt64x8", argLength: 4, commutative: false}, {name: "Permute2MaskedInt64x8", argLength: 4, commutative: false},
{name: "Permute2MaskedFloat64x8", argLength: 4, commutative: false},
{name: "PermuteMaskedUint64x8", argLength: 3, commutative: false}, {name: "PermuteMaskedUint64x8", argLength: 3, commutative: false},
{name: "PermuteMaskedInt64x8", argLength: 3, commutative: false},
{name: "PermuteMaskedFloat64x8", argLength: 3, commutative: false}, {name: "PermuteMaskedFloat64x8", argLength: 3, commutative: false},
{name: "PermuteMaskedInt64x8", argLength: 3, commutative: false},
{name: "PopCountUint64x8", argLength: 1, commutative: false}, {name: "PopCountUint64x8", argLength: 1, commutative: false},
{name: "PopCountMaskedUint64x8", argLength: 2, commutative: false}, {name: "PopCountMaskedUint64x8", argLength: 2, commutative: false},
{name: "RotateLeftUint64x8", argLength: 2, commutative: false}, {name: "RotateLeftUint64x8", argLength: 2, commutative: false},
@ -1439,8 +1427,8 @@ func simdGenericOps() []opData {
{name: "OrUint8x16", argLength: 2, commutative: true}, {name: "OrUint8x16", argLength: 2, commutative: true},
{name: "PermuteUint8x16", argLength: 2, commutative: false}, {name: "PermuteUint8x16", argLength: 2, commutative: false},
{name: "PermuteInt8x16", argLength: 2, commutative: false}, {name: "PermuteInt8x16", argLength: 2, commutative: false},
{name: "Permute2Int8x16", argLength: 3, commutative: false},
{name: "Permute2Uint8x16", argLength: 3, commutative: false}, {name: "Permute2Uint8x16", argLength: 3, commutative: false},
{name: "Permute2Int8x16", argLength: 3, commutative: false},
{name: "Permute2MaskedInt8x16", argLength: 4, commutative: false}, {name: "Permute2MaskedInt8x16", argLength: 4, commutative: false},
{name: "Permute2MaskedUint8x16", argLength: 4, commutative: false}, {name: "Permute2MaskedUint8x16", argLength: 4, commutative: false},
{name: "PermuteMaskedUint8x16", argLength: 3, commutative: false}, {name: "PermuteMaskedUint8x16", argLength: 3, commutative: false},
@ -1486,10 +1474,10 @@ func simdGenericOps() []opData {
{name: "PermuteInt8x32", argLength: 2, commutative: false}, {name: "PermuteInt8x32", argLength: 2, commutative: false},
{name: "Permute2Int8x32", argLength: 3, commutative: false}, {name: "Permute2Int8x32", argLength: 3, commutative: false},
{name: "Permute2Uint8x32", argLength: 3, commutative: false}, {name: "Permute2Uint8x32", argLength: 3, commutative: false},
{name: "Permute2MaskedInt8x32", argLength: 4, commutative: false},
{name: "Permute2MaskedUint8x32", argLength: 4, commutative: false}, {name: "Permute2MaskedUint8x32", argLength: 4, commutative: false},
{name: "PermuteMaskedInt8x32", argLength: 3, commutative: false}, {name: "Permute2MaskedInt8x32", argLength: 4, commutative: false},
{name: "PermuteMaskedUint8x32", argLength: 3, commutative: false}, {name: "PermuteMaskedUint8x32", argLength: 3, commutative: false},
{name: "PermuteMaskedInt8x32", argLength: 3, commutative: false},
{name: "PopCountUint8x32", argLength: 1, commutative: false}, {name: "PopCountUint8x32", argLength: 1, commutative: false},
{name: "PopCountMaskedUint8x32", argLength: 2, commutative: false}, {name: "PopCountMaskedUint8x32", argLength: 2, commutative: false},
{name: "SaturatedAddUint8x32", argLength: 2, commutative: true}, {name: "SaturatedAddUint8x32", argLength: 2, commutative: true},

View file

@ -5314,8 +5314,8 @@ const (
OpPermute2Int16x16 OpPermute2Int16x16
OpPermute2MaskedInt16x16 OpPermute2MaskedInt16x16
OpPermute2MaskedUint16x16 OpPermute2MaskedUint16x16
OpPermuteMaskedInt16x16
OpPermuteMaskedUint16x16 OpPermuteMaskedUint16x16
OpPermuteMaskedInt16x16
OpPopCountUint16x16 OpPopCountUint16x16
OpPopCountMaskedUint16x16 OpPopCountMaskedUint16x16
OpSaturatedAddUint16x16 OpSaturatedAddUint16x16
@ -5360,12 +5360,12 @@ const (
OpMulHighMaskedUint16x32 OpMulHighMaskedUint16x32
OpNotEqualUint16x32 OpNotEqualUint16x32
OpNotEqualMaskedUint16x32 OpNotEqualMaskedUint16x32
OpPermuteInt16x32
OpPermuteUint16x32 OpPermuteUint16x32
OpPermute2Int16x32 OpPermuteInt16x32
OpPermute2Uint16x32 OpPermute2Uint16x32
OpPermute2MaskedInt16x32 OpPermute2Int16x32
OpPermute2MaskedUint16x32 OpPermute2MaskedUint16x32
OpPermute2MaskedInt16x32
OpPermuteMaskedUint16x32 OpPermuteMaskedUint16x32
OpPermuteMaskedInt16x32 OpPermuteMaskedInt16x32
OpPopCountUint16x32 OpPopCountUint16x32
@ -5416,14 +5416,14 @@ const (
OpOrUint16x8 OpOrUint16x8
OpPairwiseAddUint16x8 OpPairwiseAddUint16x8
OpPairwiseSubUint16x8 OpPairwiseSubUint16x8
OpPermuteUint16x8
OpPermuteInt16x8 OpPermuteInt16x8
OpPermuteUint16x8
OpPermute2Int16x8 OpPermute2Int16x8
OpPermute2Uint16x8 OpPermute2Uint16x8
OpPermute2MaskedUint16x8
OpPermute2MaskedInt16x8 OpPermute2MaskedInt16x8
OpPermuteMaskedInt16x8 OpPermute2MaskedUint16x8
OpPermuteMaskedUint16x8 OpPermuteMaskedUint16x8
OpPermuteMaskedInt16x8
OpPopCountUint16x8 OpPopCountUint16x8
OpPopCountMaskedUint16x8 OpPopCountMaskedUint16x8
OpSaturatedAddUint16x8 OpSaturatedAddUint16x8
@ -5470,26 +5470,24 @@ const (
OpNotEqualMaskedUint32x16 OpNotEqualMaskedUint32x16
OpOrUint32x16 OpOrUint32x16
OpOrMaskedUint32x16 OpOrMaskedUint32x16
OpPermuteInt32x16
OpPermuteFloat32x16 OpPermuteFloat32x16
OpPermuteInt32x16
OpPermuteUint32x16 OpPermuteUint32x16
OpPermute2Uint32x16 OpPermute2Uint32x16
OpPermute2Float32x16 OpPermute2Float32x16
OpPermute2Int32x16 OpPermute2Int32x16
OpPermute2MaskedUint32x16
OpPermute2MaskedInt32x16 OpPermute2MaskedInt32x16
OpPermute2MaskedFloat32x16 OpPermute2MaskedFloat32x16
OpPermute2MaskedUint32x16
OpPermuteMaskedInt32x16
OpPermuteMaskedFloat32x16 OpPermuteMaskedFloat32x16
OpPermuteMaskedUint32x16 OpPermuteMaskedUint32x16
OpPermuteMaskedInt32x16
OpPopCountUint32x16 OpPopCountUint32x16
OpPopCountMaskedUint32x16 OpPopCountMaskedUint32x16
OpRotateLeftUint32x16 OpRotateLeftUint32x16
OpRotateLeftMaskedUint32x16 OpRotateLeftMaskedUint32x16
OpRotateRightUint32x16 OpRotateRightUint32x16
OpRotateRightMaskedUint32x16 OpRotateRightMaskedUint32x16
OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16
OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x16
OpShiftAllLeftUint32x16 OpShiftAllLeftUint32x16
OpShiftAllLeftMaskedUint32x16 OpShiftAllLeftMaskedUint32x16
OpShiftAllRightUint32x16 OpShiftAllRightUint32x16
@ -5504,8 +5502,6 @@ const (
OpShiftRightMaskedUint32x16 OpShiftRightMaskedUint32x16
OpSubUint32x16 OpSubUint32x16
OpSubMaskedUint32x16 OpSubMaskedUint32x16
OpUnsignedSignedQuadDotProdAccumulateUint32x16
OpUnsignedSignedQuadDotProdAccumulateMaskedUint32x16
OpXorUint32x16 OpXorUint32x16
OpXorMaskedUint32x16 OpXorMaskedUint32x16
OpAddUint32x4 OpAddUint32x4
@ -5536,20 +5532,18 @@ const (
OpOrMaskedUint32x4 OpOrMaskedUint32x4
OpPairwiseAddUint32x4 OpPairwiseAddUint32x4
OpPairwiseSubUint32x4 OpPairwiseSubUint32x4
OpPermute2Float32x4
OpPermute2Uint32x4 OpPermute2Uint32x4
OpPermute2Int32x4 OpPermute2Int32x4
OpPermute2Float32x4
OpPermute2MaskedFloat32x4
OpPermute2MaskedInt32x4 OpPermute2MaskedInt32x4
OpPermute2MaskedUint32x4 OpPermute2MaskedUint32x4
OpPermute2MaskedFloat32x4
OpPopCountUint32x4 OpPopCountUint32x4
OpPopCountMaskedUint32x4 OpPopCountMaskedUint32x4
OpRotateLeftUint32x4 OpRotateLeftUint32x4
OpRotateLeftMaskedUint32x4 OpRotateLeftMaskedUint32x4
OpRotateRightUint32x4 OpRotateRightUint32x4
OpRotateRightMaskedUint32x4 OpRotateRightMaskedUint32x4
OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4
OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x4
OpShiftAllLeftUint32x4 OpShiftAllLeftUint32x4
OpShiftAllLeftMaskedUint32x4 OpShiftAllLeftMaskedUint32x4
OpShiftAllRightUint32x4 OpShiftAllRightUint32x4
@ -5564,8 +5558,6 @@ const (
OpShiftRightMaskedUint32x4 OpShiftRightMaskedUint32x4
OpSubUint32x4 OpSubUint32x4
OpSubMaskedUint32x4 OpSubMaskedUint32x4
OpUnsignedSignedQuadDotProdAccumulateUint32x4
OpUnsignedSignedQuadDotProdAccumulateMaskedUint32x4
OpXorUint32x4 OpXorUint32x4
OpXorMaskedUint32x4 OpXorMaskedUint32x4
OpAddUint32x8 OpAddUint32x8
@ -5597,14 +5589,14 @@ const (
OpPairwiseAddUint32x8 OpPairwiseAddUint32x8
OpPairwiseSubUint32x8 OpPairwiseSubUint32x8
OpPermuteUint32x8 OpPermuteUint32x8
OpPermuteInt32x8
OpPermuteFloat32x8 OpPermuteFloat32x8
OpPermute2Uint32x8 OpPermuteInt32x8
OpPermute2Float32x8
OpPermute2Int32x8 OpPermute2Int32x8
OpPermute2Float32x8
OpPermute2Uint32x8
OpPermute2MaskedFloat32x8 OpPermute2MaskedFloat32x8
OpPermute2MaskedInt32x8
OpPermute2MaskedUint32x8 OpPermute2MaskedUint32x8
OpPermute2MaskedInt32x8
OpPermuteMaskedInt32x8 OpPermuteMaskedInt32x8
OpPermuteMaskedUint32x8 OpPermuteMaskedUint32x8
OpPermuteMaskedFloat32x8 OpPermuteMaskedFloat32x8
@ -5614,8 +5606,6 @@ const (
OpRotateLeftMaskedUint32x8 OpRotateLeftMaskedUint32x8
OpRotateRightUint32x8 OpRotateRightUint32x8
OpRotateRightMaskedUint32x8 OpRotateRightMaskedUint32x8
OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8
OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x8
OpShiftAllLeftUint32x8 OpShiftAllLeftUint32x8
OpShiftAllLeftMaskedUint32x8 OpShiftAllLeftMaskedUint32x8
OpShiftAllRightUint32x8 OpShiftAllRightUint32x8
@ -5630,8 +5620,6 @@ const (
OpShiftRightMaskedUint32x8 OpShiftRightMaskedUint32x8
OpSubUint32x8 OpSubUint32x8
OpSubMaskedUint32x8 OpSubMaskedUint32x8
OpUnsignedSignedQuadDotProdAccumulateUint32x8
OpUnsignedSignedQuadDotProdAccumulateMaskedUint32x8
OpXorUint32x8 OpXorUint32x8
OpXorMaskedUint32x8 OpXorMaskedUint32x8
OpAddUint64x2 OpAddUint64x2
@ -5665,8 +5653,8 @@ const (
OpPermute2Uint64x2 OpPermute2Uint64x2
OpPermute2Int64x2 OpPermute2Int64x2
OpPermute2MaskedInt64x2 OpPermute2MaskedInt64x2
OpPermute2MaskedUint64x2
OpPermute2MaskedFloat64x2 OpPermute2MaskedFloat64x2
OpPermute2MaskedUint64x2
OpPopCountUint64x2 OpPopCountUint64x2
OpPopCountMaskedUint64x2 OpPopCountMaskedUint64x2
OpRotateLeftUint64x2 OpRotateLeftUint64x2
@ -5716,18 +5704,18 @@ const (
OpNotEqualMaskedUint64x4 OpNotEqualMaskedUint64x4
OpOrUint64x4 OpOrUint64x4
OpOrMaskedUint64x4 OpOrMaskedUint64x4
OpPermuteFloat64x4
OpPermuteUint64x4 OpPermuteUint64x4
OpPermuteInt64x4 OpPermuteInt64x4
OpPermuteFloat64x4
OpPermute2Float64x4
OpPermute2Int64x4 OpPermute2Int64x4
OpPermute2Uint64x4 OpPermute2Uint64x4
OpPermute2Float64x4
OpPermute2MaskedFloat64x4 OpPermute2MaskedFloat64x4
OpPermute2MaskedUint64x4 OpPermute2MaskedUint64x4
OpPermute2MaskedInt64x4 OpPermute2MaskedInt64x4
OpPermuteMaskedFloat64x4 OpPermuteMaskedFloat64x4
OpPermuteMaskedUint64x4
OpPermuteMaskedInt64x4 OpPermuteMaskedInt64x4
OpPermuteMaskedUint64x4
OpPopCountUint64x4 OpPopCountUint64x4
OpPopCountMaskedUint64x4 OpPopCountMaskedUint64x4
OpRotateLeftUint64x4 OpRotateLeftUint64x4
@ -5777,18 +5765,18 @@ const (
OpNotEqualMaskedUint64x8 OpNotEqualMaskedUint64x8
OpOrUint64x8 OpOrUint64x8
OpOrMaskedUint64x8 OpOrMaskedUint64x8
OpPermuteFloat64x8
OpPermuteInt64x8 OpPermuteInt64x8
OpPermuteUint64x8 OpPermuteUint64x8
OpPermuteFloat64x8
OpPermute2Uint64x8
OpPermute2Float64x8
OpPermute2Int64x8 OpPermute2Int64x8
OpPermute2Float64x8
OpPermute2Uint64x8
OpPermute2MaskedUint64x8 OpPermute2MaskedUint64x8
OpPermute2MaskedFloat64x8
OpPermute2MaskedInt64x8 OpPermute2MaskedInt64x8
OpPermute2MaskedFloat64x8
OpPermuteMaskedUint64x8 OpPermuteMaskedUint64x8
OpPermuteMaskedInt64x8
OpPermuteMaskedFloat64x8 OpPermuteMaskedFloat64x8
OpPermuteMaskedInt64x8
OpPopCountUint64x8 OpPopCountUint64x8
OpPopCountMaskedUint64x8 OpPopCountMaskedUint64x8
OpRotateLeftUint64x8 OpRotateLeftUint64x8
@ -5839,8 +5827,8 @@ const (
OpOrUint8x16 OpOrUint8x16
OpPermuteUint8x16 OpPermuteUint8x16
OpPermuteInt8x16 OpPermuteInt8x16
OpPermute2Int8x16
OpPermute2Uint8x16 OpPermute2Uint8x16
OpPermute2Int8x16
OpPermute2MaskedInt8x16 OpPermute2MaskedInt8x16
OpPermute2MaskedUint8x16 OpPermute2MaskedUint8x16
OpPermuteMaskedUint8x16 OpPermuteMaskedUint8x16
@ -5886,10 +5874,10 @@ const (
OpPermuteInt8x32 OpPermuteInt8x32
OpPermute2Int8x32 OpPermute2Int8x32
OpPermute2Uint8x32 OpPermute2Uint8x32
OpPermute2MaskedInt8x32
OpPermute2MaskedUint8x32 OpPermute2MaskedUint8x32
OpPermuteMaskedInt8x32 OpPermute2MaskedInt8x32
OpPermuteMaskedUint8x32 OpPermuteMaskedUint8x32
OpPermuteMaskedInt8x32
OpPopCountUint8x32 OpPopCountUint8x32
OpPopCountMaskedUint8x32 OpPopCountMaskedUint8x32
OpSaturatedAddUint8x32 OpSaturatedAddUint8x32
@ -65610,12 +65598,12 @@ var opcodeTable = [...]opInfo{
generic: true, generic: true,
}, },
{ {
name: "PermuteMaskedInt16x16", name: "PermuteMaskedUint16x16",
argLen: 3, argLen: 3,
generic: true, generic: true,
}, },
{ {
name: "PermuteMaskedUint16x16", name: "PermuteMaskedInt16x16",
argLen: 3, argLen: 3,
generic: true, generic: true,
}, },
@ -65856,19 +65844,14 @@ var opcodeTable = [...]opInfo{
commutative: true, commutative: true,
generic: true, generic: true,
}, },
{
name: "PermuteInt16x32",
argLen: 2,
generic: true,
},
{ {
name: "PermuteUint16x32", name: "PermuteUint16x32",
argLen: 2, argLen: 2,
generic: true, generic: true,
}, },
{ {
name: "Permute2Int16x32", name: "PermuteInt16x32",
argLen: 3, argLen: 2,
generic: true, generic: true,
}, },
{ {
@ -65877,8 +65860,8 @@ var opcodeTable = [...]opInfo{
generic: true, generic: true,
}, },
{ {
name: "Permute2MaskedInt16x32", name: "Permute2Int16x32",
argLen: 4, argLen: 3,
generic: true, generic: true,
}, },
{ {
@ -65886,6 +65869,11 @@ var opcodeTable = [...]opInfo{
argLen: 4, argLen: 4,
generic: true, generic: true,
}, },
{
name: "Permute2MaskedInt16x32",
argLen: 4,
generic: true,
},
{ {
name: "PermuteMaskedUint16x32", name: "PermuteMaskedUint16x32",
argLen: 3, argLen: 3,
@ -66155,12 +66143,12 @@ var opcodeTable = [...]opInfo{
generic: true, generic: true,
}, },
{ {
name: "PermuteUint16x8", name: "PermuteInt16x8",
argLen: 2, argLen: 2,
generic: true, generic: true,
}, },
{ {
name: "PermuteInt16x8", name: "PermuteUint16x8",
argLen: 2, argLen: 2,
generic: true, generic: true,
}, },
@ -66174,19 +66162,14 @@ var opcodeTable = [...]opInfo{
argLen: 3, argLen: 3,
generic: true, generic: true,
}, },
{
name: "Permute2MaskedUint16x8",
argLen: 4,
generic: true,
},
{ {
name: "Permute2MaskedInt16x8", name: "Permute2MaskedInt16x8",
argLen: 4, argLen: 4,
generic: true, generic: true,
}, },
{ {
name: "PermuteMaskedInt16x8", name: "Permute2MaskedUint16x8",
argLen: 3, argLen: 4,
generic: true, generic: true,
}, },
{ {
@ -66194,6 +66177,11 @@ var opcodeTable = [...]opInfo{
argLen: 3, argLen: 3,
generic: true, generic: true,
}, },
{
name: "PermuteMaskedInt16x8",
argLen: 3,
generic: true,
},
{ {
name: "PopCountUint16x8", name: "PopCountUint16x8",
argLen: 1, argLen: 1,
@ -66442,12 +66430,12 @@ var opcodeTable = [...]opInfo{
generic: true, generic: true,
}, },
{ {
name: "PermuteInt32x16", name: "PermuteFloat32x16",
argLen: 2, argLen: 2,
generic: true, generic: true,
}, },
{ {
name: "PermuteFloat32x16", name: "PermuteInt32x16",
argLen: 2, argLen: 2,
generic: true, generic: true,
}, },
@ -66471,11 +66459,6 @@ var opcodeTable = [...]opInfo{
argLen: 3, argLen: 3,
generic: true, generic: true,
}, },
{
name: "Permute2MaskedUint32x16",
argLen: 4,
generic: true,
},
{ {
name: "Permute2MaskedInt32x16", name: "Permute2MaskedInt32x16",
argLen: 4, argLen: 4,
@ -66486,6 +66469,16 @@ var opcodeTable = [...]opInfo{
argLen: 4, argLen: 4,
generic: true, generic: true,
}, },
{
name: "Permute2MaskedUint32x16",
argLen: 4,
generic: true,
},
{
name: "PermuteMaskedInt32x16",
argLen: 3,
generic: true,
},
{ {
name: "PermuteMaskedFloat32x16", name: "PermuteMaskedFloat32x16",
argLen: 3, argLen: 3,
@ -66496,11 +66489,6 @@ var opcodeTable = [...]opInfo{
argLen: 3, argLen: 3,
generic: true, generic: true,
}, },
{
name: "PermuteMaskedInt32x16",
argLen: 3,
generic: true,
},
{ {
name: "PopCountUint32x16", name: "PopCountUint32x16",
argLen: 1, argLen: 1,
@ -66531,16 +66519,6 @@ var opcodeTable = [...]opInfo{
argLen: 3, argLen: 3,
generic: true, generic: true,
}, },
{
name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x16",
argLen: 3,
generic: true,
},
{
name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x16",
argLen: 4,
generic: true,
},
{ {
name: "ShiftAllLeftUint32x16", name: "ShiftAllLeftUint32x16",
argLen: 2, argLen: 2,
@ -66611,16 +66589,6 @@ var opcodeTable = [...]opInfo{
argLen: 3, argLen: 3,
generic: true, generic: true,
}, },
{
name: "UnsignedSignedQuadDotProdAccumulateUint32x16",
argLen: 3,
generic: true,
},
{
name: "UnsignedSignedQuadDotProdAccumulateMaskedUint32x16",
argLen: 4,
generic: true,
},
{ {
name: "XorUint32x16", name: "XorUint32x16",
argLen: 2, argLen: 2,
@ -66788,6 +66756,11 @@ var opcodeTable = [...]opInfo{
argLen: 2, argLen: 2,
generic: true, generic: true,
}, },
{
name: "Permute2Float32x4",
argLen: 3,
generic: true,
},
{ {
name: "Permute2Uint32x4", name: "Permute2Uint32x4",
argLen: 3, argLen: 3,
@ -66798,16 +66771,6 @@ var opcodeTable = [...]opInfo{
argLen: 3, argLen: 3,
generic: true, generic: true,
}, },
{
name: "Permute2Float32x4",
argLen: 3,
generic: true,
},
{
name: "Permute2MaskedFloat32x4",
argLen: 4,
generic: true,
},
{ {
name: "Permute2MaskedInt32x4", name: "Permute2MaskedInt32x4",
argLen: 4, argLen: 4,
@ -66818,6 +66781,11 @@ var opcodeTable = [...]opInfo{
argLen: 4, argLen: 4,
generic: true, generic: true,
}, },
{
name: "Permute2MaskedFloat32x4",
argLen: 4,
generic: true,
},
{ {
name: "PopCountUint32x4", name: "PopCountUint32x4",
argLen: 1, argLen: 1,
@ -66848,16 +66816,6 @@ var opcodeTable = [...]opInfo{
argLen: 3, argLen: 3,
generic: true, generic: true,
}, },
{
name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x4",
argLen: 3,
generic: true,
},
{
name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x4",
argLen: 4,
generic: true,
},
{ {
name: "ShiftAllLeftUint32x4", name: "ShiftAllLeftUint32x4",
argLen: 2, argLen: 2,
@ -66928,16 +66886,6 @@ var opcodeTable = [...]opInfo{
argLen: 3, argLen: 3,
generic: true, generic: true,
}, },
{
name: "UnsignedSignedQuadDotProdAccumulateUint32x4",
argLen: 3,
generic: true,
},
{
name: "UnsignedSignedQuadDotProdAccumulateMaskedUint32x4",
argLen: 4,
generic: true,
},
{ {
name: "XorUint32x4", name: "XorUint32x4",
argLen: 2, argLen: 2,
@ -67110,18 +67058,18 @@ var opcodeTable = [...]opInfo{
argLen: 2, argLen: 2,
generic: true, generic: true,
}, },
{
name: "PermuteInt32x8",
argLen: 2,
generic: true,
},
{ {
name: "PermuteFloat32x8", name: "PermuteFloat32x8",
argLen: 2, argLen: 2,
generic: true, generic: true,
}, },
{ {
name: "Permute2Uint32x8", name: "PermuteInt32x8",
argLen: 2,
generic: true,
},
{
name: "Permute2Int32x8",
argLen: 3, argLen: 3,
generic: true, generic: true,
}, },
@ -67131,7 +67079,7 @@ var opcodeTable = [...]opInfo{
generic: true, generic: true,
}, },
{ {
name: "Permute2Int32x8", name: "Permute2Uint32x8",
argLen: 3, argLen: 3,
generic: true, generic: true,
}, },
@ -67141,12 +67089,12 @@ var opcodeTable = [...]opInfo{
generic: true, generic: true,
}, },
{ {
name: "Permute2MaskedInt32x8", name: "Permute2MaskedUint32x8",
argLen: 4, argLen: 4,
generic: true, generic: true,
}, },
{ {
name: "Permute2MaskedUint32x8", name: "Permute2MaskedInt32x8",
argLen: 4, argLen: 4,
generic: true, generic: true,
}, },
@ -67195,16 +67143,6 @@ var opcodeTable = [...]opInfo{
argLen: 3, argLen: 3,
generic: true, generic: true,
}, },
{
name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x8",
argLen: 3,
generic: true,
},
{
name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x8",
argLen: 4,
generic: true,
},
{ {
name: "ShiftAllLeftUint32x8", name: "ShiftAllLeftUint32x8",
argLen: 2, argLen: 2,
@ -67275,16 +67213,6 @@ var opcodeTable = [...]opInfo{
argLen: 3, argLen: 3,
generic: true, generic: true,
}, },
{
name: "UnsignedSignedQuadDotProdAccumulateUint32x8",
argLen: 3,
generic: true,
},
{
name: "UnsignedSignedQuadDotProdAccumulateMaskedUint32x8",
argLen: 4,
generic: true,
},
{ {
name: "XorUint32x8", name: "XorUint32x8",
argLen: 2, argLen: 2,
@ -67469,12 +67397,12 @@ var opcodeTable = [...]opInfo{
generic: true, generic: true,
}, },
{ {
name: "Permute2MaskedUint64x2", name: "Permute2MaskedFloat64x2",
argLen: 4, argLen: 4,
generic: true, generic: true,
}, },
{ {
name: "Permute2MaskedFloat64x2", name: "Permute2MaskedUint64x2",
argLen: 4, argLen: 4,
generic: true, generic: true,
}, },
@ -67741,11 +67669,6 @@ var opcodeTable = [...]opInfo{
commutative: true, commutative: true,
generic: true, generic: true,
}, },
{
name: "PermuteFloat64x4",
argLen: 2,
generic: true,
},
{ {
name: "PermuteUint64x4", name: "PermuteUint64x4",
argLen: 2, argLen: 2,
@ -67756,6 +67679,16 @@ var opcodeTable = [...]opInfo{
argLen: 2, argLen: 2,
generic: true, generic: true,
}, },
{
name: "PermuteFloat64x4",
argLen: 2,
generic: true,
},
{
name: "Permute2Float64x4",
argLen: 3,
generic: true,
},
{ {
name: "Permute2Int64x4", name: "Permute2Int64x4",
argLen: 3, argLen: 3,
@ -67766,11 +67699,6 @@ var opcodeTable = [...]opInfo{
argLen: 3, argLen: 3,
generic: true, generic: true,
}, },
{
name: "Permute2Float64x4",
argLen: 3,
generic: true,
},
{ {
name: "Permute2MaskedFloat64x4", name: "Permute2MaskedFloat64x4",
argLen: 4, argLen: 4,
@ -67792,12 +67720,12 @@ var opcodeTable = [...]opInfo{
generic: true, generic: true,
}, },
{ {
name: "PermuteMaskedUint64x4", name: "PermuteMaskedInt64x4",
argLen: 3, argLen: 3,
generic: true, generic: true,
}, },
{ {
name: "PermuteMaskedInt64x4", name: "PermuteMaskedUint64x4",
argLen: 3, argLen: 3,
generic: true, generic: true,
}, },
@ -68064,6 +67992,11 @@ var opcodeTable = [...]opInfo{
commutative: true, commutative: true,
generic: true, generic: true,
}, },
{
name: "PermuteFloat64x8",
argLen: 2,
generic: true,
},
{ {
name: "PermuteInt64x8", name: "PermuteInt64x8",
argLen: 2, argLen: 2,
@ -68075,12 +68008,7 @@ var opcodeTable = [...]opInfo{
generic: true, generic: true,
}, },
{ {
name: "PermuteFloat64x8", name: "Permute2Int64x8",
argLen: 2,
generic: true,
},
{
name: "Permute2Uint64x8",
argLen: 3, argLen: 3,
generic: true, generic: true,
}, },
@ -68090,7 +68018,7 @@ var opcodeTable = [...]opInfo{
generic: true, generic: true,
}, },
{ {
name: "Permute2Int64x8", name: "Permute2Uint64x8",
argLen: 3, argLen: 3,
generic: true, generic: true,
}, },
@ -68100,12 +68028,12 @@ var opcodeTable = [...]opInfo{
generic: true, generic: true,
}, },
{ {
name: "Permute2MaskedFloat64x8", name: "Permute2MaskedInt64x8",
argLen: 4, argLen: 4,
generic: true, generic: true,
}, },
{ {
name: "Permute2MaskedInt64x8", name: "Permute2MaskedFloat64x8",
argLen: 4, argLen: 4,
generic: true, generic: true,
}, },
@ -68115,12 +68043,12 @@ var opcodeTable = [...]opInfo{
generic: true, generic: true,
}, },
{ {
name: "PermuteMaskedInt64x8", name: "PermuteMaskedFloat64x8",
argLen: 3, argLen: 3,
generic: true, generic: true,
}, },
{ {
name: "PermuteMaskedFloat64x8", name: "PermuteMaskedInt64x8",
argLen: 3, argLen: 3,
generic: true, generic: true,
}, },
@ -68391,12 +68319,12 @@ var opcodeTable = [...]opInfo{
generic: true, generic: true,
}, },
{ {
name: "Permute2Int8x16", name: "Permute2Uint8x16",
argLen: 3, argLen: 3,
generic: true, generic: true,
}, },
{ {
name: "Permute2Uint8x16", name: "Permute2Int8x16",
argLen: 3, argLen: 3,
generic: true, generic: true,
}, },
@ -68642,19 +68570,14 @@ var opcodeTable = [...]opInfo{
argLen: 3, argLen: 3,
generic: true, generic: true,
}, },
{
name: "Permute2MaskedInt8x32",
argLen: 4,
generic: true,
},
{ {
name: "Permute2MaskedUint8x32", name: "Permute2MaskedUint8x32",
argLen: 4, argLen: 4,
generic: true, generic: true,
}, },
{ {
name: "PermuteMaskedInt8x32", name: "Permute2MaskedInt8x32",
argLen: 3, argLen: 4,
generic: true, generic: true,
}, },
{ {
@ -68662,6 +68585,11 @@ var opcodeTable = [...]opInfo{
argLen: 3, argLen: 3,
generic: true, generic: true,
}, },
{
name: "PermuteMaskedInt8x32",
argLen: 3,
generic: true,
},
{ {
name: "PopCountUint8x32", name: "PopCountUint8x32",
argLen: 1, argLen: 1,

View file

@ -4297,21 +4297,6 @@ func rewriteValueAMD64(v *Value) bool {
return rewriteValueAMD64_OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4(v) return rewriteValueAMD64_OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4(v)
case OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8: case OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8:
return rewriteValueAMD64_OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8(v) return rewriteValueAMD64_OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8(v)
case OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x16:
return rewriteValueAMD64_OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x16(v)
case OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x4:
return rewriteValueAMD64_OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x4(v)
case OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x8:
return rewriteValueAMD64_OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x8(v)
case OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16:
v.Op = OpAMD64VPDPBUSDS512
return true
case OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4:
v.Op = OpAMD64VPDPBUSDS128
return true
case OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8:
v.Op = OpAMD64VPDPBUSDS256
return true
case OpSelect0: case OpSelect0:
return rewriteValueAMD64_OpSelect0(v) return rewriteValueAMD64_OpSelect0(v)
case OpSelect1: case OpSelect1:
@ -5416,21 +5401,6 @@ func rewriteValueAMD64(v *Value) bool {
return rewriteValueAMD64_OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x4(v) return rewriteValueAMD64_OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x4(v)
case OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x8: case OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x8:
return rewriteValueAMD64_OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x8(v) return rewriteValueAMD64_OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x8(v)
case OpUnsignedSignedQuadDotProdAccumulateMaskedUint32x16:
return rewriteValueAMD64_OpUnsignedSignedQuadDotProdAccumulateMaskedUint32x16(v)
case OpUnsignedSignedQuadDotProdAccumulateMaskedUint32x4:
return rewriteValueAMD64_OpUnsignedSignedQuadDotProdAccumulateMaskedUint32x4(v)
case OpUnsignedSignedQuadDotProdAccumulateMaskedUint32x8:
return rewriteValueAMD64_OpUnsignedSignedQuadDotProdAccumulateMaskedUint32x8(v)
case OpUnsignedSignedQuadDotProdAccumulateUint32x16:
v.Op = OpAMD64VPDPBUSD512
return true
case OpUnsignedSignedQuadDotProdAccumulateUint32x4:
v.Op = OpAMD64VPDPBUSD128
return true
case OpUnsignedSignedQuadDotProdAccumulateUint32x8:
v.Op = OpAMD64VPDPBUSD256
return true
case OpWB: case OpWB:
v.Op = OpAMD64LoweredWB v.Op = OpAMD64LoweredWB
return true return true
@ -49615,66 +49585,6 @@ func rewriteValueAMD64_OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32
return true return true
} }
} }
func rewriteValueAMD64_OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x16(v *Value) bool {
v_3 := v.Args[3]
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
// match: (SaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x16 x y z mask)
// result: (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
z := v_2
mask := v_3
v.reset(OpAMD64VPDPBUSDSMasked512)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg4(x, y, z, v0)
return true
}
}
func rewriteValueAMD64_OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x4(v *Value) bool {
v_3 := v.Args[3]
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
// match: (SaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x4 x y z mask)
// result: (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
z := v_2
mask := v_3
v.reset(OpAMD64VPDPBUSDSMasked128)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg4(x, y, z, v0)
return true
}
}
func rewriteValueAMD64_OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x8(v *Value) bool {
v_3 := v.Args[3]
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
// match: (SaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x8 x y z mask)
// result: (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
z := v_2
mask := v_3
v.reset(OpAMD64VPDPBUSDSMasked256)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg4(x, y, z, v0)
return true
}
}
func rewriteValueAMD64_OpSelect0(v *Value) bool { func rewriteValueAMD64_OpSelect0(v *Value) bool {
v_0 := v.Args[0] v_0 := v.Args[0]
b := v.Block b := v.Block
@ -53973,66 +53883,6 @@ func rewriteValueAMD64_OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x8(v *Val
return true return true
} }
} }
func rewriteValueAMD64_OpUnsignedSignedQuadDotProdAccumulateMaskedUint32x16(v *Value) bool {
v_3 := v.Args[3]
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
// match: (UnsignedSignedQuadDotProdAccumulateMaskedUint32x16 x y z mask)
// result: (VPDPBUSDMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
z := v_2
mask := v_3
v.reset(OpAMD64VPDPBUSDMasked512)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg4(x, y, z, v0)
return true
}
}
func rewriteValueAMD64_OpUnsignedSignedQuadDotProdAccumulateMaskedUint32x4(v *Value) bool {
v_3 := v.Args[3]
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
// match: (UnsignedSignedQuadDotProdAccumulateMaskedUint32x4 x y z mask)
// result: (VPDPBUSDMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
z := v_2
mask := v_3
v.reset(OpAMD64VPDPBUSDMasked128)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg4(x, y, z, v0)
return true
}
}
func rewriteValueAMD64_OpUnsignedSignedQuadDotProdAccumulateMaskedUint32x8(v *Value) bool {
v_3 := v.Args[3]
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
// match: (UnsignedSignedQuadDotProdAccumulateMaskedUint32x8 x y z mask)
// result: (VPDPBUSDMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
z := v_2
mask := v_3
v.reset(OpAMD64VPDPBUSDMasked256)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg4(x, y, z, v0)
return true
}
}
func rewriteValueAMD64_OpXorMaskedInt32x16(v *Value) bool { func rewriteValueAMD64_OpXorMaskedInt32x16(v *Value) bool {
v_2 := v.Args[2] v_2 := v.Args[2]
v_1 := v.Args[1] v_1 := v.Args[1]

View file

@ -1634,6 +1634,12 @@ func opLen3(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa
} }
} }
func opLen3_31(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return s.newValue3(op, t, args[2], args[1], args[0])
}
}
func opLen3_21(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { func opLen3_21(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return s.newValue3(op, t, args[1], args[0], args[2]) return s.newValue3(op, t, args[1], args[0], args[2])
@ -1658,6 +1664,12 @@ func opLen4_231(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []
} }
} }
func opLen4_31(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return s.newValue4(op, t, args[2], args[1], args[0], args[3])
}
}
func plainPanicSimdImm(s *state) { func plainPanicSimdImm(s *state) {
cmp := s.newValue0(ssa.OpConstBool, types.Types[types.TBOOL]) cmp := s.newValue0(ssa.OpConstBool, types.Types[types.TBOOL])
cmp.AuxInt = 0 cmp.AuxInt = 0

View file

@ -993,12 +993,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Int16x8.PairDotProd", opLen2(ssa.OpPairDotProdInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x8.PairDotProd", opLen2(ssa.OpPairDotProdInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x16.PairDotProd", opLen2(ssa.OpPairDotProdInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x16.PairDotProd", opLen2(ssa.OpPairDotProdInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x32.PairDotProd", opLen2(ssa.OpPairDotProdInt16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x32.PairDotProd", opLen2(ssa.OpPairDotProdInt16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int32x4.PairDotProdAccumulate", opLen3(ssa.OpPairDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x8.PairDotProdAccumulate", opLen3_31(ssa.OpPairDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x8.PairDotProdAccumulate", opLen3(ssa.OpPairDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x16.PairDotProdAccumulate", opLen3_31(ssa.OpPairDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int32x16.PairDotProdAccumulate", opLen3(ssa.OpPairDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x32.PairDotProdAccumulate", opLen3_31(ssa.OpPairDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int32x4.PairDotProdAccumulateMasked", opLen4(ssa.OpPairDotProdAccumulateMaskedInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x8.PairDotProdAccumulateMasked", opLen4_31(ssa.OpPairDotProdAccumulateMaskedInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x8.PairDotProdAccumulateMasked", opLen4(ssa.OpPairDotProdAccumulateMaskedInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x16.PairDotProdAccumulateMasked", opLen4_31(ssa.OpPairDotProdAccumulateMaskedInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int32x16.PairDotProdAccumulateMasked", opLen4(ssa.OpPairDotProdAccumulateMaskedInt32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x32.PairDotProdAccumulateMasked", opLen4_31(ssa.OpPairDotProdAccumulateMaskedInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x8.PairDotProdMasked", opLen3(ssa.OpPairDotProdMaskedInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x8.PairDotProdMasked", opLen3(ssa.OpPairDotProdMaskedInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x16.PairDotProdMasked", opLen3(ssa.OpPairDotProdMaskedInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x16.PairDotProdMasked", opLen3(ssa.OpPairDotProdMaskedInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x32.PairDotProdMasked", opLen3(ssa.OpPairDotProdMaskedInt16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x32.PairDotProdMasked", opLen3(ssa.OpPairDotProdMaskedInt16x32, types.TypeVec512), sys.AMD64)
@ -1318,12 +1318,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Uint16x8.SaturatedAddMasked", opLen3(ssa.OpSaturatedAddMaskedUint16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint16x8.SaturatedAddMasked", opLen3(ssa.OpSaturatedAddMaskedUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x16.SaturatedAddMasked", opLen3(ssa.OpSaturatedAddMaskedUint16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint16x16.SaturatedAddMasked", opLen3(ssa.OpSaturatedAddMaskedUint16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x32.SaturatedAddMasked", opLen3(ssa.OpSaturatedAddMaskedUint16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint16x32.SaturatedAddMasked", opLen3(ssa.OpSaturatedAddMaskedUint16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int32x4.SaturatedPairDotProdAccumulate", opLen3(ssa.OpSaturatedPairDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x8.SaturatedPairDotProdAccumulate", opLen3_31(ssa.OpSaturatedPairDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x8.SaturatedPairDotProdAccumulate", opLen3(ssa.OpSaturatedPairDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x16.SaturatedPairDotProdAccumulate", opLen3_31(ssa.OpSaturatedPairDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int32x16.SaturatedPairDotProdAccumulate", opLen3(ssa.OpSaturatedPairDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x32.SaturatedPairDotProdAccumulate", opLen3_31(ssa.OpSaturatedPairDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int32x4.SaturatedPairDotProdAccumulateMasked", opLen4(ssa.OpSaturatedPairDotProdAccumulateMaskedInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x8.SaturatedPairDotProdAccumulateMasked", opLen4_31(ssa.OpSaturatedPairDotProdAccumulateMaskedInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x8.SaturatedPairDotProdAccumulateMasked", opLen4(ssa.OpSaturatedPairDotProdAccumulateMaskedInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x16.SaturatedPairDotProdAccumulateMasked", opLen4_31(ssa.OpSaturatedPairDotProdAccumulateMaskedInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int32x16.SaturatedPairDotProdAccumulateMasked", opLen4(ssa.OpSaturatedPairDotProdAccumulateMaskedInt32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x32.SaturatedPairDotProdAccumulateMasked", opLen4_31(ssa.OpSaturatedPairDotProdAccumulateMaskedInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x8.SaturatedPairwiseAdd", opLen2(ssa.OpSaturatedPairwiseAddInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x8.SaturatedPairwiseAdd", opLen2(ssa.OpSaturatedPairwiseAddInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x16.SaturatedPairwiseAdd", opLen2(ssa.OpSaturatedPairwiseAddInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x16.SaturatedPairwiseAdd", opLen2(ssa.OpSaturatedPairwiseAddInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x8.SaturatedPairwiseSub", opLen2(ssa.OpSaturatedPairwiseSubInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x8.SaturatedPairwiseSub", opLen2(ssa.OpSaturatedPairwiseSubInt16x8, types.TypeVec128), sys.AMD64)
@ -1358,18 +1358,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Uint8x16.SaturatedUnsignedSignedPairDotProdMasked", opLen3(ssa.OpSaturatedUnsignedSignedPairDotProdMaskedUint8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint8x16.SaturatedUnsignedSignedPairDotProdMasked", opLen3(ssa.OpSaturatedUnsignedSignedPairDotProdMaskedUint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint8x32.SaturatedUnsignedSignedPairDotProdMasked", opLen3(ssa.OpSaturatedUnsignedSignedPairDotProdMaskedUint8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint8x32.SaturatedUnsignedSignedPairDotProdMasked", opLen3(ssa.OpSaturatedUnsignedSignedPairDotProdMaskedUint8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint8x64.SaturatedUnsignedSignedPairDotProdMasked", opLen3(ssa.OpSaturatedUnsignedSignedPairDotProdMaskedUint8x64, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint8x64.SaturatedUnsignedSignedPairDotProdMasked", opLen3(ssa.OpSaturatedUnsignedSignedPairDotProdMaskedUint8x64, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int32x4.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x16.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3_31(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x8.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x32.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3_31(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int32x16.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x64.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3_31(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint32x4.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x16.SaturatedUnsignedSignedQuadDotProdAccumulateMasked", opLen4_31(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint32x8.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x32.SaturatedUnsignedSignedQuadDotProdAccumulateMasked", opLen4_31(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint32x16.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x64.SaturatedUnsignedSignedQuadDotProdAccumulateMasked", opLen4_31(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int32x4.SaturatedUnsignedSignedQuadDotProdAccumulateMasked", opLen4(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x8.SaturatedUnsignedSignedQuadDotProdAccumulateMasked", opLen4(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int32x16.SaturatedUnsignedSignedQuadDotProdAccumulateMasked", opLen4(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint32x4.SaturatedUnsignedSignedQuadDotProdAccumulateMasked", opLen4(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint32x8.SaturatedUnsignedSignedQuadDotProdAccumulateMasked", opLen4(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint32x16.SaturatedUnsignedSignedQuadDotProdAccumulateMasked", opLen4(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x8.Set128", opLen2Imm8(ssa.OpSet128Float32x8, types.TypeVec256, 0), sys.AMD64) addF(simdPackage, "Float32x8.Set128", opLen2Imm8(ssa.OpSet128Float32x8, types.TypeVec256, 0), sys.AMD64)
addF(simdPackage, "Float64x4.Set128", opLen2Imm8(ssa.OpSet128Float64x4, types.TypeVec256, 0), sys.AMD64) addF(simdPackage, "Float64x4.Set128", opLen2Imm8(ssa.OpSet128Float64x4, types.TypeVec256, 0), sys.AMD64)
addF(simdPackage, "Int8x32.Set128", opLen2Imm8(ssa.OpSet128Int8x32, types.TypeVec256, 0), sys.AMD64) addF(simdPackage, "Int8x32.Set128", opLen2Imm8(ssa.OpSet128Int8x32, types.TypeVec256, 0), sys.AMD64)
@ -1770,18 +1764,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Float64x2.TruncWithPrecisionMasked", opLen2Imm8(ssa.OpTruncWithPrecisionMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x2.TruncWithPrecisionMasked", opLen2Imm8(ssa.OpTruncWithPrecisionMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
addF(simdPackage, "Float64x4.TruncWithPrecisionMasked", opLen2Imm8(ssa.OpTruncWithPrecisionMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x4.TruncWithPrecisionMasked", opLen2Imm8(ssa.OpTruncWithPrecisionMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
addF(simdPackage, "Float64x8.TruncWithPrecisionMasked", opLen2Imm8(ssa.OpTruncWithPrecisionMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x8.TruncWithPrecisionMasked", opLen2Imm8(ssa.OpTruncWithPrecisionMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
addF(simdPackage, "Int32x4.UnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpUnsignedSignedQuadDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x16.UnsignedSignedQuadDotProdAccumulate", opLen3_31(ssa.OpUnsignedSignedQuadDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x8.UnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpUnsignedSignedQuadDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x32.UnsignedSignedQuadDotProdAccumulate", opLen3_31(ssa.OpUnsignedSignedQuadDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int32x16.UnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpUnsignedSignedQuadDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x64.UnsignedSignedQuadDotProdAccumulate", opLen3_31(ssa.OpUnsignedSignedQuadDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint32x4.UnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpUnsignedSignedQuadDotProdAccumulateUint32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x16.UnsignedSignedQuadDotProdAccumulateMasked", opLen4_31(ssa.OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint32x8.UnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpUnsignedSignedQuadDotProdAccumulateUint32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x32.UnsignedSignedQuadDotProdAccumulateMasked", opLen4_31(ssa.OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint32x16.UnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpUnsignedSignedQuadDotProdAccumulateUint32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x64.UnsignedSignedQuadDotProdAccumulateMasked", opLen4_31(ssa.OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int32x4.UnsignedSignedQuadDotProdAccumulateMasked", opLen4(ssa.OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x8.UnsignedSignedQuadDotProdAccumulateMasked", opLen4(ssa.OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int32x16.UnsignedSignedQuadDotProdAccumulateMasked", opLen4(ssa.OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint32x4.UnsignedSignedQuadDotProdAccumulateMasked", opLen4(ssa.OpUnsignedSignedQuadDotProdAccumulateMaskedUint32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint32x8.UnsignedSignedQuadDotProdAccumulateMasked", opLen4(ssa.OpUnsignedSignedQuadDotProdAccumulateMaskedUint32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint32x16.UnsignedSignedQuadDotProdAccumulateMasked", opLen4(ssa.OpUnsignedSignedQuadDotProdAccumulateMaskedUint32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.Xor", opLen2(ssa.OpXorInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x16.Xor", opLen2(ssa.OpXorInt8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x32.Xor", opLen2(ssa.OpXorInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x32.Xor", opLen2(ssa.OpXorInt8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x8.Xor", opLen2(ssa.OpXorInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x8.Xor", opLen2(ssa.OpXorInt16x8, types.TypeVec128), sys.AMD64)

View file

@ -2115,192 +2115,192 @@ func (x Float64x8) FloorWithPrecisionMasked(prec uint8, mask Mask64x8) Float64x8
/* FusedMultiplyAdd */ /* FusedMultiplyAdd */
// FusedMultiplyAdd performs `(v1 * v2) + v3`. // FusedMultiplyAdd performs (x * y) + z.
// //
// Asm: VFMADD213PS, CPU Feature: AVX512F // Asm: VFMADD213PS, CPU Feature: AVX512F
func (x Float32x4) FusedMultiplyAdd(y Float32x4, z Float32x4) Float32x4 func (x Float32x4) FusedMultiplyAdd(y Float32x4, z Float32x4) Float32x4
// FusedMultiplyAdd performs `(v1 * v2) + v3`. // FusedMultiplyAdd performs (x * y) + z.
// //
// Asm: VFMADD213PS, CPU Feature: AVX512F // Asm: VFMADD213PS, CPU Feature: AVX512F
func (x Float32x8) FusedMultiplyAdd(y Float32x8, z Float32x8) Float32x8 func (x Float32x8) FusedMultiplyAdd(y Float32x8, z Float32x8) Float32x8
// FusedMultiplyAdd performs `(v1 * v2) + v3`. // FusedMultiplyAdd performs (x * y) + z.
// //
// Asm: VFMADD213PS, CPU Feature: AVX512F // Asm: VFMADD213PS, CPU Feature: AVX512F
func (x Float32x16) FusedMultiplyAdd(y Float32x16, z Float32x16) Float32x16 func (x Float32x16) FusedMultiplyAdd(y Float32x16, z Float32x16) Float32x16
// FusedMultiplyAdd performs `(v1 * v2) + v3`. // FusedMultiplyAdd performs (x * y) + z.
// //
// Asm: VFMADD213PD, CPU Feature: AVX512F // Asm: VFMADD213PD, CPU Feature: AVX512F
func (x Float64x2) FusedMultiplyAdd(y Float64x2, z Float64x2) Float64x2 func (x Float64x2) FusedMultiplyAdd(y Float64x2, z Float64x2) Float64x2
// FusedMultiplyAdd performs `(v1 * v2) + v3`. // FusedMultiplyAdd performs (x * y) + z.
// //
// Asm: VFMADD213PD, CPU Feature: AVX512F // Asm: VFMADD213PD, CPU Feature: AVX512F
func (x Float64x4) FusedMultiplyAdd(y Float64x4, z Float64x4) Float64x4 func (x Float64x4) FusedMultiplyAdd(y Float64x4, z Float64x4) Float64x4
// FusedMultiplyAdd performs `(v1 * v2) + v3`. // FusedMultiplyAdd performs (x * y) + z.
// //
// Asm: VFMADD213PD, CPU Feature: AVX512F // Asm: VFMADD213PD, CPU Feature: AVX512F
func (x Float64x8) FusedMultiplyAdd(y Float64x8, z Float64x8) Float64x8 func (x Float64x8) FusedMultiplyAdd(y Float64x8, z Float64x8) Float64x8
/* FusedMultiplyAddMasked */ /* FusedMultiplyAddMasked */
// FusedMultiplyAddMasked performs `(v1 * v2) + v3`. // FusedMultiplyAddMasked performs (x * y) + z.
// //
// Asm: VFMADD213PS, CPU Feature: AVX512F // Asm: VFMADD213PS, CPU Feature: AVX512F
func (x Float32x4) FusedMultiplyAddMasked(y Float32x4, z Float32x4, mask Mask32x4) Float32x4 func (x Float32x4) FusedMultiplyAddMasked(y Float32x4, z Float32x4, mask Mask32x4) Float32x4
// FusedMultiplyAddMasked performs `(v1 * v2) + v3`. // FusedMultiplyAddMasked performs (x * y) + z.
// //
// Asm: VFMADD213PS, CPU Feature: AVX512F // Asm: VFMADD213PS, CPU Feature: AVX512F
func (x Float32x8) FusedMultiplyAddMasked(y Float32x8, z Float32x8, mask Mask32x8) Float32x8 func (x Float32x8) FusedMultiplyAddMasked(y Float32x8, z Float32x8, mask Mask32x8) Float32x8
// FusedMultiplyAddMasked performs `(v1 * v2) + v3`. // FusedMultiplyAddMasked performs (x * y) + z.
// //
// Asm: VFMADD213PS, CPU Feature: AVX512F // Asm: VFMADD213PS, CPU Feature: AVX512F
func (x Float32x16) FusedMultiplyAddMasked(y Float32x16, z Float32x16, mask Mask32x16) Float32x16 func (x Float32x16) FusedMultiplyAddMasked(y Float32x16, z Float32x16, mask Mask32x16) Float32x16
// FusedMultiplyAddMasked performs `(v1 * v2) + v3`. // FusedMultiplyAddMasked performs (x * y) + z.
// //
// Asm: VFMADD213PD, CPU Feature: AVX512F // Asm: VFMADD213PD, CPU Feature: AVX512F
func (x Float64x2) FusedMultiplyAddMasked(y Float64x2, z Float64x2, mask Mask64x2) Float64x2 func (x Float64x2) FusedMultiplyAddMasked(y Float64x2, z Float64x2, mask Mask64x2) Float64x2
// FusedMultiplyAddMasked performs `(v1 * v2) + v3`. // FusedMultiplyAddMasked performs (x * y) + z.
// //
// Asm: VFMADD213PD, CPU Feature: AVX512F // Asm: VFMADD213PD, CPU Feature: AVX512F
func (x Float64x4) FusedMultiplyAddMasked(y Float64x4, z Float64x4, mask Mask64x4) Float64x4 func (x Float64x4) FusedMultiplyAddMasked(y Float64x4, z Float64x4, mask Mask64x4) Float64x4
// FusedMultiplyAddMasked performs `(v1 * v2) + v3`. // FusedMultiplyAddMasked performs (x * y) + z.
// //
// Asm: VFMADD213PD, CPU Feature: AVX512F // Asm: VFMADD213PD, CPU Feature: AVX512F
func (x Float64x8) FusedMultiplyAddMasked(y Float64x8, z Float64x8, mask Mask64x8) Float64x8 func (x Float64x8) FusedMultiplyAddMasked(y Float64x8, z Float64x8, mask Mask64x8) Float64x8
/* FusedMultiplyAddSub */ /* FusedMultiplyAddSub */
// FusedMultiplyAddSub performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements. // FusedMultiplyAddSub performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
// //
// Asm: VFMADDSUB213PS, CPU Feature: AVX512F // Asm: VFMADDSUB213PS, CPU Feature: AVX512F
func (x Float32x4) FusedMultiplyAddSub(y Float32x4, z Float32x4) Float32x4 func (x Float32x4) FusedMultiplyAddSub(y Float32x4, z Float32x4) Float32x4
// FusedMultiplyAddSub performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements. // FusedMultiplyAddSub performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
// //
// Asm: VFMADDSUB213PS, CPU Feature: AVX512F // Asm: VFMADDSUB213PS, CPU Feature: AVX512F
func (x Float32x8) FusedMultiplyAddSub(y Float32x8, z Float32x8) Float32x8 func (x Float32x8) FusedMultiplyAddSub(y Float32x8, z Float32x8) Float32x8
// FusedMultiplyAddSub performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements. // FusedMultiplyAddSub performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
// //
// Asm: VFMADDSUB213PS, CPU Feature: AVX512F // Asm: VFMADDSUB213PS, CPU Feature: AVX512F
func (x Float32x16) FusedMultiplyAddSub(y Float32x16, z Float32x16) Float32x16 func (x Float32x16) FusedMultiplyAddSub(y Float32x16, z Float32x16) Float32x16
// FusedMultiplyAddSub performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements. // FusedMultiplyAddSub performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
// //
// Asm: VFMADDSUB213PD, CPU Feature: AVX512F // Asm: VFMADDSUB213PD, CPU Feature: AVX512F
func (x Float64x2) FusedMultiplyAddSub(y Float64x2, z Float64x2) Float64x2 func (x Float64x2) FusedMultiplyAddSub(y Float64x2, z Float64x2) Float64x2
// FusedMultiplyAddSub performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements. // FusedMultiplyAddSub performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
// //
// Asm: VFMADDSUB213PD, CPU Feature: AVX512F // Asm: VFMADDSUB213PD, CPU Feature: AVX512F
func (x Float64x4) FusedMultiplyAddSub(y Float64x4, z Float64x4) Float64x4 func (x Float64x4) FusedMultiplyAddSub(y Float64x4, z Float64x4) Float64x4
// FusedMultiplyAddSub performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements. // FusedMultiplyAddSub performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
// //
// Asm: VFMADDSUB213PD, CPU Feature: AVX512F // Asm: VFMADDSUB213PD, CPU Feature: AVX512F
func (x Float64x8) FusedMultiplyAddSub(y Float64x8, z Float64x8) Float64x8 func (x Float64x8) FusedMultiplyAddSub(y Float64x8, z Float64x8) Float64x8
/* FusedMultiplyAddSubMasked */ /* FusedMultiplyAddSubMasked */
// FusedMultiplyAddSubMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements. // FusedMultiplyAddSubMasked performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
// //
// Asm: VFMADDSUB213PS, CPU Feature: AVX512F // Asm: VFMADDSUB213PS, CPU Feature: AVX512F
func (x Float32x4) FusedMultiplyAddSubMasked(y Float32x4, z Float32x4, mask Mask32x4) Float32x4 func (x Float32x4) FusedMultiplyAddSubMasked(y Float32x4, z Float32x4, mask Mask32x4) Float32x4
// FusedMultiplyAddSubMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements. // FusedMultiplyAddSubMasked performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
// //
// Asm: VFMADDSUB213PS, CPU Feature: AVX512F // Asm: VFMADDSUB213PS, CPU Feature: AVX512F
func (x Float32x8) FusedMultiplyAddSubMasked(y Float32x8, z Float32x8, mask Mask32x8) Float32x8 func (x Float32x8) FusedMultiplyAddSubMasked(y Float32x8, z Float32x8, mask Mask32x8) Float32x8
// FusedMultiplyAddSubMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements. // FusedMultiplyAddSubMasked performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
// //
// Asm: VFMADDSUB213PS, CPU Feature: AVX512F // Asm: VFMADDSUB213PS, CPU Feature: AVX512F
func (x Float32x16) FusedMultiplyAddSubMasked(y Float32x16, z Float32x16, mask Mask32x16) Float32x16 func (x Float32x16) FusedMultiplyAddSubMasked(y Float32x16, z Float32x16, mask Mask32x16) Float32x16
// FusedMultiplyAddSubMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements. // FusedMultiplyAddSubMasked performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
// //
// Asm: VFMADDSUB213PD, CPU Feature: AVX512F // Asm: VFMADDSUB213PD, CPU Feature: AVX512F
func (x Float64x2) FusedMultiplyAddSubMasked(y Float64x2, z Float64x2, mask Mask64x2) Float64x2 func (x Float64x2) FusedMultiplyAddSubMasked(y Float64x2, z Float64x2, mask Mask64x2) Float64x2
// FusedMultiplyAddSubMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements. // FusedMultiplyAddSubMasked performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
// //
// Asm: VFMADDSUB213PD, CPU Feature: AVX512F // Asm: VFMADDSUB213PD, CPU Feature: AVX512F
func (x Float64x4) FusedMultiplyAddSubMasked(y Float64x4, z Float64x4, mask Mask64x4) Float64x4 func (x Float64x4) FusedMultiplyAddSubMasked(y Float64x4, z Float64x4, mask Mask64x4) Float64x4
// FusedMultiplyAddSubMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements. // FusedMultiplyAddSubMasked performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
// //
// Asm: VFMADDSUB213PD, CPU Feature: AVX512F // Asm: VFMADDSUB213PD, CPU Feature: AVX512F
func (x Float64x8) FusedMultiplyAddSubMasked(y Float64x8, z Float64x8, mask Mask64x8) Float64x8 func (x Float64x8) FusedMultiplyAddSubMasked(y Float64x8, z Float64x8, mask Mask64x8) Float64x8
/* FusedMultiplySubAdd */ /* FusedMultiplySubAdd */
// FusedMultiplySubAdd performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements. // FusedMultiplySubAdd performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
// //
// Asm: VFMSUBADD213PS, CPU Feature: AVX512F // Asm: VFMSUBADD213PS, CPU Feature: AVX512F
func (x Float32x4) FusedMultiplySubAdd(y Float32x4, z Float32x4) Float32x4 func (x Float32x4) FusedMultiplySubAdd(y Float32x4, z Float32x4) Float32x4
// FusedMultiplySubAdd performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements. // FusedMultiplySubAdd performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
// //
// Asm: VFMSUBADD213PS, CPU Feature: AVX512F // Asm: VFMSUBADD213PS, CPU Feature: AVX512F
func (x Float32x8) FusedMultiplySubAdd(y Float32x8, z Float32x8) Float32x8 func (x Float32x8) FusedMultiplySubAdd(y Float32x8, z Float32x8) Float32x8
// FusedMultiplySubAdd performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements. // FusedMultiplySubAdd performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
// //
// Asm: VFMSUBADD213PS, CPU Feature: AVX512F // Asm: VFMSUBADD213PS, CPU Feature: AVX512F
func (x Float32x16) FusedMultiplySubAdd(y Float32x16, z Float32x16) Float32x16 func (x Float32x16) FusedMultiplySubAdd(y Float32x16, z Float32x16) Float32x16
// FusedMultiplySubAdd performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements. // FusedMultiplySubAdd performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
// //
// Asm: VFMSUBADD213PD, CPU Feature: AVX512F // Asm: VFMSUBADD213PD, CPU Feature: AVX512F
func (x Float64x2) FusedMultiplySubAdd(y Float64x2, z Float64x2) Float64x2 func (x Float64x2) FusedMultiplySubAdd(y Float64x2, z Float64x2) Float64x2
// FusedMultiplySubAdd performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements. // FusedMultiplySubAdd performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
// //
// Asm: VFMSUBADD213PD, CPU Feature: AVX512F // Asm: VFMSUBADD213PD, CPU Feature: AVX512F
func (x Float64x4) FusedMultiplySubAdd(y Float64x4, z Float64x4) Float64x4 func (x Float64x4) FusedMultiplySubAdd(y Float64x4, z Float64x4) Float64x4
// FusedMultiplySubAdd performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements. // FusedMultiplySubAdd performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
// //
// Asm: VFMSUBADD213PD, CPU Feature: AVX512F // Asm: VFMSUBADD213PD, CPU Feature: AVX512F
func (x Float64x8) FusedMultiplySubAdd(y Float64x8, z Float64x8) Float64x8 func (x Float64x8) FusedMultiplySubAdd(y Float64x8, z Float64x8) Float64x8
/* FusedMultiplySubAddMasked */ /* FusedMultiplySubAddMasked */
// FusedMultiplySubAddMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements. // FusedMultiplySubAddMasked performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
// //
// Asm: VFMSUBADD213PS, CPU Feature: AVX512F // Asm: VFMSUBADD213PS, CPU Feature: AVX512F
func (x Float32x4) FusedMultiplySubAddMasked(y Float32x4, z Float32x4, mask Mask32x4) Float32x4 func (x Float32x4) FusedMultiplySubAddMasked(y Float32x4, z Float32x4, mask Mask32x4) Float32x4
// FusedMultiplySubAddMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements. // FusedMultiplySubAddMasked performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
// //
// Asm: VFMSUBADD213PS, CPU Feature: AVX512F // Asm: VFMSUBADD213PS, CPU Feature: AVX512F
func (x Float32x8) FusedMultiplySubAddMasked(y Float32x8, z Float32x8, mask Mask32x8) Float32x8 func (x Float32x8) FusedMultiplySubAddMasked(y Float32x8, z Float32x8, mask Mask32x8) Float32x8
// FusedMultiplySubAddMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements. // FusedMultiplySubAddMasked performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
// //
// Asm: VFMSUBADD213PS, CPU Feature: AVX512F // Asm: VFMSUBADD213PS, CPU Feature: AVX512F
func (x Float32x16) FusedMultiplySubAddMasked(y Float32x16, z Float32x16, mask Mask32x16) Float32x16 func (x Float32x16) FusedMultiplySubAddMasked(y Float32x16, z Float32x16, mask Mask32x16) Float32x16
// FusedMultiplySubAddMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements. // FusedMultiplySubAddMasked performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
// //
// Asm: VFMSUBADD213PD, CPU Feature: AVX512F // Asm: VFMSUBADD213PD, CPU Feature: AVX512F
func (x Float64x2) FusedMultiplySubAddMasked(y Float64x2, z Float64x2, mask Mask64x2) Float64x2 func (x Float64x2) FusedMultiplySubAddMasked(y Float64x2, z Float64x2, mask Mask64x2) Float64x2
// FusedMultiplySubAddMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements. // FusedMultiplySubAddMasked performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
// //
// Asm: VFMSUBADD213PD, CPU Feature: AVX512F // Asm: VFMSUBADD213PD, CPU Feature: AVX512F
func (x Float64x4) FusedMultiplySubAddMasked(y Float64x4, z Float64x4, mask Mask64x4) Float64x4 func (x Float64x4) FusedMultiplySubAddMasked(y Float64x4, z Float64x4, mask Mask64x4) Float64x4
// FusedMultiplySubAddMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements. // FusedMultiplySubAddMasked performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
// //
// Asm: VFMSUBADD213PD, CPU Feature: AVX512F // Asm: VFMSUBADD213PD, CPU Feature: AVX512F
func (x Float64x8) FusedMultiplySubAddMasked(y Float64x8, z Float64x8, mask Mask64x8) Float64x8 func (x Float64x8) FusedMultiplySubAddMasked(y Float64x8, z Float64x8, mask Mask64x8) Float64x8
@ -5373,37 +5373,37 @@ func (x Int16x32) PairDotProd(y Int16x32) Int32x16
/* PairDotProdAccumulate */ /* PairDotProdAccumulate */
// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. // PairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z.
// //
// Asm: VPDPWSSD, CPU Feature: AVXVNNI // Asm: VPDPWSSD, CPU Feature: AVXVNNI
func (x Int32x4) PairDotProdAccumulate(y Int16x8, z Int16x8) Int32x4 func (x Int16x8) PairDotProdAccumulate(y Int16x8, z Int32x4) Int32x4
// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. // PairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z.
// //
// Asm: VPDPWSSD, CPU Feature: AVXVNNI // Asm: VPDPWSSD, CPU Feature: AVXVNNI
func (x Int32x8) PairDotProdAccumulate(y Int16x16, z Int16x16) Int32x8 func (x Int16x16) PairDotProdAccumulate(y Int16x16, z Int32x8) Int32x8
// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. // PairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z.
// //
// Asm: VPDPWSSD, CPU Feature: AVX512VNNI // Asm: VPDPWSSD, CPU Feature: AVX512VNNI
func (x Int32x16) PairDotProdAccumulate(y Int16x32, z Int16x32) Int32x16 func (x Int16x32) PairDotProdAccumulate(y Int16x32, z Int32x16) Int32x16
/* PairDotProdAccumulateMasked */ /* PairDotProdAccumulateMasked */
// PairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results to x. // PairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z.
// //
// Asm: VPDPWSSD, CPU Feature: AVX512VNNI // Asm: VPDPWSSD, CPU Feature: AVX512VNNI
func (x Int32x4) PairDotProdAccumulateMasked(y Int16x8, z Int16x8, mask Mask32x4) Int32x4 func (x Int16x8) PairDotProdAccumulateMasked(y Int16x8, z Int32x4, mask Mask32x4) Int32x4
// PairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results to x. // PairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z.
// //
// Asm: VPDPWSSD, CPU Feature: AVX512VNNI // Asm: VPDPWSSD, CPU Feature: AVX512VNNI
func (x Int32x8) PairDotProdAccumulateMasked(y Int16x16, z Int16x16, mask Mask32x8) Int32x8 func (x Int16x16) PairDotProdAccumulateMasked(y Int16x16, z Int32x8, mask Mask32x8) Int32x8
// PairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results to x. // PairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z.
// //
// Asm: VPDPWSSD, CPU Feature: AVX512VNNI // Asm: VPDPWSSD, CPU Feature: AVX512VNNI
func (x Int32x16) PairDotProdAccumulateMasked(y Int16x32, z Int16x32, mask Mask32x16) Int32x16 func (x Int16x32) PairDotProdAccumulateMasked(y Int16x32, z Int32x16, mask Mask32x16) Int32x16
/* PairDotProdMasked */ /* PairDotProdMasked */
@ -7469,37 +7469,37 @@ func (x Uint16x32) SaturatedAddMasked(y Uint16x32, mask Mask16x32) Uint16x32
/* SaturatedPairDotProdAccumulate */ /* SaturatedPairDotProdAccumulate */
// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. // SaturatedPairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z.
// //
// Asm: VPDPWSSDS, CPU Feature: AVXVNNI // Asm: VPDPWSSDS, CPU Feature: AVXVNNI
func (x Int32x4) SaturatedPairDotProdAccumulate(y Int16x8, z Int16x8) Int32x4 func (x Int16x8) SaturatedPairDotProdAccumulate(y Int16x8, z Int32x4) Int32x4
// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. // SaturatedPairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z.
// //
// Asm: VPDPWSSDS, CPU Feature: AVXVNNI // Asm: VPDPWSSDS, CPU Feature: AVXVNNI
func (x Int32x8) SaturatedPairDotProdAccumulate(y Int16x16, z Int16x16) Int32x8 func (x Int16x16) SaturatedPairDotProdAccumulate(y Int16x16, z Int32x8) Int32x8
// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. // SaturatedPairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z.
// //
// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI // Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
func (x Int32x16) SaturatedPairDotProdAccumulate(y Int16x32, z Int16x32) Int32x16 func (x Int16x32) SaturatedPairDotProdAccumulate(y Int16x32, z Int32x16) Int32x16
/* SaturatedPairDotProdAccumulateMasked */ /* SaturatedPairDotProdAccumulateMasked */
// SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results to x. // SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z.
// //
// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI // Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
func (x Int32x4) SaturatedPairDotProdAccumulateMasked(y Int16x8, z Int16x8, mask Mask32x4) Int32x4 func (x Int16x8) SaturatedPairDotProdAccumulateMasked(y Int16x8, z Int32x4, mask Mask32x4) Int32x4
// SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results to x. // SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z.
// //
// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI // Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
func (x Int32x8) SaturatedPairDotProdAccumulateMasked(y Int16x16, z Int16x16, mask Mask32x8) Int32x8 func (x Int16x16) SaturatedPairDotProdAccumulateMasked(y Int16x16, z Int32x8, mask Mask32x8) Int32x8
// SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results to x. // SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z.
// //
// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI // Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
func (x Int32x16) SaturatedPairDotProdAccumulateMasked(y Int16x32, z Int16x32, mask Mask32x16) Int32x16 func (x Int16x32) SaturatedPairDotProdAccumulateMasked(y Int16x32, z Int32x16, mask Mask32x16) Int32x16
/* SaturatedPairwiseAdd */ /* SaturatedPairwiseAdd */
@ -7695,67 +7695,37 @@ func (x Uint8x64) SaturatedUnsignedSignedPairDotProdMasked(y Int8x64, mask Mask1
/* SaturatedUnsignedSignedQuadDotProdAccumulate */ /* SaturatedUnsignedSignedQuadDotProdAccumulate */
// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. // SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of x and y and then adds z.
// //
// Asm: VPDPBUSDS, CPU Feature: AVXVNNI // Asm: VPDPBUSDS, CPU Feature: AVXVNNI
func (x Int32x4) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int8x16) Int32x4 func (x Int8x16) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int32x4) Int32x4
// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. // SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of x and y and then adds z.
// //
// Asm: VPDPBUSDS, CPU Feature: AVXVNNI // Asm: VPDPBUSDS, CPU Feature: AVXVNNI
func (x Int32x8) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int8x32) Int32x8 func (x Int8x32) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int32x8) Int32x8
// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. // SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of x and y and then adds z.
// //
// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI // Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
func (x Int32x16) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int8x64) Int32x16 func (x Int8x64) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16) Int32x16
// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
//
// Asm: VPDPBUSDS, CPU Feature: AVXVNNI
func (x Uint32x4) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int8x16) Uint32x4
// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
//
// Asm: VPDPBUSDS, CPU Feature: AVXVNNI
func (x Uint32x8) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int8x32) Uint32x8
// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
//
// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
func (x Uint32x16) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int8x64) Uint32x16
/* SaturatedUnsignedSignedQuadDotProdAccumulateMasked */ /* SaturatedUnsignedSignedQuadDotProdAccumulateMasked */
// SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. // SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of x and y and then adds z.
// //
// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI // Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
func (x Int32x4) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int8x16, mask Mask32x4) Int32x4 func (x Int8x16) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int32x4, mask Mask32x4) Int32x4
// SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. // SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of x and y and then adds z.
// //
// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI // Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
func (x Int32x8) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int8x32, mask Mask32x8) Int32x8 func (x Int8x32) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int32x8, mask Mask32x8) Int32x8
// SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. // SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of x and y and then adds z.
// //
// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI // Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
func (x Int32x16) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int8x64, mask Mask32x16) Int32x16 func (x Int8x64) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int32x16, mask Mask32x16) Int32x16
// SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
//
// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
func (x Uint32x4) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int8x16, mask Mask32x4) Uint32x4
// SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
//
// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
func (x Uint32x8) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int8x32, mask Mask32x8) Uint32x8
// SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
//
// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
func (x Uint32x16) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int8x64, mask Mask32x16) Uint32x16
/* Set128 */ /* Set128 */
@ -10165,67 +10135,37 @@ func (x Float64x8) TruncWithPrecisionMasked(prec uint8, mask Mask64x8) Float64x8
/* UnsignedSignedQuadDotProdAccumulate */ /* UnsignedSignedQuadDotProdAccumulate */
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of x and y and then adds z.
// //
// Asm: VPDPBUSD, CPU Feature: AVXVNNI // Asm: VPDPBUSD, CPU Feature: AVXVNNI
func (x Int32x4) UnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int8x16) Int32x4 func (x Int8x16) UnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int32x4) Int32x4
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of x and y and then adds z.
// //
// Asm: VPDPBUSD, CPU Feature: AVXVNNI // Asm: VPDPBUSD, CPU Feature: AVXVNNI
func (x Int32x8) UnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int8x32) Int32x8 func (x Int8x32) UnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int32x8) Int32x8
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of x and y and then adds z.
// //
// Asm: VPDPBUSD, CPU Feature: AVX512VNNI // Asm: VPDPBUSD, CPU Feature: AVX512VNNI
func (x Int32x16) UnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int8x64) Int32x16 func (x Int8x64) UnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16) Int32x16
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
//
// Asm: VPDPBUSD, CPU Feature: AVXVNNI
func (x Uint32x4) UnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int8x16) Uint32x4
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
//
// Asm: VPDPBUSD, CPU Feature: AVXVNNI
func (x Uint32x8) UnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int8x32) Uint32x8
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
//
// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
func (x Uint32x16) UnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int8x64) Uint32x16
/* UnsignedSignedQuadDotProdAccumulateMasked */ /* UnsignedSignedQuadDotProdAccumulateMasked */
// UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x. // UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of x and y and then adds z.
// //
// Asm: VPDPBUSD, CPU Feature: AVX512VNNI // Asm: VPDPBUSD, CPU Feature: AVX512VNNI
func (x Int32x4) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int8x16, mask Mask32x4) Int32x4 func (x Int8x16) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int32x4, mask Mask32x4) Int32x4
// UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x. // UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of x and y and then adds z.
// //
// Asm: VPDPBUSD, CPU Feature: AVX512VNNI // Asm: VPDPBUSD, CPU Feature: AVX512VNNI
func (x Int32x8) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int8x32, mask Mask32x8) Int32x8 func (x Int8x32) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int32x8, mask Mask32x8) Int32x8
// UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x. // UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of x and y and then adds z.
// //
// Asm: VPDPBUSD, CPU Feature: AVX512VNNI // Asm: VPDPBUSD, CPU Feature: AVX512VNNI
func (x Int32x16) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int8x64, mask Mask32x16) Int32x16 func (x Int8x64) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int32x16, mask Mask32x16) Int32x16
// UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x.
//
// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
func (x Uint32x4) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int8x16, mask Mask32x4) Uint32x4
// UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x.
//
// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
func (x Uint32x8) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int8x32, mask Mask32x8) Uint32x8
// UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x.
//
// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
func (x Uint32x16) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int8x64, mask Mask32x16) Uint32x16
/* Xor */ /* Xor */

View file

@ -202,6 +202,25 @@ func TestAndNot(t *testing.T) {
[]int32{0b10, 0b00, 0b10, 0b00}, "AndNot") []int32{0b10, 0b00, 0b10, 0b00}, "AndNot")
} }
func TestPairDotProdAccumulate(t *testing.T) {
if !simd.HasAVX512GFNI() {
// TODO: this function is actually VNNI, let's implement and call the right check.
t.Skip("Test requires HasAVX512GFNI, not available on this hardware")
return
}
x := simd.LoadInt16x8Slice([]int16{2, 2, 2, 2, 2, 2, 2, 2})
z := simd.LoadInt32x4Slice([]int32{3, 3, 3, 3})
want := []int32{11, 11, 11, 11}
got := make([]int32, 4)
z = x.PairDotProdAccumulate(x, z)
z.StoreSlice(got)
for i := range 4 {
if got[i] != want[i] {
t.Errorf("a and b differ at index %d, got=%d, want=%d", i, got[i], want[i])
}
}
}
// checkInt8Slices ensures that b and a are equal, to the end of b. // checkInt8Slices ensures that b and a are equal, to the end of b.
// also serves to use the slices, to prevent accidental optimization. // also serves to use the slices, to prevent accidental optimization.
func checkInt8Slices(t *testing.T, a, b []int8) { func checkInt8Slices(t *testing.T, a, b []int8) {

View file

@ -3294,55 +3294,6 @@ func testInt32x4Compare(t *testing.T, v0 []int32, v1 []int32, want []int32, whic
} }
} }
func testInt32x4Int16x8Int16x8Int32x4(t *testing.T, v0 []int32, v1 []int16, v2 []int16, want []int32, which string) {
t.Helper()
var gotv simd.Int32x4
got := make([]int32, len(want))
vec0 := simd.LoadInt32x4Slice(v0)
vec1 := simd.LoadInt16x8Slice(v1)
vec2 := simd.LoadInt16x8Slice(v2)
switch which {
case "PairDotProdAccumulate":
gotv = vec0.PairDotProdAccumulate(vec1, vec2)
case "SaturatedPairDotProdAccumulate":
gotv = vec0.SaturatedPairDotProdAccumulate(vec1, vec2)
default:
t.Errorf("Unknown method: Int32x4.%s", which)
}
gotv.StoreSlice(got)
for i := range len(want) {
if got[i] != want[i] {
t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
}
}
}
func testInt32x4Int16x8Int16x8Mask32x4Int32x4(t *testing.T, v0 []int32, v1 []int16, v2 []int16, v3 []int32, want []int32, which string) {
t.Helper()
var gotv simd.Int32x4
got := make([]int32, len(want))
vec0 := simd.LoadInt32x4Slice(v0)
vec1 := simd.LoadInt16x8Slice(v1)
vec2 := simd.LoadInt16x8Slice(v2)
vec3 := simd.LoadInt32x4Slice(v3)
switch which {
case "PairDotProdAccumulateMasked":
gotv = vec0.PairDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x4())
case "SaturatedPairDotProdAccumulateMasked":
gotv = vec0.SaturatedPairDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x4())
default:
t.Errorf("Unknown method: Int32x4.%s", which)
}
gotv.StoreSlice(got)
for i := range len(want) {
if got[i] != want[i] {
t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
}
}
}
func testInt32x4Mask32x4Int32x4(t *testing.T, v0 []int32, v1 []int32, want []int32, which string) { func testInt32x4Mask32x4Int32x4(t *testing.T, v0 []int32, v1 []int32, want []int32, which string) {
t.Helper() t.Helper()
var gotv simd.Int32x4 var gotv simd.Int32x4
@ -3445,55 +3396,6 @@ func testInt32x4TernaryMasked(t *testing.T, v0 []int32, v1 []int32, v2 []int32,
} }
} }
func testInt32x4Uint8x16Int8x16Int32x4(t *testing.T, v0 []int32, v1 []uint8, v2 []int8, want []int32, which string) {
t.Helper()
var gotv simd.Int32x4
got := make([]int32, len(want))
vec0 := simd.LoadInt32x4Slice(v0)
vec1 := simd.LoadUint8x16Slice(v1)
vec2 := simd.LoadInt8x16Slice(v2)
switch which {
case "SaturatedUnsignedSignedQuadDotProdAccumulate":
gotv = vec0.SaturatedUnsignedSignedQuadDotProdAccumulate(vec1, vec2)
case "UnsignedSignedQuadDotProdAccumulate":
gotv = vec0.UnsignedSignedQuadDotProdAccumulate(vec1, vec2)
default:
t.Errorf("Unknown method: Int32x4.%s", which)
}
gotv.StoreSlice(got)
for i := range len(want) {
if got[i] != want[i] {
t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
}
}
}
func testInt32x4Uint8x16Int8x16Mask32x4Int32x4(t *testing.T, v0 []int32, v1 []uint8, v2 []int8, v3 []int32, want []int32, which string) {
t.Helper()
var gotv simd.Int32x4
got := make([]int32, len(want))
vec0 := simd.LoadInt32x4Slice(v0)
vec1 := simd.LoadUint8x16Slice(v1)
vec2 := simd.LoadInt8x16Slice(v2)
vec3 := simd.LoadInt32x4Slice(v3)
switch which {
case "SaturatedUnsignedSignedQuadDotProdAccumulateMasked":
gotv = vec0.SaturatedUnsignedSignedQuadDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x4())
case "UnsignedSignedQuadDotProdAccumulateMasked":
gotv = vec0.UnsignedSignedQuadDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x4())
default:
t.Errorf("Unknown method: Int32x4.%s", which)
}
gotv.StoreSlice(got)
for i := range len(want) {
if got[i] != want[i] {
t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
}
}
}
func testInt32x4Unary(t *testing.T, v0 []int32, want []int32, which string) { func testInt32x4Unary(t *testing.T, v0 []int32, want []int32, which string) {
t.Helper() t.Helper()
var gotv simd.Int32x4 var gotv simd.Int32x4
@ -3688,55 +3590,6 @@ func testInt32x8Compare(t *testing.T, v0 []int32, v1 []int32, want []int32, whic
} }
} }
func testInt32x8Int16x16Int16x16Int32x8(t *testing.T, v0 []int32, v1 []int16, v2 []int16, want []int32, which string) {
t.Helper()
var gotv simd.Int32x8
got := make([]int32, len(want))
vec0 := simd.LoadInt32x8Slice(v0)
vec1 := simd.LoadInt16x16Slice(v1)
vec2 := simd.LoadInt16x16Slice(v2)
switch which {
case "PairDotProdAccumulate":
gotv = vec0.PairDotProdAccumulate(vec1, vec2)
case "SaturatedPairDotProdAccumulate":
gotv = vec0.SaturatedPairDotProdAccumulate(vec1, vec2)
default:
t.Errorf("Unknown method: Int32x8.%s", which)
}
gotv.StoreSlice(got)
for i := range len(want) {
if got[i] != want[i] {
t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
}
}
}
func testInt32x8Int16x16Int16x16Mask32x8Int32x8(t *testing.T, v0 []int32, v1 []int16, v2 []int16, v3 []int32, want []int32, which string) {
t.Helper()
var gotv simd.Int32x8
got := make([]int32, len(want))
vec0 := simd.LoadInt32x8Slice(v0)
vec1 := simd.LoadInt16x16Slice(v1)
vec2 := simd.LoadInt16x16Slice(v2)
vec3 := simd.LoadInt32x8Slice(v3)
switch which {
case "PairDotProdAccumulateMasked":
gotv = vec0.PairDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x8())
case "SaturatedPairDotProdAccumulateMasked":
gotv = vec0.SaturatedPairDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x8())
default:
t.Errorf("Unknown method: Int32x8.%s", which)
}
gotv.StoreSlice(got)
for i := range len(want) {
if got[i] != want[i] {
t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
}
}
}
func testInt32x8Mask32x8Int32x8(t *testing.T, v0 []int32, v1 []int32, want []int32, which string) { func testInt32x8Mask32x8Int32x8(t *testing.T, v0 []int32, v1 []int32, want []int32, which string) {
t.Helper() t.Helper()
var gotv simd.Int32x8 var gotv simd.Int32x8
@ -3839,55 +3692,6 @@ func testInt32x8TernaryMasked(t *testing.T, v0 []int32, v1 []int32, v2 []int32,
} }
} }
func testInt32x8Uint8x32Int8x32Int32x8(t *testing.T, v0 []int32, v1 []uint8, v2 []int8, want []int32, which string) {
t.Helper()
var gotv simd.Int32x8
got := make([]int32, len(want))
vec0 := simd.LoadInt32x8Slice(v0)
vec1 := simd.LoadUint8x32Slice(v1)
vec2 := simd.LoadInt8x32Slice(v2)
switch which {
case "SaturatedUnsignedSignedQuadDotProdAccumulate":
gotv = vec0.SaturatedUnsignedSignedQuadDotProdAccumulate(vec1, vec2)
case "UnsignedSignedQuadDotProdAccumulate":
gotv = vec0.UnsignedSignedQuadDotProdAccumulate(vec1, vec2)
default:
t.Errorf("Unknown method: Int32x8.%s", which)
}
gotv.StoreSlice(got)
for i := range len(want) {
if got[i] != want[i] {
t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
}
}
}
func testInt32x8Uint8x32Int8x32Mask32x8Int32x8(t *testing.T, v0 []int32, v1 []uint8, v2 []int8, v3 []int32, want []int32, which string) {
t.Helper()
var gotv simd.Int32x8
got := make([]int32, len(want))
vec0 := simd.LoadInt32x8Slice(v0)
vec1 := simd.LoadUint8x32Slice(v1)
vec2 := simd.LoadInt8x32Slice(v2)
vec3 := simd.LoadInt32x8Slice(v3)
switch which {
case "SaturatedUnsignedSignedQuadDotProdAccumulateMasked":
gotv = vec0.SaturatedUnsignedSignedQuadDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x8())
case "UnsignedSignedQuadDotProdAccumulateMasked":
gotv = vec0.UnsignedSignedQuadDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x8())
default:
t.Errorf("Unknown method: Int32x8.%s", which)
}
gotv.StoreSlice(got)
for i := range len(want) {
if got[i] != want[i] {
t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
}
}
}
func testInt32x8Unary(t *testing.T, v0 []int32, want []int32, which string) { func testInt32x8Unary(t *testing.T, v0 []int32, want []int32, which string) {
t.Helper() t.Helper()
var gotv simd.Int32x8 var gotv simd.Int32x8
@ -4055,55 +3859,6 @@ func testInt32x16Compare(t *testing.T, v0 []int32, v1 []int32, want []int32, whi
} }
} }
func testInt32x16Int16x32Int16x32Int32x16(t *testing.T, v0 []int32, v1 []int16, v2 []int16, want []int32, which string) {
t.Helper()
var gotv simd.Int32x16
got := make([]int32, len(want))
vec0 := simd.LoadInt32x16Slice(v0)
vec1 := simd.LoadInt16x32Slice(v1)
vec2 := simd.LoadInt16x32Slice(v2)
switch which {
case "PairDotProdAccumulate":
gotv = vec0.PairDotProdAccumulate(vec1, vec2)
case "SaturatedPairDotProdAccumulate":
gotv = vec0.SaturatedPairDotProdAccumulate(vec1, vec2)
default:
t.Errorf("Unknown method: Int32x16.%s", which)
}
gotv.StoreSlice(got)
for i := range len(want) {
if got[i] != want[i] {
t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
}
}
}
func testInt32x16Int16x32Int16x32Mask32x16Int32x16(t *testing.T, v0 []int32, v1 []int16, v2 []int16, v3 []int32, want []int32, which string) {
t.Helper()
var gotv simd.Int32x16
got := make([]int32, len(want))
vec0 := simd.LoadInt32x16Slice(v0)
vec1 := simd.LoadInt16x32Slice(v1)
vec2 := simd.LoadInt16x32Slice(v2)
vec3 := simd.LoadInt32x16Slice(v3)
switch which {
case "PairDotProdAccumulateMasked":
gotv = vec0.PairDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x16())
case "SaturatedPairDotProdAccumulateMasked":
gotv = vec0.SaturatedPairDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x16())
default:
t.Errorf("Unknown method: Int32x16.%s", which)
}
gotv.StoreSlice(got)
for i := range len(want) {
if got[i] != want[i] {
t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
}
}
}
func testInt32x16Mask32x16Int32x16(t *testing.T, v0 []int32, v1 []int32, want []int32, which string) { func testInt32x16Mask32x16Int32x16(t *testing.T, v0 []int32, v1 []int32, want []int32, which string) {
t.Helper() t.Helper()
var gotv simd.Int32x16 var gotv simd.Int32x16
@ -4206,55 +3961,6 @@ func testInt32x16TernaryMasked(t *testing.T, v0 []int32, v1 []int32, v2 []int32,
} }
} }
func testInt32x16Uint8x64Int8x64Int32x16(t *testing.T, v0 []int32, v1 []uint8, v2 []int8, want []int32, which string) {
t.Helper()
var gotv simd.Int32x16
got := make([]int32, len(want))
vec0 := simd.LoadInt32x16Slice(v0)
vec1 := simd.LoadUint8x64Slice(v1)
vec2 := simd.LoadInt8x64Slice(v2)
switch which {
case "SaturatedUnsignedSignedQuadDotProdAccumulate":
gotv = vec0.SaturatedUnsignedSignedQuadDotProdAccumulate(vec1, vec2)
case "UnsignedSignedQuadDotProdAccumulate":
gotv = vec0.UnsignedSignedQuadDotProdAccumulate(vec1, vec2)
default:
t.Errorf("Unknown method: Int32x16.%s", which)
}
gotv.StoreSlice(got)
for i := range len(want) {
if got[i] != want[i] {
t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
}
}
}
func testInt32x16Uint8x64Int8x64Mask32x16Int32x16(t *testing.T, v0 []int32, v1 []uint8, v2 []int8, v3 []int32, want []int32, which string) {
t.Helper()
var gotv simd.Int32x16
got := make([]int32, len(want))
vec0 := simd.LoadInt32x16Slice(v0)
vec1 := simd.LoadUint8x64Slice(v1)
vec2 := simd.LoadInt8x64Slice(v2)
vec3 := simd.LoadInt32x16Slice(v3)
switch which {
case "SaturatedUnsignedSignedQuadDotProdAccumulateMasked":
gotv = vec0.SaturatedUnsignedSignedQuadDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x16())
case "UnsignedSignedQuadDotProdAccumulateMasked":
gotv = vec0.UnsignedSignedQuadDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x16())
default:
t.Errorf("Unknown method: Int32x16.%s", which)
}
gotv.StoreSlice(got)
for i := range len(want) {
if got[i] != want[i] {
t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
}
}
}
func testInt32x16Unary(t *testing.T, v0 []int32, want []int32, which string) { func testInt32x16Unary(t *testing.T, v0 []int32, want []int32, which string) {
t.Helper() t.Helper()
var gotv simd.Int32x16 var gotv simd.Int32x16
@ -6880,55 +6586,6 @@ func testUint32x4TernaryMasked(t *testing.T, v0 []uint32, v1 []uint32, v2 []uint
} }
} }
func testUint32x4Uint8x16Int8x16Mask32x4Uint32x4(t *testing.T, v0 []uint32, v1 []uint8, v2 []int8, v3 []int32, want []uint32, which string) {
t.Helper()
var gotv simd.Uint32x4
got := make([]uint32, len(want))
vec0 := simd.LoadUint32x4Slice(v0)
vec1 := simd.LoadUint8x16Slice(v1)
vec2 := simd.LoadInt8x16Slice(v2)
vec3 := simd.LoadInt32x4Slice(v3)
switch which {
case "SaturatedUnsignedSignedQuadDotProdAccumulateMasked":
gotv = vec0.SaturatedUnsignedSignedQuadDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x4())
case "UnsignedSignedQuadDotProdAccumulateMasked":
gotv = vec0.UnsignedSignedQuadDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x4())
default:
t.Errorf("Unknown method: Uint32x4.%s", which)
}
gotv.StoreSlice(got)
for i := range len(want) {
if got[i] != want[i] {
t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
}
}
}
func testUint32x4Uint8x16Int8x16Uint32x4(t *testing.T, v0 []uint32, v1 []uint8, v2 []int8, want []uint32, which string) {
t.Helper()
var gotv simd.Uint32x4
got := make([]uint32, len(want))
vec0 := simd.LoadUint32x4Slice(v0)
vec1 := simd.LoadUint8x16Slice(v1)
vec2 := simd.LoadInt8x16Slice(v2)
switch which {
case "SaturatedUnsignedSignedQuadDotProdAccumulate":
gotv = vec0.SaturatedUnsignedSignedQuadDotProdAccumulate(vec1, vec2)
case "UnsignedSignedQuadDotProdAccumulate":
gotv = vec0.UnsignedSignedQuadDotProdAccumulate(vec1, vec2)
default:
t.Errorf("Unknown method: Uint32x4.%s", which)
}
gotv.StoreSlice(got)
for i := range len(want) {
if got[i] != want[i] {
t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
}
}
}
func testUint32x4Unary(t *testing.T, v0 []uint32, want []uint32, which string) { func testUint32x4Unary(t *testing.T, v0 []uint32, want []uint32, which string) {
t.Helper() t.Helper()
var gotv simd.Uint32x4 var gotv simd.Uint32x4
@ -7215,55 +6872,6 @@ func testUint32x8TernaryMasked(t *testing.T, v0 []uint32, v1 []uint32, v2 []uint
} }
} }
func testUint32x8Uint8x32Int8x32Mask32x8Uint32x8(t *testing.T, v0 []uint32, v1 []uint8, v2 []int8, v3 []int32, want []uint32, which string) {
t.Helper()
var gotv simd.Uint32x8
got := make([]uint32, len(want))
vec0 := simd.LoadUint32x8Slice(v0)
vec1 := simd.LoadUint8x32Slice(v1)
vec2 := simd.LoadInt8x32Slice(v2)
vec3 := simd.LoadInt32x8Slice(v3)
switch which {
case "SaturatedUnsignedSignedQuadDotProdAccumulateMasked":
gotv = vec0.SaturatedUnsignedSignedQuadDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x8())
case "UnsignedSignedQuadDotProdAccumulateMasked":
gotv = vec0.UnsignedSignedQuadDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x8())
default:
t.Errorf("Unknown method: Uint32x8.%s", which)
}
gotv.StoreSlice(got)
for i := range len(want) {
if got[i] != want[i] {
t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
}
}
}
func testUint32x8Uint8x32Int8x32Uint32x8(t *testing.T, v0 []uint32, v1 []uint8, v2 []int8, want []uint32, which string) {
t.Helper()
var gotv simd.Uint32x8
got := make([]uint32, len(want))
vec0 := simd.LoadUint32x8Slice(v0)
vec1 := simd.LoadUint8x32Slice(v1)
vec2 := simd.LoadInt8x32Slice(v2)
switch which {
case "SaturatedUnsignedSignedQuadDotProdAccumulate":
gotv = vec0.SaturatedUnsignedSignedQuadDotProdAccumulate(vec1, vec2)
case "UnsignedSignedQuadDotProdAccumulate":
gotv = vec0.UnsignedSignedQuadDotProdAccumulate(vec1, vec2)
default:
t.Errorf("Unknown method: Uint32x8.%s", which)
}
gotv.StoreSlice(got)
for i := range len(want) {
if got[i] != want[i] {
t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
}
}
}
func testUint32x8Unary(t *testing.T, v0 []uint32, want []uint32, which string) { func testUint32x8Unary(t *testing.T, v0 []uint32, want []uint32, which string) {
t.Helper() t.Helper()
var gotv simd.Uint32x8 var gotv simd.Uint32x8
@ -7525,55 +7133,6 @@ func testUint32x16TernaryMasked(t *testing.T, v0 []uint32, v1 []uint32, v2 []uin
} }
} }
func testUint32x16Uint8x64Int8x64Mask32x16Uint32x16(t *testing.T, v0 []uint32, v1 []uint8, v2 []int8, v3 []int32, want []uint32, which string) {
t.Helper()
var gotv simd.Uint32x16
got := make([]uint32, len(want))
vec0 := simd.LoadUint32x16Slice(v0)
vec1 := simd.LoadUint8x64Slice(v1)
vec2 := simd.LoadInt8x64Slice(v2)
vec3 := simd.LoadInt32x16Slice(v3)
switch which {
case "SaturatedUnsignedSignedQuadDotProdAccumulateMasked":
gotv = vec0.SaturatedUnsignedSignedQuadDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x16())
case "UnsignedSignedQuadDotProdAccumulateMasked":
gotv = vec0.UnsignedSignedQuadDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x16())
default:
t.Errorf("Unknown method: Uint32x16.%s", which)
}
gotv.StoreSlice(got)
for i := range len(want) {
if got[i] != want[i] {
t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
}
}
}
func testUint32x16Uint8x64Int8x64Uint32x16(t *testing.T, v0 []uint32, v1 []uint8, v2 []int8, want []uint32, which string) {
t.Helper()
var gotv simd.Uint32x16
got := make([]uint32, len(want))
vec0 := simd.LoadUint32x16Slice(v0)
vec1 := simd.LoadUint8x64Slice(v1)
vec2 := simd.LoadInt8x64Slice(v2)
switch which {
case "SaturatedUnsignedSignedQuadDotProdAccumulate":
gotv = vec0.SaturatedUnsignedSignedQuadDotProdAccumulate(vec1, vec2)
case "UnsignedSignedQuadDotProdAccumulate":
gotv = vec0.UnsignedSignedQuadDotProdAccumulate(vec1, vec2)
default:
t.Errorf("Unknown method: Uint32x16.%s", which)
}
gotv.StoreSlice(got)
for i := range len(want) {
if got[i] != want[i] {
t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
}
}
}
func testUint32x16Unary(t *testing.T, v0 []uint32, want []uint32, which string) { func testUint32x16Unary(t *testing.T, v0 []uint32, want []uint32, which string) {
t.Helper() t.Helper()
var gotv simd.Uint32x16 var gotv simd.Uint32x16
@ -8430,6 +7989,8 @@ func testUint64x8UnaryMasked(t *testing.T, v0 []uint64, v1 []int64, want []uint6
// GaloisFieldAffineTransformMasked // GaloisFieldAffineTransformMasked
// Get128 // Get128
// GetElem // GetElem
// PairDotProdAccumulate
// PairDotProdAccumulateMasked
// Permute // Permute
// Permute2 // Permute2
// Permute2Masked // Permute2Masked
@ -8440,6 +8001,10 @@ func testUint64x8UnaryMasked(t *testing.T, v0 []uint64, v1 []int64, want []uint6
// RotateAllRightMasked // RotateAllRightMasked
// RoundWithPrecision // RoundWithPrecision
// RoundWithPrecisionMasked // RoundWithPrecisionMasked
// SaturatedPairDotProdAccumulate
// SaturatedPairDotProdAccumulateMasked
// SaturatedUnsignedSignedQuadDotProdAccumulate
// SaturatedUnsignedSignedQuadDotProdAccumulateMasked
// Set128 // Set128
// SetElem // SetElem
// ShiftAllLeft // ShiftAllLeft
@ -8452,3 +8017,5 @@ func testUint64x8UnaryMasked(t *testing.T, v0 []uint64, v1 []int64, want []uint6
// ShiftAllRightMasked // ShiftAllRightMasked
// TruncWithPrecision // TruncWithPrecision
// TruncWithPrecisionMasked // TruncWithPrecisionMasked
// UnsignedSignedQuadDotProdAccumulate
// UnsignedSignedQuadDotProdAccumulateMasked