mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
[dev.simd] cmd/compile, simd: reorder PairDotProdAccumulate
This CL reorders the parameters of the PairDotProdAccumulate family to be dotprod(x, y) + z instead of the old dotprod(y, z) + x. This CL also updates some documentation of other ML Ops. This CL adds a test to verify that the behavior is correct. This CL is partially generated by CL 688115. Change-Id: I76a6ee55a2ad8e3aff388d7e4fa5218ec0e4800d Reviewed-on: https://go-review.googlesource.com/c/go/+/688095 Reviewed-by: David Chase <drchase@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
parent
ef5f6cc921
commit
c61743e4f0
9 changed files with 288 additions and 1008 deletions
|
|
@ -1350,15 +1350,9 @@
|
||||||
(SaturatedUnsignedSignedQuadDotProdAccumulateInt32x4 ...) => (VPDPBUSDS128 ...)
|
(SaturatedUnsignedSignedQuadDotProdAccumulateInt32x4 ...) => (VPDPBUSDS128 ...)
|
||||||
(SaturatedUnsignedSignedQuadDotProdAccumulateInt32x8 ...) => (VPDPBUSDS256 ...)
|
(SaturatedUnsignedSignedQuadDotProdAccumulateInt32x8 ...) => (VPDPBUSDS256 ...)
|
||||||
(SaturatedUnsignedSignedQuadDotProdAccumulateInt32x16 ...) => (VPDPBUSDS512 ...)
|
(SaturatedUnsignedSignedQuadDotProdAccumulateInt32x16 ...) => (VPDPBUSDS512 ...)
|
||||||
(SaturatedUnsignedSignedQuadDotProdAccumulateUint32x4 ...) => (VPDPBUSDS128 ...)
|
|
||||||
(SaturatedUnsignedSignedQuadDotProdAccumulateUint32x8 ...) => (VPDPBUSDS256 ...)
|
|
||||||
(SaturatedUnsignedSignedQuadDotProdAccumulateUint32x16 ...) => (VPDPBUSDS512 ...)
|
|
||||||
(SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4 x y z mask) => (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
|
(SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4 x y z mask) => (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||||
(SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8 x y z mask) => (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
|
(SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8 x y z mask) => (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||||
(SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x16 x y z mask) => (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
|
(SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x16 x y z mask) => (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
|
||||||
(SaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x4 x y z mask) => (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
|
|
||||||
(SaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x8 x y z mask) => (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
|
|
||||||
(SaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x16 x y z mask) => (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
|
|
||||||
(Set128Float32x8 ...) => (VINSERTF128256 ...)
|
(Set128Float32x8 ...) => (VINSERTF128256 ...)
|
||||||
(Set128Float64x4 ...) => (VINSERTF128256 ...)
|
(Set128Float64x4 ...) => (VINSERTF128256 ...)
|
||||||
(Set128Int8x32 ...) => (VINSERTI128256 ...)
|
(Set128Int8x32 ...) => (VINSERTI128256 ...)
|
||||||
|
|
@ -1762,15 +1756,9 @@
|
||||||
(UnsignedSignedQuadDotProdAccumulateInt32x4 ...) => (VPDPBUSD128 ...)
|
(UnsignedSignedQuadDotProdAccumulateInt32x4 ...) => (VPDPBUSD128 ...)
|
||||||
(UnsignedSignedQuadDotProdAccumulateInt32x8 ...) => (VPDPBUSD256 ...)
|
(UnsignedSignedQuadDotProdAccumulateInt32x8 ...) => (VPDPBUSD256 ...)
|
||||||
(UnsignedSignedQuadDotProdAccumulateInt32x16 ...) => (VPDPBUSD512 ...)
|
(UnsignedSignedQuadDotProdAccumulateInt32x16 ...) => (VPDPBUSD512 ...)
|
||||||
(UnsignedSignedQuadDotProdAccumulateUint32x4 ...) => (VPDPBUSD128 ...)
|
|
||||||
(UnsignedSignedQuadDotProdAccumulateUint32x8 ...) => (VPDPBUSD256 ...)
|
|
||||||
(UnsignedSignedQuadDotProdAccumulateUint32x16 ...) => (VPDPBUSD512 ...)
|
|
||||||
(UnsignedSignedQuadDotProdAccumulateMaskedInt32x4 x y z mask) => (VPDPBUSDMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
|
(UnsignedSignedQuadDotProdAccumulateMaskedInt32x4 x y z mask) => (VPDPBUSDMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||||
(UnsignedSignedQuadDotProdAccumulateMaskedInt32x8 x y z mask) => (VPDPBUSDMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
|
(UnsignedSignedQuadDotProdAccumulateMaskedInt32x8 x y z mask) => (VPDPBUSDMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||||
(UnsignedSignedQuadDotProdAccumulateMaskedInt32x16 x y z mask) => (VPDPBUSDMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
|
(UnsignedSignedQuadDotProdAccumulateMaskedInt32x16 x y z mask) => (VPDPBUSDMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
|
||||||
(UnsignedSignedQuadDotProdAccumulateMaskedUint32x4 x y z mask) => (VPDPBUSDMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
|
|
||||||
(UnsignedSignedQuadDotProdAccumulateMaskedUint32x8 x y z mask) => (VPDPBUSDMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
|
|
||||||
(UnsignedSignedQuadDotProdAccumulateMaskedUint32x16 x y z mask) => (VPDPBUSDMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
|
|
||||||
(XorInt8x16 ...) => (VPXOR128 ...)
|
(XorInt8x16 ...) => (VPXOR128 ...)
|
||||||
(XorInt8x32 ...) => (VPXOR256 ...)
|
(XorInt8x32 ...) => (VPXOR256 ...)
|
||||||
(XorInt16x8 ...) => (VPXOR128 ...)
|
(XorInt16x8 ...) => (VPXOR128 ...)
|
||||||
|
|
|
||||||
|
|
@ -914,8 +914,8 @@ func simdGenericOps() []opData {
|
||||||
{name: "Permute2Int16x16", argLength: 3, commutative: false},
|
{name: "Permute2Int16x16", argLength: 3, commutative: false},
|
||||||
{name: "Permute2MaskedInt16x16", argLength: 4, commutative: false},
|
{name: "Permute2MaskedInt16x16", argLength: 4, commutative: false},
|
||||||
{name: "Permute2MaskedUint16x16", argLength: 4, commutative: false},
|
{name: "Permute2MaskedUint16x16", argLength: 4, commutative: false},
|
||||||
{name: "PermuteMaskedInt16x16", argLength: 3, commutative: false},
|
|
||||||
{name: "PermuteMaskedUint16x16", argLength: 3, commutative: false},
|
{name: "PermuteMaskedUint16x16", argLength: 3, commutative: false},
|
||||||
|
{name: "PermuteMaskedInt16x16", argLength: 3, commutative: false},
|
||||||
{name: "PopCountUint16x16", argLength: 1, commutative: false},
|
{name: "PopCountUint16x16", argLength: 1, commutative: false},
|
||||||
{name: "PopCountMaskedUint16x16", argLength: 2, commutative: false},
|
{name: "PopCountMaskedUint16x16", argLength: 2, commutative: false},
|
||||||
{name: "SaturatedAddUint16x16", argLength: 2, commutative: true},
|
{name: "SaturatedAddUint16x16", argLength: 2, commutative: true},
|
||||||
|
|
@ -960,12 +960,12 @@ func simdGenericOps() []opData {
|
||||||
{name: "MulHighMaskedUint16x32", argLength: 3, commutative: true},
|
{name: "MulHighMaskedUint16x32", argLength: 3, commutative: true},
|
||||||
{name: "NotEqualUint16x32", argLength: 2, commutative: true},
|
{name: "NotEqualUint16x32", argLength: 2, commutative: true},
|
||||||
{name: "NotEqualMaskedUint16x32", argLength: 3, commutative: true},
|
{name: "NotEqualMaskedUint16x32", argLength: 3, commutative: true},
|
||||||
{name: "PermuteInt16x32", argLength: 2, commutative: false},
|
|
||||||
{name: "PermuteUint16x32", argLength: 2, commutative: false},
|
{name: "PermuteUint16x32", argLength: 2, commutative: false},
|
||||||
{name: "Permute2Int16x32", argLength: 3, commutative: false},
|
{name: "PermuteInt16x32", argLength: 2, commutative: false},
|
||||||
{name: "Permute2Uint16x32", argLength: 3, commutative: false},
|
{name: "Permute2Uint16x32", argLength: 3, commutative: false},
|
||||||
{name: "Permute2MaskedInt16x32", argLength: 4, commutative: false},
|
{name: "Permute2Int16x32", argLength: 3, commutative: false},
|
||||||
{name: "Permute2MaskedUint16x32", argLength: 4, commutative: false},
|
{name: "Permute2MaskedUint16x32", argLength: 4, commutative: false},
|
||||||
|
{name: "Permute2MaskedInt16x32", argLength: 4, commutative: false},
|
||||||
{name: "PermuteMaskedUint16x32", argLength: 3, commutative: false},
|
{name: "PermuteMaskedUint16x32", argLength: 3, commutative: false},
|
||||||
{name: "PermuteMaskedInt16x32", argLength: 3, commutative: false},
|
{name: "PermuteMaskedInt16x32", argLength: 3, commutative: false},
|
||||||
{name: "PopCountUint16x32", argLength: 1, commutative: false},
|
{name: "PopCountUint16x32", argLength: 1, commutative: false},
|
||||||
|
|
@ -1016,14 +1016,14 @@ func simdGenericOps() []opData {
|
||||||
{name: "OrUint16x8", argLength: 2, commutative: true},
|
{name: "OrUint16x8", argLength: 2, commutative: true},
|
||||||
{name: "PairwiseAddUint16x8", argLength: 2, commutative: false},
|
{name: "PairwiseAddUint16x8", argLength: 2, commutative: false},
|
||||||
{name: "PairwiseSubUint16x8", argLength: 2, commutative: false},
|
{name: "PairwiseSubUint16x8", argLength: 2, commutative: false},
|
||||||
{name: "PermuteUint16x8", argLength: 2, commutative: false},
|
|
||||||
{name: "PermuteInt16x8", argLength: 2, commutative: false},
|
{name: "PermuteInt16x8", argLength: 2, commutative: false},
|
||||||
|
{name: "PermuteUint16x8", argLength: 2, commutative: false},
|
||||||
{name: "Permute2Int16x8", argLength: 3, commutative: false},
|
{name: "Permute2Int16x8", argLength: 3, commutative: false},
|
||||||
{name: "Permute2Uint16x8", argLength: 3, commutative: false},
|
{name: "Permute2Uint16x8", argLength: 3, commutative: false},
|
||||||
{name: "Permute2MaskedUint16x8", argLength: 4, commutative: false},
|
|
||||||
{name: "Permute2MaskedInt16x8", argLength: 4, commutative: false},
|
{name: "Permute2MaskedInt16x8", argLength: 4, commutative: false},
|
||||||
{name: "PermuteMaskedInt16x8", argLength: 3, commutative: false},
|
{name: "Permute2MaskedUint16x8", argLength: 4, commutative: false},
|
||||||
{name: "PermuteMaskedUint16x8", argLength: 3, commutative: false},
|
{name: "PermuteMaskedUint16x8", argLength: 3, commutative: false},
|
||||||
|
{name: "PermuteMaskedInt16x8", argLength: 3, commutative: false},
|
||||||
{name: "PopCountUint16x8", argLength: 1, commutative: false},
|
{name: "PopCountUint16x8", argLength: 1, commutative: false},
|
||||||
{name: "PopCountMaskedUint16x8", argLength: 2, commutative: false},
|
{name: "PopCountMaskedUint16x8", argLength: 2, commutative: false},
|
||||||
{name: "SaturatedAddUint16x8", argLength: 2, commutative: true},
|
{name: "SaturatedAddUint16x8", argLength: 2, commutative: true},
|
||||||
|
|
@ -1070,26 +1070,24 @@ func simdGenericOps() []opData {
|
||||||
{name: "NotEqualMaskedUint32x16", argLength: 3, commutative: true},
|
{name: "NotEqualMaskedUint32x16", argLength: 3, commutative: true},
|
||||||
{name: "OrUint32x16", argLength: 2, commutative: true},
|
{name: "OrUint32x16", argLength: 2, commutative: true},
|
||||||
{name: "OrMaskedUint32x16", argLength: 3, commutative: true},
|
{name: "OrMaskedUint32x16", argLength: 3, commutative: true},
|
||||||
{name: "PermuteInt32x16", argLength: 2, commutative: false},
|
|
||||||
{name: "PermuteFloat32x16", argLength: 2, commutative: false},
|
{name: "PermuteFloat32x16", argLength: 2, commutative: false},
|
||||||
|
{name: "PermuteInt32x16", argLength: 2, commutative: false},
|
||||||
{name: "PermuteUint32x16", argLength: 2, commutative: false},
|
{name: "PermuteUint32x16", argLength: 2, commutative: false},
|
||||||
{name: "Permute2Uint32x16", argLength: 3, commutative: false},
|
{name: "Permute2Uint32x16", argLength: 3, commutative: false},
|
||||||
{name: "Permute2Float32x16", argLength: 3, commutative: false},
|
{name: "Permute2Float32x16", argLength: 3, commutative: false},
|
||||||
{name: "Permute2Int32x16", argLength: 3, commutative: false},
|
{name: "Permute2Int32x16", argLength: 3, commutative: false},
|
||||||
{name: "Permute2MaskedUint32x16", argLength: 4, commutative: false},
|
|
||||||
{name: "Permute2MaskedInt32x16", argLength: 4, commutative: false},
|
{name: "Permute2MaskedInt32x16", argLength: 4, commutative: false},
|
||||||
{name: "Permute2MaskedFloat32x16", argLength: 4, commutative: false},
|
{name: "Permute2MaskedFloat32x16", argLength: 4, commutative: false},
|
||||||
|
{name: "Permute2MaskedUint32x16", argLength: 4, commutative: false},
|
||||||
|
{name: "PermuteMaskedInt32x16", argLength: 3, commutative: false},
|
||||||
{name: "PermuteMaskedFloat32x16", argLength: 3, commutative: false},
|
{name: "PermuteMaskedFloat32x16", argLength: 3, commutative: false},
|
||||||
{name: "PermuteMaskedUint32x16", argLength: 3, commutative: false},
|
{name: "PermuteMaskedUint32x16", argLength: 3, commutative: false},
|
||||||
{name: "PermuteMaskedInt32x16", argLength: 3, commutative: false},
|
|
||||||
{name: "PopCountUint32x16", argLength: 1, commutative: false},
|
{name: "PopCountUint32x16", argLength: 1, commutative: false},
|
||||||
{name: "PopCountMaskedUint32x16", argLength: 2, commutative: false},
|
{name: "PopCountMaskedUint32x16", argLength: 2, commutative: false},
|
||||||
{name: "RotateLeftUint32x16", argLength: 2, commutative: false},
|
{name: "RotateLeftUint32x16", argLength: 2, commutative: false},
|
||||||
{name: "RotateLeftMaskedUint32x16", argLength: 3, commutative: false},
|
{name: "RotateLeftMaskedUint32x16", argLength: 3, commutative: false},
|
||||||
{name: "RotateRightUint32x16", argLength: 2, commutative: false},
|
{name: "RotateRightUint32x16", argLength: 2, commutative: false},
|
||||||
{name: "RotateRightMaskedUint32x16", argLength: 3, commutative: false},
|
{name: "RotateRightMaskedUint32x16", argLength: 3, commutative: false},
|
||||||
{name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x16", argLength: 3, commutative: false},
|
|
||||||
{name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x16", argLength: 4, commutative: false},
|
|
||||||
{name: "ShiftAllLeftUint32x16", argLength: 2, commutative: false},
|
{name: "ShiftAllLeftUint32x16", argLength: 2, commutative: false},
|
||||||
{name: "ShiftAllLeftMaskedUint32x16", argLength: 3, commutative: false},
|
{name: "ShiftAllLeftMaskedUint32x16", argLength: 3, commutative: false},
|
||||||
{name: "ShiftAllRightUint32x16", argLength: 2, commutative: false},
|
{name: "ShiftAllRightUint32x16", argLength: 2, commutative: false},
|
||||||
|
|
@ -1104,8 +1102,6 @@ func simdGenericOps() []opData {
|
||||||
{name: "ShiftRightMaskedUint32x16", argLength: 3, commutative: false},
|
{name: "ShiftRightMaskedUint32x16", argLength: 3, commutative: false},
|
||||||
{name: "SubUint32x16", argLength: 2, commutative: false},
|
{name: "SubUint32x16", argLength: 2, commutative: false},
|
||||||
{name: "SubMaskedUint32x16", argLength: 3, commutative: false},
|
{name: "SubMaskedUint32x16", argLength: 3, commutative: false},
|
||||||
{name: "UnsignedSignedQuadDotProdAccumulateUint32x16", argLength: 3, commutative: false},
|
|
||||||
{name: "UnsignedSignedQuadDotProdAccumulateMaskedUint32x16", argLength: 4, commutative: false},
|
|
||||||
{name: "XorUint32x16", argLength: 2, commutative: true},
|
{name: "XorUint32x16", argLength: 2, commutative: true},
|
||||||
{name: "XorMaskedUint32x16", argLength: 3, commutative: true},
|
{name: "XorMaskedUint32x16", argLength: 3, commutative: true},
|
||||||
{name: "AddUint32x4", argLength: 2, commutative: true},
|
{name: "AddUint32x4", argLength: 2, commutative: true},
|
||||||
|
|
@ -1136,20 +1132,18 @@ func simdGenericOps() []opData {
|
||||||
{name: "OrMaskedUint32x4", argLength: 3, commutative: true},
|
{name: "OrMaskedUint32x4", argLength: 3, commutative: true},
|
||||||
{name: "PairwiseAddUint32x4", argLength: 2, commutative: false},
|
{name: "PairwiseAddUint32x4", argLength: 2, commutative: false},
|
||||||
{name: "PairwiseSubUint32x4", argLength: 2, commutative: false},
|
{name: "PairwiseSubUint32x4", argLength: 2, commutative: false},
|
||||||
|
{name: "Permute2Float32x4", argLength: 3, commutative: false},
|
||||||
{name: "Permute2Uint32x4", argLength: 3, commutative: false},
|
{name: "Permute2Uint32x4", argLength: 3, commutative: false},
|
||||||
{name: "Permute2Int32x4", argLength: 3, commutative: false},
|
{name: "Permute2Int32x4", argLength: 3, commutative: false},
|
||||||
{name: "Permute2Float32x4", argLength: 3, commutative: false},
|
|
||||||
{name: "Permute2MaskedFloat32x4", argLength: 4, commutative: false},
|
|
||||||
{name: "Permute2MaskedInt32x4", argLength: 4, commutative: false},
|
{name: "Permute2MaskedInt32x4", argLength: 4, commutative: false},
|
||||||
{name: "Permute2MaskedUint32x4", argLength: 4, commutative: false},
|
{name: "Permute2MaskedUint32x4", argLength: 4, commutative: false},
|
||||||
|
{name: "Permute2MaskedFloat32x4", argLength: 4, commutative: false},
|
||||||
{name: "PopCountUint32x4", argLength: 1, commutative: false},
|
{name: "PopCountUint32x4", argLength: 1, commutative: false},
|
||||||
{name: "PopCountMaskedUint32x4", argLength: 2, commutative: false},
|
{name: "PopCountMaskedUint32x4", argLength: 2, commutative: false},
|
||||||
{name: "RotateLeftUint32x4", argLength: 2, commutative: false},
|
{name: "RotateLeftUint32x4", argLength: 2, commutative: false},
|
||||||
{name: "RotateLeftMaskedUint32x4", argLength: 3, commutative: false},
|
{name: "RotateLeftMaskedUint32x4", argLength: 3, commutative: false},
|
||||||
{name: "RotateRightUint32x4", argLength: 2, commutative: false},
|
{name: "RotateRightUint32x4", argLength: 2, commutative: false},
|
||||||
{name: "RotateRightMaskedUint32x4", argLength: 3, commutative: false},
|
{name: "RotateRightMaskedUint32x4", argLength: 3, commutative: false},
|
||||||
{name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x4", argLength: 3, commutative: false},
|
|
||||||
{name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x4", argLength: 4, commutative: false},
|
|
||||||
{name: "ShiftAllLeftUint32x4", argLength: 2, commutative: false},
|
{name: "ShiftAllLeftUint32x4", argLength: 2, commutative: false},
|
||||||
{name: "ShiftAllLeftMaskedUint32x4", argLength: 3, commutative: false},
|
{name: "ShiftAllLeftMaskedUint32x4", argLength: 3, commutative: false},
|
||||||
{name: "ShiftAllRightUint32x4", argLength: 2, commutative: false},
|
{name: "ShiftAllRightUint32x4", argLength: 2, commutative: false},
|
||||||
|
|
@ -1164,8 +1158,6 @@ func simdGenericOps() []opData {
|
||||||
{name: "ShiftRightMaskedUint32x4", argLength: 3, commutative: false},
|
{name: "ShiftRightMaskedUint32x4", argLength: 3, commutative: false},
|
||||||
{name: "SubUint32x4", argLength: 2, commutative: false},
|
{name: "SubUint32x4", argLength: 2, commutative: false},
|
||||||
{name: "SubMaskedUint32x4", argLength: 3, commutative: false},
|
{name: "SubMaskedUint32x4", argLength: 3, commutative: false},
|
||||||
{name: "UnsignedSignedQuadDotProdAccumulateUint32x4", argLength: 3, commutative: false},
|
|
||||||
{name: "UnsignedSignedQuadDotProdAccumulateMaskedUint32x4", argLength: 4, commutative: false},
|
|
||||||
{name: "XorUint32x4", argLength: 2, commutative: true},
|
{name: "XorUint32x4", argLength: 2, commutative: true},
|
||||||
{name: "XorMaskedUint32x4", argLength: 3, commutative: true},
|
{name: "XorMaskedUint32x4", argLength: 3, commutative: true},
|
||||||
{name: "AddUint32x8", argLength: 2, commutative: true},
|
{name: "AddUint32x8", argLength: 2, commutative: true},
|
||||||
|
|
@ -1197,14 +1189,14 @@ func simdGenericOps() []opData {
|
||||||
{name: "PairwiseAddUint32x8", argLength: 2, commutative: false},
|
{name: "PairwiseAddUint32x8", argLength: 2, commutative: false},
|
||||||
{name: "PairwiseSubUint32x8", argLength: 2, commutative: false},
|
{name: "PairwiseSubUint32x8", argLength: 2, commutative: false},
|
||||||
{name: "PermuteUint32x8", argLength: 2, commutative: false},
|
{name: "PermuteUint32x8", argLength: 2, commutative: false},
|
||||||
{name: "PermuteInt32x8", argLength: 2, commutative: false},
|
|
||||||
{name: "PermuteFloat32x8", argLength: 2, commutative: false},
|
{name: "PermuteFloat32x8", argLength: 2, commutative: false},
|
||||||
{name: "Permute2Uint32x8", argLength: 3, commutative: false},
|
{name: "PermuteInt32x8", argLength: 2, commutative: false},
|
||||||
{name: "Permute2Float32x8", argLength: 3, commutative: false},
|
|
||||||
{name: "Permute2Int32x8", argLength: 3, commutative: false},
|
{name: "Permute2Int32x8", argLength: 3, commutative: false},
|
||||||
|
{name: "Permute2Float32x8", argLength: 3, commutative: false},
|
||||||
|
{name: "Permute2Uint32x8", argLength: 3, commutative: false},
|
||||||
{name: "Permute2MaskedFloat32x8", argLength: 4, commutative: false},
|
{name: "Permute2MaskedFloat32x8", argLength: 4, commutative: false},
|
||||||
{name: "Permute2MaskedInt32x8", argLength: 4, commutative: false},
|
|
||||||
{name: "Permute2MaskedUint32x8", argLength: 4, commutative: false},
|
{name: "Permute2MaskedUint32x8", argLength: 4, commutative: false},
|
||||||
|
{name: "Permute2MaskedInt32x8", argLength: 4, commutative: false},
|
||||||
{name: "PermuteMaskedInt32x8", argLength: 3, commutative: false},
|
{name: "PermuteMaskedInt32x8", argLength: 3, commutative: false},
|
||||||
{name: "PermuteMaskedUint32x8", argLength: 3, commutative: false},
|
{name: "PermuteMaskedUint32x8", argLength: 3, commutative: false},
|
||||||
{name: "PermuteMaskedFloat32x8", argLength: 3, commutative: false},
|
{name: "PermuteMaskedFloat32x8", argLength: 3, commutative: false},
|
||||||
|
|
@ -1214,8 +1206,6 @@ func simdGenericOps() []opData {
|
||||||
{name: "RotateLeftMaskedUint32x8", argLength: 3, commutative: false},
|
{name: "RotateLeftMaskedUint32x8", argLength: 3, commutative: false},
|
||||||
{name: "RotateRightUint32x8", argLength: 2, commutative: false},
|
{name: "RotateRightUint32x8", argLength: 2, commutative: false},
|
||||||
{name: "RotateRightMaskedUint32x8", argLength: 3, commutative: false},
|
{name: "RotateRightMaskedUint32x8", argLength: 3, commutative: false},
|
||||||
{name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x8", argLength: 3, commutative: false},
|
|
||||||
{name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x8", argLength: 4, commutative: false},
|
|
||||||
{name: "ShiftAllLeftUint32x8", argLength: 2, commutative: false},
|
{name: "ShiftAllLeftUint32x8", argLength: 2, commutative: false},
|
||||||
{name: "ShiftAllLeftMaskedUint32x8", argLength: 3, commutative: false},
|
{name: "ShiftAllLeftMaskedUint32x8", argLength: 3, commutative: false},
|
||||||
{name: "ShiftAllRightUint32x8", argLength: 2, commutative: false},
|
{name: "ShiftAllRightUint32x8", argLength: 2, commutative: false},
|
||||||
|
|
@ -1230,8 +1220,6 @@ func simdGenericOps() []opData {
|
||||||
{name: "ShiftRightMaskedUint32x8", argLength: 3, commutative: false},
|
{name: "ShiftRightMaskedUint32x8", argLength: 3, commutative: false},
|
||||||
{name: "SubUint32x8", argLength: 2, commutative: false},
|
{name: "SubUint32x8", argLength: 2, commutative: false},
|
||||||
{name: "SubMaskedUint32x8", argLength: 3, commutative: false},
|
{name: "SubMaskedUint32x8", argLength: 3, commutative: false},
|
||||||
{name: "UnsignedSignedQuadDotProdAccumulateUint32x8", argLength: 3, commutative: false},
|
|
||||||
{name: "UnsignedSignedQuadDotProdAccumulateMaskedUint32x8", argLength: 4, commutative: false},
|
|
||||||
{name: "XorUint32x8", argLength: 2, commutative: true},
|
{name: "XorUint32x8", argLength: 2, commutative: true},
|
||||||
{name: "XorMaskedUint32x8", argLength: 3, commutative: true},
|
{name: "XorMaskedUint32x8", argLength: 3, commutative: true},
|
||||||
{name: "AddUint64x2", argLength: 2, commutative: true},
|
{name: "AddUint64x2", argLength: 2, commutative: true},
|
||||||
|
|
@ -1265,8 +1253,8 @@ func simdGenericOps() []opData {
|
||||||
{name: "Permute2Uint64x2", argLength: 3, commutative: false},
|
{name: "Permute2Uint64x2", argLength: 3, commutative: false},
|
||||||
{name: "Permute2Int64x2", argLength: 3, commutative: false},
|
{name: "Permute2Int64x2", argLength: 3, commutative: false},
|
||||||
{name: "Permute2MaskedInt64x2", argLength: 4, commutative: false},
|
{name: "Permute2MaskedInt64x2", argLength: 4, commutative: false},
|
||||||
{name: "Permute2MaskedUint64x2", argLength: 4, commutative: false},
|
|
||||||
{name: "Permute2MaskedFloat64x2", argLength: 4, commutative: false},
|
{name: "Permute2MaskedFloat64x2", argLength: 4, commutative: false},
|
||||||
|
{name: "Permute2MaskedUint64x2", argLength: 4, commutative: false},
|
||||||
{name: "PopCountUint64x2", argLength: 1, commutative: false},
|
{name: "PopCountUint64x2", argLength: 1, commutative: false},
|
||||||
{name: "PopCountMaskedUint64x2", argLength: 2, commutative: false},
|
{name: "PopCountMaskedUint64x2", argLength: 2, commutative: false},
|
||||||
{name: "RotateLeftUint64x2", argLength: 2, commutative: false},
|
{name: "RotateLeftUint64x2", argLength: 2, commutative: false},
|
||||||
|
|
@ -1316,18 +1304,18 @@ func simdGenericOps() []opData {
|
||||||
{name: "NotEqualMaskedUint64x4", argLength: 3, commutative: true},
|
{name: "NotEqualMaskedUint64x4", argLength: 3, commutative: true},
|
||||||
{name: "OrUint64x4", argLength: 2, commutative: true},
|
{name: "OrUint64x4", argLength: 2, commutative: true},
|
||||||
{name: "OrMaskedUint64x4", argLength: 3, commutative: true},
|
{name: "OrMaskedUint64x4", argLength: 3, commutative: true},
|
||||||
{name: "PermuteFloat64x4", argLength: 2, commutative: false},
|
|
||||||
{name: "PermuteUint64x4", argLength: 2, commutative: false},
|
{name: "PermuteUint64x4", argLength: 2, commutative: false},
|
||||||
{name: "PermuteInt64x4", argLength: 2, commutative: false},
|
{name: "PermuteInt64x4", argLength: 2, commutative: false},
|
||||||
|
{name: "PermuteFloat64x4", argLength: 2, commutative: false},
|
||||||
|
{name: "Permute2Float64x4", argLength: 3, commutative: false},
|
||||||
{name: "Permute2Int64x4", argLength: 3, commutative: false},
|
{name: "Permute2Int64x4", argLength: 3, commutative: false},
|
||||||
{name: "Permute2Uint64x4", argLength: 3, commutative: false},
|
{name: "Permute2Uint64x4", argLength: 3, commutative: false},
|
||||||
{name: "Permute2Float64x4", argLength: 3, commutative: false},
|
|
||||||
{name: "Permute2MaskedFloat64x4", argLength: 4, commutative: false},
|
{name: "Permute2MaskedFloat64x4", argLength: 4, commutative: false},
|
||||||
{name: "Permute2MaskedUint64x4", argLength: 4, commutative: false},
|
{name: "Permute2MaskedUint64x4", argLength: 4, commutative: false},
|
||||||
{name: "Permute2MaskedInt64x4", argLength: 4, commutative: false},
|
{name: "Permute2MaskedInt64x4", argLength: 4, commutative: false},
|
||||||
{name: "PermuteMaskedFloat64x4", argLength: 3, commutative: false},
|
{name: "PermuteMaskedFloat64x4", argLength: 3, commutative: false},
|
||||||
{name: "PermuteMaskedUint64x4", argLength: 3, commutative: false},
|
|
||||||
{name: "PermuteMaskedInt64x4", argLength: 3, commutative: false},
|
{name: "PermuteMaskedInt64x4", argLength: 3, commutative: false},
|
||||||
|
{name: "PermuteMaskedUint64x4", argLength: 3, commutative: false},
|
||||||
{name: "PopCountUint64x4", argLength: 1, commutative: false},
|
{name: "PopCountUint64x4", argLength: 1, commutative: false},
|
||||||
{name: "PopCountMaskedUint64x4", argLength: 2, commutative: false},
|
{name: "PopCountMaskedUint64x4", argLength: 2, commutative: false},
|
||||||
{name: "RotateLeftUint64x4", argLength: 2, commutative: false},
|
{name: "RotateLeftUint64x4", argLength: 2, commutative: false},
|
||||||
|
|
@ -1377,18 +1365,18 @@ func simdGenericOps() []opData {
|
||||||
{name: "NotEqualMaskedUint64x8", argLength: 3, commutative: true},
|
{name: "NotEqualMaskedUint64x8", argLength: 3, commutative: true},
|
||||||
{name: "OrUint64x8", argLength: 2, commutative: true},
|
{name: "OrUint64x8", argLength: 2, commutative: true},
|
||||||
{name: "OrMaskedUint64x8", argLength: 3, commutative: true},
|
{name: "OrMaskedUint64x8", argLength: 3, commutative: true},
|
||||||
|
{name: "PermuteFloat64x8", argLength: 2, commutative: false},
|
||||||
{name: "PermuteInt64x8", argLength: 2, commutative: false},
|
{name: "PermuteInt64x8", argLength: 2, commutative: false},
|
||||||
{name: "PermuteUint64x8", argLength: 2, commutative: false},
|
{name: "PermuteUint64x8", argLength: 2, commutative: false},
|
||||||
{name: "PermuteFloat64x8", argLength: 2, commutative: false},
|
|
||||||
{name: "Permute2Uint64x8", argLength: 3, commutative: false},
|
|
||||||
{name: "Permute2Float64x8", argLength: 3, commutative: false},
|
|
||||||
{name: "Permute2Int64x8", argLength: 3, commutative: false},
|
{name: "Permute2Int64x8", argLength: 3, commutative: false},
|
||||||
|
{name: "Permute2Float64x8", argLength: 3, commutative: false},
|
||||||
|
{name: "Permute2Uint64x8", argLength: 3, commutative: false},
|
||||||
{name: "Permute2MaskedUint64x8", argLength: 4, commutative: false},
|
{name: "Permute2MaskedUint64x8", argLength: 4, commutative: false},
|
||||||
{name: "Permute2MaskedFloat64x8", argLength: 4, commutative: false},
|
|
||||||
{name: "Permute2MaskedInt64x8", argLength: 4, commutative: false},
|
{name: "Permute2MaskedInt64x8", argLength: 4, commutative: false},
|
||||||
|
{name: "Permute2MaskedFloat64x8", argLength: 4, commutative: false},
|
||||||
{name: "PermuteMaskedUint64x8", argLength: 3, commutative: false},
|
{name: "PermuteMaskedUint64x8", argLength: 3, commutative: false},
|
||||||
{name: "PermuteMaskedInt64x8", argLength: 3, commutative: false},
|
|
||||||
{name: "PermuteMaskedFloat64x8", argLength: 3, commutative: false},
|
{name: "PermuteMaskedFloat64x8", argLength: 3, commutative: false},
|
||||||
|
{name: "PermuteMaskedInt64x8", argLength: 3, commutative: false},
|
||||||
{name: "PopCountUint64x8", argLength: 1, commutative: false},
|
{name: "PopCountUint64x8", argLength: 1, commutative: false},
|
||||||
{name: "PopCountMaskedUint64x8", argLength: 2, commutative: false},
|
{name: "PopCountMaskedUint64x8", argLength: 2, commutative: false},
|
||||||
{name: "RotateLeftUint64x8", argLength: 2, commutative: false},
|
{name: "RotateLeftUint64x8", argLength: 2, commutative: false},
|
||||||
|
|
@ -1439,8 +1427,8 @@ func simdGenericOps() []opData {
|
||||||
{name: "OrUint8x16", argLength: 2, commutative: true},
|
{name: "OrUint8x16", argLength: 2, commutative: true},
|
||||||
{name: "PermuteUint8x16", argLength: 2, commutative: false},
|
{name: "PermuteUint8x16", argLength: 2, commutative: false},
|
||||||
{name: "PermuteInt8x16", argLength: 2, commutative: false},
|
{name: "PermuteInt8x16", argLength: 2, commutative: false},
|
||||||
{name: "Permute2Int8x16", argLength: 3, commutative: false},
|
|
||||||
{name: "Permute2Uint8x16", argLength: 3, commutative: false},
|
{name: "Permute2Uint8x16", argLength: 3, commutative: false},
|
||||||
|
{name: "Permute2Int8x16", argLength: 3, commutative: false},
|
||||||
{name: "Permute2MaskedInt8x16", argLength: 4, commutative: false},
|
{name: "Permute2MaskedInt8x16", argLength: 4, commutative: false},
|
||||||
{name: "Permute2MaskedUint8x16", argLength: 4, commutative: false},
|
{name: "Permute2MaskedUint8x16", argLength: 4, commutative: false},
|
||||||
{name: "PermuteMaskedUint8x16", argLength: 3, commutative: false},
|
{name: "PermuteMaskedUint8x16", argLength: 3, commutative: false},
|
||||||
|
|
@ -1486,10 +1474,10 @@ func simdGenericOps() []opData {
|
||||||
{name: "PermuteInt8x32", argLength: 2, commutative: false},
|
{name: "PermuteInt8x32", argLength: 2, commutative: false},
|
||||||
{name: "Permute2Int8x32", argLength: 3, commutative: false},
|
{name: "Permute2Int8x32", argLength: 3, commutative: false},
|
||||||
{name: "Permute2Uint8x32", argLength: 3, commutative: false},
|
{name: "Permute2Uint8x32", argLength: 3, commutative: false},
|
||||||
{name: "Permute2MaskedInt8x32", argLength: 4, commutative: false},
|
|
||||||
{name: "Permute2MaskedUint8x32", argLength: 4, commutative: false},
|
{name: "Permute2MaskedUint8x32", argLength: 4, commutative: false},
|
||||||
{name: "PermuteMaskedInt8x32", argLength: 3, commutative: false},
|
{name: "Permute2MaskedInt8x32", argLength: 4, commutative: false},
|
||||||
{name: "PermuteMaskedUint8x32", argLength: 3, commutative: false},
|
{name: "PermuteMaskedUint8x32", argLength: 3, commutative: false},
|
||||||
|
{name: "PermuteMaskedInt8x32", argLength: 3, commutative: false},
|
||||||
{name: "PopCountUint8x32", argLength: 1, commutative: false},
|
{name: "PopCountUint8x32", argLength: 1, commutative: false},
|
||||||
{name: "PopCountMaskedUint8x32", argLength: 2, commutative: false},
|
{name: "PopCountMaskedUint8x32", argLength: 2, commutative: false},
|
||||||
{name: "SaturatedAddUint8x32", argLength: 2, commutative: true},
|
{name: "SaturatedAddUint8x32", argLength: 2, commutative: true},
|
||||||
|
|
|
||||||
|
|
@ -5314,8 +5314,8 @@ const (
|
||||||
OpPermute2Int16x16
|
OpPermute2Int16x16
|
||||||
OpPermute2MaskedInt16x16
|
OpPermute2MaskedInt16x16
|
||||||
OpPermute2MaskedUint16x16
|
OpPermute2MaskedUint16x16
|
||||||
OpPermuteMaskedInt16x16
|
|
||||||
OpPermuteMaskedUint16x16
|
OpPermuteMaskedUint16x16
|
||||||
|
OpPermuteMaskedInt16x16
|
||||||
OpPopCountUint16x16
|
OpPopCountUint16x16
|
||||||
OpPopCountMaskedUint16x16
|
OpPopCountMaskedUint16x16
|
||||||
OpSaturatedAddUint16x16
|
OpSaturatedAddUint16x16
|
||||||
|
|
@ -5360,12 +5360,12 @@ const (
|
||||||
OpMulHighMaskedUint16x32
|
OpMulHighMaskedUint16x32
|
||||||
OpNotEqualUint16x32
|
OpNotEqualUint16x32
|
||||||
OpNotEqualMaskedUint16x32
|
OpNotEqualMaskedUint16x32
|
||||||
OpPermuteInt16x32
|
|
||||||
OpPermuteUint16x32
|
OpPermuteUint16x32
|
||||||
OpPermute2Int16x32
|
OpPermuteInt16x32
|
||||||
OpPermute2Uint16x32
|
OpPermute2Uint16x32
|
||||||
OpPermute2MaskedInt16x32
|
OpPermute2Int16x32
|
||||||
OpPermute2MaskedUint16x32
|
OpPermute2MaskedUint16x32
|
||||||
|
OpPermute2MaskedInt16x32
|
||||||
OpPermuteMaskedUint16x32
|
OpPermuteMaskedUint16x32
|
||||||
OpPermuteMaskedInt16x32
|
OpPermuteMaskedInt16x32
|
||||||
OpPopCountUint16x32
|
OpPopCountUint16x32
|
||||||
|
|
@ -5416,14 +5416,14 @@ const (
|
||||||
OpOrUint16x8
|
OpOrUint16x8
|
||||||
OpPairwiseAddUint16x8
|
OpPairwiseAddUint16x8
|
||||||
OpPairwiseSubUint16x8
|
OpPairwiseSubUint16x8
|
||||||
OpPermuteUint16x8
|
|
||||||
OpPermuteInt16x8
|
OpPermuteInt16x8
|
||||||
|
OpPermuteUint16x8
|
||||||
OpPermute2Int16x8
|
OpPermute2Int16x8
|
||||||
OpPermute2Uint16x8
|
OpPermute2Uint16x8
|
||||||
OpPermute2MaskedUint16x8
|
|
||||||
OpPermute2MaskedInt16x8
|
OpPermute2MaskedInt16x8
|
||||||
OpPermuteMaskedInt16x8
|
OpPermute2MaskedUint16x8
|
||||||
OpPermuteMaskedUint16x8
|
OpPermuteMaskedUint16x8
|
||||||
|
OpPermuteMaskedInt16x8
|
||||||
OpPopCountUint16x8
|
OpPopCountUint16x8
|
||||||
OpPopCountMaskedUint16x8
|
OpPopCountMaskedUint16x8
|
||||||
OpSaturatedAddUint16x8
|
OpSaturatedAddUint16x8
|
||||||
|
|
@ -5470,26 +5470,24 @@ const (
|
||||||
OpNotEqualMaskedUint32x16
|
OpNotEqualMaskedUint32x16
|
||||||
OpOrUint32x16
|
OpOrUint32x16
|
||||||
OpOrMaskedUint32x16
|
OpOrMaskedUint32x16
|
||||||
OpPermuteInt32x16
|
|
||||||
OpPermuteFloat32x16
|
OpPermuteFloat32x16
|
||||||
|
OpPermuteInt32x16
|
||||||
OpPermuteUint32x16
|
OpPermuteUint32x16
|
||||||
OpPermute2Uint32x16
|
OpPermute2Uint32x16
|
||||||
OpPermute2Float32x16
|
OpPermute2Float32x16
|
||||||
OpPermute2Int32x16
|
OpPermute2Int32x16
|
||||||
OpPermute2MaskedUint32x16
|
|
||||||
OpPermute2MaskedInt32x16
|
OpPermute2MaskedInt32x16
|
||||||
OpPermute2MaskedFloat32x16
|
OpPermute2MaskedFloat32x16
|
||||||
|
OpPermute2MaskedUint32x16
|
||||||
|
OpPermuteMaskedInt32x16
|
||||||
OpPermuteMaskedFloat32x16
|
OpPermuteMaskedFloat32x16
|
||||||
OpPermuteMaskedUint32x16
|
OpPermuteMaskedUint32x16
|
||||||
OpPermuteMaskedInt32x16
|
|
||||||
OpPopCountUint32x16
|
OpPopCountUint32x16
|
||||||
OpPopCountMaskedUint32x16
|
OpPopCountMaskedUint32x16
|
||||||
OpRotateLeftUint32x16
|
OpRotateLeftUint32x16
|
||||||
OpRotateLeftMaskedUint32x16
|
OpRotateLeftMaskedUint32x16
|
||||||
OpRotateRightUint32x16
|
OpRotateRightUint32x16
|
||||||
OpRotateRightMaskedUint32x16
|
OpRotateRightMaskedUint32x16
|
||||||
OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16
|
|
||||||
OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x16
|
|
||||||
OpShiftAllLeftUint32x16
|
OpShiftAllLeftUint32x16
|
||||||
OpShiftAllLeftMaskedUint32x16
|
OpShiftAllLeftMaskedUint32x16
|
||||||
OpShiftAllRightUint32x16
|
OpShiftAllRightUint32x16
|
||||||
|
|
@ -5504,8 +5502,6 @@ const (
|
||||||
OpShiftRightMaskedUint32x16
|
OpShiftRightMaskedUint32x16
|
||||||
OpSubUint32x16
|
OpSubUint32x16
|
||||||
OpSubMaskedUint32x16
|
OpSubMaskedUint32x16
|
||||||
OpUnsignedSignedQuadDotProdAccumulateUint32x16
|
|
||||||
OpUnsignedSignedQuadDotProdAccumulateMaskedUint32x16
|
|
||||||
OpXorUint32x16
|
OpXorUint32x16
|
||||||
OpXorMaskedUint32x16
|
OpXorMaskedUint32x16
|
||||||
OpAddUint32x4
|
OpAddUint32x4
|
||||||
|
|
@ -5536,20 +5532,18 @@ const (
|
||||||
OpOrMaskedUint32x4
|
OpOrMaskedUint32x4
|
||||||
OpPairwiseAddUint32x4
|
OpPairwiseAddUint32x4
|
||||||
OpPairwiseSubUint32x4
|
OpPairwiseSubUint32x4
|
||||||
|
OpPermute2Float32x4
|
||||||
OpPermute2Uint32x4
|
OpPermute2Uint32x4
|
||||||
OpPermute2Int32x4
|
OpPermute2Int32x4
|
||||||
OpPermute2Float32x4
|
|
||||||
OpPermute2MaskedFloat32x4
|
|
||||||
OpPermute2MaskedInt32x4
|
OpPermute2MaskedInt32x4
|
||||||
OpPermute2MaskedUint32x4
|
OpPermute2MaskedUint32x4
|
||||||
|
OpPermute2MaskedFloat32x4
|
||||||
OpPopCountUint32x4
|
OpPopCountUint32x4
|
||||||
OpPopCountMaskedUint32x4
|
OpPopCountMaskedUint32x4
|
||||||
OpRotateLeftUint32x4
|
OpRotateLeftUint32x4
|
||||||
OpRotateLeftMaskedUint32x4
|
OpRotateLeftMaskedUint32x4
|
||||||
OpRotateRightUint32x4
|
OpRotateRightUint32x4
|
||||||
OpRotateRightMaskedUint32x4
|
OpRotateRightMaskedUint32x4
|
||||||
OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4
|
|
||||||
OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x4
|
|
||||||
OpShiftAllLeftUint32x4
|
OpShiftAllLeftUint32x4
|
||||||
OpShiftAllLeftMaskedUint32x4
|
OpShiftAllLeftMaskedUint32x4
|
||||||
OpShiftAllRightUint32x4
|
OpShiftAllRightUint32x4
|
||||||
|
|
@ -5564,8 +5558,6 @@ const (
|
||||||
OpShiftRightMaskedUint32x4
|
OpShiftRightMaskedUint32x4
|
||||||
OpSubUint32x4
|
OpSubUint32x4
|
||||||
OpSubMaskedUint32x4
|
OpSubMaskedUint32x4
|
||||||
OpUnsignedSignedQuadDotProdAccumulateUint32x4
|
|
||||||
OpUnsignedSignedQuadDotProdAccumulateMaskedUint32x4
|
|
||||||
OpXorUint32x4
|
OpXorUint32x4
|
||||||
OpXorMaskedUint32x4
|
OpXorMaskedUint32x4
|
||||||
OpAddUint32x8
|
OpAddUint32x8
|
||||||
|
|
@ -5597,14 +5589,14 @@ const (
|
||||||
OpPairwiseAddUint32x8
|
OpPairwiseAddUint32x8
|
||||||
OpPairwiseSubUint32x8
|
OpPairwiseSubUint32x8
|
||||||
OpPermuteUint32x8
|
OpPermuteUint32x8
|
||||||
OpPermuteInt32x8
|
|
||||||
OpPermuteFloat32x8
|
OpPermuteFloat32x8
|
||||||
OpPermute2Uint32x8
|
OpPermuteInt32x8
|
||||||
OpPermute2Float32x8
|
|
||||||
OpPermute2Int32x8
|
OpPermute2Int32x8
|
||||||
|
OpPermute2Float32x8
|
||||||
|
OpPermute2Uint32x8
|
||||||
OpPermute2MaskedFloat32x8
|
OpPermute2MaskedFloat32x8
|
||||||
OpPermute2MaskedInt32x8
|
|
||||||
OpPermute2MaskedUint32x8
|
OpPermute2MaskedUint32x8
|
||||||
|
OpPermute2MaskedInt32x8
|
||||||
OpPermuteMaskedInt32x8
|
OpPermuteMaskedInt32x8
|
||||||
OpPermuteMaskedUint32x8
|
OpPermuteMaskedUint32x8
|
||||||
OpPermuteMaskedFloat32x8
|
OpPermuteMaskedFloat32x8
|
||||||
|
|
@ -5614,8 +5606,6 @@ const (
|
||||||
OpRotateLeftMaskedUint32x8
|
OpRotateLeftMaskedUint32x8
|
||||||
OpRotateRightUint32x8
|
OpRotateRightUint32x8
|
||||||
OpRotateRightMaskedUint32x8
|
OpRotateRightMaskedUint32x8
|
||||||
OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8
|
|
||||||
OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x8
|
|
||||||
OpShiftAllLeftUint32x8
|
OpShiftAllLeftUint32x8
|
||||||
OpShiftAllLeftMaskedUint32x8
|
OpShiftAllLeftMaskedUint32x8
|
||||||
OpShiftAllRightUint32x8
|
OpShiftAllRightUint32x8
|
||||||
|
|
@ -5630,8 +5620,6 @@ const (
|
||||||
OpShiftRightMaskedUint32x8
|
OpShiftRightMaskedUint32x8
|
||||||
OpSubUint32x8
|
OpSubUint32x8
|
||||||
OpSubMaskedUint32x8
|
OpSubMaskedUint32x8
|
||||||
OpUnsignedSignedQuadDotProdAccumulateUint32x8
|
|
||||||
OpUnsignedSignedQuadDotProdAccumulateMaskedUint32x8
|
|
||||||
OpXorUint32x8
|
OpXorUint32x8
|
||||||
OpXorMaskedUint32x8
|
OpXorMaskedUint32x8
|
||||||
OpAddUint64x2
|
OpAddUint64x2
|
||||||
|
|
@ -5665,8 +5653,8 @@ const (
|
||||||
OpPermute2Uint64x2
|
OpPermute2Uint64x2
|
||||||
OpPermute2Int64x2
|
OpPermute2Int64x2
|
||||||
OpPermute2MaskedInt64x2
|
OpPermute2MaskedInt64x2
|
||||||
OpPermute2MaskedUint64x2
|
|
||||||
OpPermute2MaskedFloat64x2
|
OpPermute2MaskedFloat64x2
|
||||||
|
OpPermute2MaskedUint64x2
|
||||||
OpPopCountUint64x2
|
OpPopCountUint64x2
|
||||||
OpPopCountMaskedUint64x2
|
OpPopCountMaskedUint64x2
|
||||||
OpRotateLeftUint64x2
|
OpRotateLeftUint64x2
|
||||||
|
|
@ -5716,18 +5704,18 @@ const (
|
||||||
OpNotEqualMaskedUint64x4
|
OpNotEqualMaskedUint64x4
|
||||||
OpOrUint64x4
|
OpOrUint64x4
|
||||||
OpOrMaskedUint64x4
|
OpOrMaskedUint64x4
|
||||||
OpPermuteFloat64x4
|
|
||||||
OpPermuteUint64x4
|
OpPermuteUint64x4
|
||||||
OpPermuteInt64x4
|
OpPermuteInt64x4
|
||||||
|
OpPermuteFloat64x4
|
||||||
|
OpPermute2Float64x4
|
||||||
OpPermute2Int64x4
|
OpPermute2Int64x4
|
||||||
OpPermute2Uint64x4
|
OpPermute2Uint64x4
|
||||||
OpPermute2Float64x4
|
|
||||||
OpPermute2MaskedFloat64x4
|
OpPermute2MaskedFloat64x4
|
||||||
OpPermute2MaskedUint64x4
|
OpPermute2MaskedUint64x4
|
||||||
OpPermute2MaskedInt64x4
|
OpPermute2MaskedInt64x4
|
||||||
OpPermuteMaskedFloat64x4
|
OpPermuteMaskedFloat64x4
|
||||||
OpPermuteMaskedUint64x4
|
|
||||||
OpPermuteMaskedInt64x4
|
OpPermuteMaskedInt64x4
|
||||||
|
OpPermuteMaskedUint64x4
|
||||||
OpPopCountUint64x4
|
OpPopCountUint64x4
|
||||||
OpPopCountMaskedUint64x4
|
OpPopCountMaskedUint64x4
|
||||||
OpRotateLeftUint64x4
|
OpRotateLeftUint64x4
|
||||||
|
|
@ -5777,18 +5765,18 @@ const (
|
||||||
OpNotEqualMaskedUint64x8
|
OpNotEqualMaskedUint64x8
|
||||||
OpOrUint64x8
|
OpOrUint64x8
|
||||||
OpOrMaskedUint64x8
|
OpOrMaskedUint64x8
|
||||||
|
OpPermuteFloat64x8
|
||||||
OpPermuteInt64x8
|
OpPermuteInt64x8
|
||||||
OpPermuteUint64x8
|
OpPermuteUint64x8
|
||||||
OpPermuteFloat64x8
|
|
||||||
OpPermute2Uint64x8
|
|
||||||
OpPermute2Float64x8
|
|
||||||
OpPermute2Int64x8
|
OpPermute2Int64x8
|
||||||
|
OpPermute2Float64x8
|
||||||
|
OpPermute2Uint64x8
|
||||||
OpPermute2MaskedUint64x8
|
OpPermute2MaskedUint64x8
|
||||||
OpPermute2MaskedFloat64x8
|
|
||||||
OpPermute2MaskedInt64x8
|
OpPermute2MaskedInt64x8
|
||||||
|
OpPermute2MaskedFloat64x8
|
||||||
OpPermuteMaskedUint64x8
|
OpPermuteMaskedUint64x8
|
||||||
OpPermuteMaskedInt64x8
|
|
||||||
OpPermuteMaskedFloat64x8
|
OpPermuteMaskedFloat64x8
|
||||||
|
OpPermuteMaskedInt64x8
|
||||||
OpPopCountUint64x8
|
OpPopCountUint64x8
|
||||||
OpPopCountMaskedUint64x8
|
OpPopCountMaskedUint64x8
|
||||||
OpRotateLeftUint64x8
|
OpRotateLeftUint64x8
|
||||||
|
|
@ -5839,8 +5827,8 @@ const (
|
||||||
OpOrUint8x16
|
OpOrUint8x16
|
||||||
OpPermuteUint8x16
|
OpPermuteUint8x16
|
||||||
OpPermuteInt8x16
|
OpPermuteInt8x16
|
||||||
OpPermute2Int8x16
|
|
||||||
OpPermute2Uint8x16
|
OpPermute2Uint8x16
|
||||||
|
OpPermute2Int8x16
|
||||||
OpPermute2MaskedInt8x16
|
OpPermute2MaskedInt8x16
|
||||||
OpPermute2MaskedUint8x16
|
OpPermute2MaskedUint8x16
|
||||||
OpPermuteMaskedUint8x16
|
OpPermuteMaskedUint8x16
|
||||||
|
|
@ -5886,10 +5874,10 @@ const (
|
||||||
OpPermuteInt8x32
|
OpPermuteInt8x32
|
||||||
OpPermute2Int8x32
|
OpPermute2Int8x32
|
||||||
OpPermute2Uint8x32
|
OpPermute2Uint8x32
|
||||||
OpPermute2MaskedInt8x32
|
|
||||||
OpPermute2MaskedUint8x32
|
OpPermute2MaskedUint8x32
|
||||||
OpPermuteMaskedInt8x32
|
OpPermute2MaskedInt8x32
|
||||||
OpPermuteMaskedUint8x32
|
OpPermuteMaskedUint8x32
|
||||||
|
OpPermuteMaskedInt8x32
|
||||||
OpPopCountUint8x32
|
OpPopCountUint8x32
|
||||||
OpPopCountMaskedUint8x32
|
OpPopCountMaskedUint8x32
|
||||||
OpSaturatedAddUint8x32
|
OpSaturatedAddUint8x32
|
||||||
|
|
@ -65610,12 +65598,12 @@ var opcodeTable = [...]opInfo{
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "PermuteMaskedInt16x16",
|
name: "PermuteMaskedUint16x16",
|
||||||
argLen: 3,
|
argLen: 3,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "PermuteMaskedUint16x16",
|
name: "PermuteMaskedInt16x16",
|
||||||
argLen: 3,
|
argLen: 3,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
|
|
@ -65856,19 +65844,14 @@ var opcodeTable = [...]opInfo{
|
||||||
commutative: true,
|
commutative: true,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "PermuteInt16x32",
|
|
||||||
argLen: 2,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
name: "PermuteUint16x32",
|
name: "PermuteUint16x32",
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "Permute2Int16x32",
|
name: "PermuteInt16x32",
|
||||||
argLen: 3,
|
argLen: 2,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -65877,8 +65860,8 @@ var opcodeTable = [...]opInfo{
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "Permute2MaskedInt16x32",
|
name: "Permute2Int16x32",
|
||||||
argLen: 4,
|
argLen: 3,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -65886,6 +65869,11 @@ var opcodeTable = [...]opInfo{
|
||||||
argLen: 4,
|
argLen: 4,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "Permute2MaskedInt16x32",
|
||||||
|
argLen: 4,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "PermuteMaskedUint16x32",
|
name: "PermuteMaskedUint16x32",
|
||||||
argLen: 3,
|
argLen: 3,
|
||||||
|
|
@ -66155,12 +66143,12 @@ var opcodeTable = [...]opInfo{
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "PermuteUint16x8",
|
name: "PermuteInt16x8",
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "PermuteInt16x8",
|
name: "PermuteUint16x8",
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
|
|
@ -66174,19 +66162,14 @@ var opcodeTable = [...]opInfo{
|
||||||
argLen: 3,
|
argLen: 3,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "Permute2MaskedUint16x8",
|
|
||||||
argLen: 4,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
name: "Permute2MaskedInt16x8",
|
name: "Permute2MaskedInt16x8",
|
||||||
argLen: 4,
|
argLen: 4,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "PermuteMaskedInt16x8",
|
name: "Permute2MaskedUint16x8",
|
||||||
argLen: 3,
|
argLen: 4,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -66194,6 +66177,11 @@ var opcodeTable = [...]opInfo{
|
||||||
argLen: 3,
|
argLen: 3,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "PermuteMaskedInt16x8",
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "PopCountUint16x8",
|
name: "PopCountUint16x8",
|
||||||
argLen: 1,
|
argLen: 1,
|
||||||
|
|
@ -66442,12 +66430,12 @@ var opcodeTable = [...]opInfo{
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "PermuteInt32x16",
|
name: "PermuteFloat32x16",
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "PermuteFloat32x16",
|
name: "PermuteInt32x16",
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
|
|
@ -66471,11 +66459,6 @@ var opcodeTable = [...]opInfo{
|
||||||
argLen: 3,
|
argLen: 3,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "Permute2MaskedUint32x16",
|
|
||||||
argLen: 4,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
name: "Permute2MaskedInt32x16",
|
name: "Permute2MaskedInt32x16",
|
||||||
argLen: 4,
|
argLen: 4,
|
||||||
|
|
@ -66486,6 +66469,16 @@ var opcodeTable = [...]opInfo{
|
||||||
argLen: 4,
|
argLen: 4,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "Permute2MaskedUint32x16",
|
||||||
|
argLen: 4,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "PermuteMaskedInt32x16",
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "PermuteMaskedFloat32x16",
|
name: "PermuteMaskedFloat32x16",
|
||||||
argLen: 3,
|
argLen: 3,
|
||||||
|
|
@ -66496,11 +66489,6 @@ var opcodeTable = [...]opInfo{
|
||||||
argLen: 3,
|
argLen: 3,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "PermuteMaskedInt32x16",
|
|
||||||
argLen: 3,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
name: "PopCountUint32x16",
|
name: "PopCountUint32x16",
|
||||||
argLen: 1,
|
argLen: 1,
|
||||||
|
|
@ -66531,16 +66519,6 @@ var opcodeTable = [...]opInfo{
|
||||||
argLen: 3,
|
argLen: 3,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x16",
|
|
||||||
argLen: 3,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x16",
|
|
||||||
argLen: 4,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
name: "ShiftAllLeftUint32x16",
|
name: "ShiftAllLeftUint32x16",
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
|
|
@ -66611,16 +66589,6 @@ var opcodeTable = [...]opInfo{
|
||||||
argLen: 3,
|
argLen: 3,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "UnsignedSignedQuadDotProdAccumulateUint32x16",
|
|
||||||
argLen: 3,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "UnsignedSignedQuadDotProdAccumulateMaskedUint32x16",
|
|
||||||
argLen: 4,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
name: "XorUint32x16",
|
name: "XorUint32x16",
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
|
|
@ -66788,6 +66756,11 @@ var opcodeTable = [...]opInfo{
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "Permute2Float32x4",
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "Permute2Uint32x4",
|
name: "Permute2Uint32x4",
|
||||||
argLen: 3,
|
argLen: 3,
|
||||||
|
|
@ -66798,16 +66771,6 @@ var opcodeTable = [...]opInfo{
|
||||||
argLen: 3,
|
argLen: 3,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "Permute2Float32x4",
|
|
||||||
argLen: 3,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "Permute2MaskedFloat32x4",
|
|
||||||
argLen: 4,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
name: "Permute2MaskedInt32x4",
|
name: "Permute2MaskedInt32x4",
|
||||||
argLen: 4,
|
argLen: 4,
|
||||||
|
|
@ -66818,6 +66781,11 @@ var opcodeTable = [...]opInfo{
|
||||||
argLen: 4,
|
argLen: 4,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "Permute2MaskedFloat32x4",
|
||||||
|
argLen: 4,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "PopCountUint32x4",
|
name: "PopCountUint32x4",
|
||||||
argLen: 1,
|
argLen: 1,
|
||||||
|
|
@ -66848,16 +66816,6 @@ var opcodeTable = [...]opInfo{
|
||||||
argLen: 3,
|
argLen: 3,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x4",
|
|
||||||
argLen: 3,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x4",
|
|
||||||
argLen: 4,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
name: "ShiftAllLeftUint32x4",
|
name: "ShiftAllLeftUint32x4",
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
|
|
@ -66928,16 +66886,6 @@ var opcodeTable = [...]opInfo{
|
||||||
argLen: 3,
|
argLen: 3,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "UnsignedSignedQuadDotProdAccumulateUint32x4",
|
|
||||||
argLen: 3,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "UnsignedSignedQuadDotProdAccumulateMaskedUint32x4",
|
|
||||||
argLen: 4,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
name: "XorUint32x4",
|
name: "XorUint32x4",
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
|
|
@ -67110,18 +67058,18 @@ var opcodeTable = [...]opInfo{
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "PermuteInt32x8",
|
|
||||||
argLen: 2,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
name: "PermuteFloat32x8",
|
name: "PermuteFloat32x8",
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "Permute2Uint32x8",
|
name: "PermuteInt32x8",
|
||||||
|
argLen: 2,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Permute2Int32x8",
|
||||||
argLen: 3,
|
argLen: 3,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
|
|
@ -67131,7 +67079,7 @@ var opcodeTable = [...]opInfo{
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "Permute2Int32x8",
|
name: "Permute2Uint32x8",
|
||||||
argLen: 3,
|
argLen: 3,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
|
|
@ -67141,12 +67089,12 @@ var opcodeTable = [...]opInfo{
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "Permute2MaskedInt32x8",
|
name: "Permute2MaskedUint32x8",
|
||||||
argLen: 4,
|
argLen: 4,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "Permute2MaskedUint32x8",
|
name: "Permute2MaskedInt32x8",
|
||||||
argLen: 4,
|
argLen: 4,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
|
|
@ -67195,16 +67143,6 @@ var opcodeTable = [...]opInfo{
|
||||||
argLen: 3,
|
argLen: 3,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x8",
|
|
||||||
argLen: 3,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x8",
|
|
||||||
argLen: 4,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
name: "ShiftAllLeftUint32x8",
|
name: "ShiftAllLeftUint32x8",
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
|
|
@ -67275,16 +67213,6 @@ var opcodeTable = [...]opInfo{
|
||||||
argLen: 3,
|
argLen: 3,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "UnsignedSignedQuadDotProdAccumulateUint32x8",
|
|
||||||
argLen: 3,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "UnsignedSignedQuadDotProdAccumulateMaskedUint32x8",
|
|
||||||
argLen: 4,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
name: "XorUint32x8",
|
name: "XorUint32x8",
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
|
|
@ -67469,12 +67397,12 @@ var opcodeTable = [...]opInfo{
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "Permute2MaskedUint64x2",
|
name: "Permute2MaskedFloat64x2",
|
||||||
argLen: 4,
|
argLen: 4,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "Permute2MaskedFloat64x2",
|
name: "Permute2MaskedUint64x2",
|
||||||
argLen: 4,
|
argLen: 4,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
|
|
@ -67741,11 +67669,6 @@ var opcodeTable = [...]opInfo{
|
||||||
commutative: true,
|
commutative: true,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "PermuteFloat64x4",
|
|
||||||
argLen: 2,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
name: "PermuteUint64x4",
|
name: "PermuteUint64x4",
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
|
|
@ -67756,6 +67679,16 @@ var opcodeTable = [...]opInfo{
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "PermuteFloat64x4",
|
||||||
|
argLen: 2,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Permute2Float64x4",
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "Permute2Int64x4",
|
name: "Permute2Int64x4",
|
||||||
argLen: 3,
|
argLen: 3,
|
||||||
|
|
@ -67766,11 +67699,6 @@ var opcodeTable = [...]opInfo{
|
||||||
argLen: 3,
|
argLen: 3,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "Permute2Float64x4",
|
|
||||||
argLen: 3,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
name: "Permute2MaskedFloat64x4",
|
name: "Permute2MaskedFloat64x4",
|
||||||
argLen: 4,
|
argLen: 4,
|
||||||
|
|
@ -67792,12 +67720,12 @@ var opcodeTable = [...]opInfo{
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "PermuteMaskedUint64x4",
|
name: "PermuteMaskedInt64x4",
|
||||||
argLen: 3,
|
argLen: 3,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "PermuteMaskedInt64x4",
|
name: "PermuteMaskedUint64x4",
|
||||||
argLen: 3,
|
argLen: 3,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
|
|
@ -68064,6 +67992,11 @@ var opcodeTable = [...]opInfo{
|
||||||
commutative: true,
|
commutative: true,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "PermuteFloat64x8",
|
||||||
|
argLen: 2,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "PermuteInt64x8",
|
name: "PermuteInt64x8",
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
|
|
@ -68075,12 +68008,7 @@ var opcodeTable = [...]opInfo{
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "PermuteFloat64x8",
|
name: "Permute2Int64x8",
|
||||||
argLen: 2,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "Permute2Uint64x8",
|
|
||||||
argLen: 3,
|
argLen: 3,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
|
|
@ -68090,7 +68018,7 @@ var opcodeTable = [...]opInfo{
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "Permute2Int64x8",
|
name: "Permute2Uint64x8",
|
||||||
argLen: 3,
|
argLen: 3,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
|
|
@ -68100,12 +68028,12 @@ var opcodeTable = [...]opInfo{
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "Permute2MaskedFloat64x8",
|
name: "Permute2MaskedInt64x8",
|
||||||
argLen: 4,
|
argLen: 4,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "Permute2MaskedInt64x8",
|
name: "Permute2MaskedFloat64x8",
|
||||||
argLen: 4,
|
argLen: 4,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
|
|
@ -68115,12 +68043,12 @@ var opcodeTable = [...]opInfo{
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "PermuteMaskedInt64x8",
|
name: "PermuteMaskedFloat64x8",
|
||||||
argLen: 3,
|
argLen: 3,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "PermuteMaskedFloat64x8",
|
name: "PermuteMaskedInt64x8",
|
||||||
argLen: 3,
|
argLen: 3,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
|
|
@ -68391,12 +68319,12 @@ var opcodeTable = [...]opInfo{
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "Permute2Int8x16",
|
name: "Permute2Uint8x16",
|
||||||
argLen: 3,
|
argLen: 3,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "Permute2Uint8x16",
|
name: "Permute2Int8x16",
|
||||||
argLen: 3,
|
argLen: 3,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
|
|
@ -68642,19 +68570,14 @@ var opcodeTable = [...]opInfo{
|
||||||
argLen: 3,
|
argLen: 3,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "Permute2MaskedInt8x32",
|
|
||||||
argLen: 4,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
name: "Permute2MaskedUint8x32",
|
name: "Permute2MaskedUint8x32",
|
||||||
argLen: 4,
|
argLen: 4,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "PermuteMaskedInt8x32",
|
name: "Permute2MaskedInt8x32",
|
||||||
argLen: 3,
|
argLen: 4,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -68662,6 +68585,11 @@ var opcodeTable = [...]opInfo{
|
||||||
argLen: 3,
|
argLen: 3,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "PermuteMaskedInt8x32",
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "PopCountUint8x32",
|
name: "PopCountUint8x32",
|
||||||
argLen: 1,
|
argLen: 1,
|
||||||
|
|
|
||||||
|
|
@ -4297,21 +4297,6 @@ func rewriteValueAMD64(v *Value) bool {
|
||||||
return rewriteValueAMD64_OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4(v)
|
return rewriteValueAMD64_OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4(v)
|
||||||
case OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8:
|
case OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8:
|
||||||
return rewriteValueAMD64_OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8(v)
|
return rewriteValueAMD64_OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8(v)
|
||||||
case OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x16:
|
|
||||||
return rewriteValueAMD64_OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x16(v)
|
|
||||||
case OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x4:
|
|
||||||
return rewriteValueAMD64_OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x4(v)
|
|
||||||
case OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x8:
|
|
||||||
return rewriteValueAMD64_OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x8(v)
|
|
||||||
case OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16:
|
|
||||||
v.Op = OpAMD64VPDPBUSDS512
|
|
||||||
return true
|
|
||||||
case OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4:
|
|
||||||
v.Op = OpAMD64VPDPBUSDS128
|
|
||||||
return true
|
|
||||||
case OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8:
|
|
||||||
v.Op = OpAMD64VPDPBUSDS256
|
|
||||||
return true
|
|
||||||
case OpSelect0:
|
case OpSelect0:
|
||||||
return rewriteValueAMD64_OpSelect0(v)
|
return rewriteValueAMD64_OpSelect0(v)
|
||||||
case OpSelect1:
|
case OpSelect1:
|
||||||
|
|
@ -5416,21 +5401,6 @@ func rewriteValueAMD64(v *Value) bool {
|
||||||
return rewriteValueAMD64_OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x4(v)
|
return rewriteValueAMD64_OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x4(v)
|
||||||
case OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x8:
|
case OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x8:
|
||||||
return rewriteValueAMD64_OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x8(v)
|
return rewriteValueAMD64_OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x8(v)
|
||||||
case OpUnsignedSignedQuadDotProdAccumulateMaskedUint32x16:
|
|
||||||
return rewriteValueAMD64_OpUnsignedSignedQuadDotProdAccumulateMaskedUint32x16(v)
|
|
||||||
case OpUnsignedSignedQuadDotProdAccumulateMaskedUint32x4:
|
|
||||||
return rewriteValueAMD64_OpUnsignedSignedQuadDotProdAccumulateMaskedUint32x4(v)
|
|
||||||
case OpUnsignedSignedQuadDotProdAccumulateMaskedUint32x8:
|
|
||||||
return rewriteValueAMD64_OpUnsignedSignedQuadDotProdAccumulateMaskedUint32x8(v)
|
|
||||||
case OpUnsignedSignedQuadDotProdAccumulateUint32x16:
|
|
||||||
v.Op = OpAMD64VPDPBUSD512
|
|
||||||
return true
|
|
||||||
case OpUnsignedSignedQuadDotProdAccumulateUint32x4:
|
|
||||||
v.Op = OpAMD64VPDPBUSD128
|
|
||||||
return true
|
|
||||||
case OpUnsignedSignedQuadDotProdAccumulateUint32x8:
|
|
||||||
v.Op = OpAMD64VPDPBUSD256
|
|
||||||
return true
|
|
||||||
case OpWB:
|
case OpWB:
|
||||||
v.Op = OpAMD64LoweredWB
|
v.Op = OpAMD64LoweredWB
|
||||||
return true
|
return true
|
||||||
|
|
@ -49615,66 +49585,6 @@ func rewriteValueAMD64_OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
func rewriteValueAMD64_OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x16(v *Value) bool {
|
|
||||||
v_3 := v.Args[3]
|
|
||||||
v_2 := v.Args[2]
|
|
||||||
v_1 := v.Args[1]
|
|
||||||
v_0 := v.Args[0]
|
|
||||||
b := v.Block
|
|
||||||
// match: (SaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x16 x y z mask)
|
|
||||||
// result: (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
|
|
||||||
for {
|
|
||||||
x := v_0
|
|
||||||
y := v_1
|
|
||||||
z := v_2
|
|
||||||
mask := v_3
|
|
||||||
v.reset(OpAMD64VPDPBUSDSMasked512)
|
|
||||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
|
|
||||||
v0.AddArg(mask)
|
|
||||||
v.AddArg4(x, y, z, v0)
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
func rewriteValueAMD64_OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x4(v *Value) bool {
|
|
||||||
v_3 := v.Args[3]
|
|
||||||
v_2 := v.Args[2]
|
|
||||||
v_1 := v.Args[1]
|
|
||||||
v_0 := v.Args[0]
|
|
||||||
b := v.Block
|
|
||||||
// match: (SaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x4 x y z mask)
|
|
||||||
// result: (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
|
|
||||||
for {
|
|
||||||
x := v_0
|
|
||||||
y := v_1
|
|
||||||
z := v_2
|
|
||||||
mask := v_3
|
|
||||||
v.reset(OpAMD64VPDPBUSDSMasked128)
|
|
||||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
|
|
||||||
v0.AddArg(mask)
|
|
||||||
v.AddArg4(x, y, z, v0)
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
func rewriteValueAMD64_OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x8(v *Value) bool {
|
|
||||||
v_3 := v.Args[3]
|
|
||||||
v_2 := v.Args[2]
|
|
||||||
v_1 := v.Args[1]
|
|
||||||
v_0 := v.Args[0]
|
|
||||||
b := v.Block
|
|
||||||
// match: (SaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x8 x y z mask)
|
|
||||||
// result: (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
|
|
||||||
for {
|
|
||||||
x := v_0
|
|
||||||
y := v_1
|
|
||||||
z := v_2
|
|
||||||
mask := v_3
|
|
||||||
v.reset(OpAMD64VPDPBUSDSMasked256)
|
|
||||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
|
|
||||||
v0.AddArg(mask)
|
|
||||||
v.AddArg4(x, y, z, v0)
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
func rewriteValueAMD64_OpSelect0(v *Value) bool {
|
func rewriteValueAMD64_OpSelect0(v *Value) bool {
|
||||||
v_0 := v.Args[0]
|
v_0 := v.Args[0]
|
||||||
b := v.Block
|
b := v.Block
|
||||||
|
|
@ -53973,66 +53883,6 @@ func rewriteValueAMD64_OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x8(v *Val
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
func rewriteValueAMD64_OpUnsignedSignedQuadDotProdAccumulateMaskedUint32x16(v *Value) bool {
|
|
||||||
v_3 := v.Args[3]
|
|
||||||
v_2 := v.Args[2]
|
|
||||||
v_1 := v.Args[1]
|
|
||||||
v_0 := v.Args[0]
|
|
||||||
b := v.Block
|
|
||||||
// match: (UnsignedSignedQuadDotProdAccumulateMaskedUint32x16 x y z mask)
|
|
||||||
// result: (VPDPBUSDMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
|
|
||||||
for {
|
|
||||||
x := v_0
|
|
||||||
y := v_1
|
|
||||||
z := v_2
|
|
||||||
mask := v_3
|
|
||||||
v.reset(OpAMD64VPDPBUSDMasked512)
|
|
||||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
|
|
||||||
v0.AddArg(mask)
|
|
||||||
v.AddArg4(x, y, z, v0)
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
func rewriteValueAMD64_OpUnsignedSignedQuadDotProdAccumulateMaskedUint32x4(v *Value) bool {
|
|
||||||
v_3 := v.Args[3]
|
|
||||||
v_2 := v.Args[2]
|
|
||||||
v_1 := v.Args[1]
|
|
||||||
v_0 := v.Args[0]
|
|
||||||
b := v.Block
|
|
||||||
// match: (UnsignedSignedQuadDotProdAccumulateMaskedUint32x4 x y z mask)
|
|
||||||
// result: (VPDPBUSDMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
|
|
||||||
for {
|
|
||||||
x := v_0
|
|
||||||
y := v_1
|
|
||||||
z := v_2
|
|
||||||
mask := v_3
|
|
||||||
v.reset(OpAMD64VPDPBUSDMasked128)
|
|
||||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
|
|
||||||
v0.AddArg(mask)
|
|
||||||
v.AddArg4(x, y, z, v0)
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
func rewriteValueAMD64_OpUnsignedSignedQuadDotProdAccumulateMaskedUint32x8(v *Value) bool {
|
|
||||||
v_3 := v.Args[3]
|
|
||||||
v_2 := v.Args[2]
|
|
||||||
v_1 := v.Args[1]
|
|
||||||
v_0 := v.Args[0]
|
|
||||||
b := v.Block
|
|
||||||
// match: (UnsignedSignedQuadDotProdAccumulateMaskedUint32x8 x y z mask)
|
|
||||||
// result: (VPDPBUSDMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
|
|
||||||
for {
|
|
||||||
x := v_0
|
|
||||||
y := v_1
|
|
||||||
z := v_2
|
|
||||||
mask := v_3
|
|
||||||
v.reset(OpAMD64VPDPBUSDMasked256)
|
|
||||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
|
|
||||||
v0.AddArg(mask)
|
|
||||||
v.AddArg4(x, y, z, v0)
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
func rewriteValueAMD64_OpXorMaskedInt32x16(v *Value) bool {
|
func rewriteValueAMD64_OpXorMaskedInt32x16(v *Value) bool {
|
||||||
v_2 := v.Args[2]
|
v_2 := v.Args[2]
|
||||||
v_1 := v.Args[1]
|
v_1 := v.Args[1]
|
||||||
|
|
|
||||||
|
|
@ -1634,6 +1634,12 @@ func opLen3(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func opLen3_31(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
||||||
|
return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
||||||
|
return s.newValue3(op, t, args[2], args[1], args[0])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func opLen3_21(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
func opLen3_21(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
||||||
return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
||||||
return s.newValue3(op, t, args[1], args[0], args[2])
|
return s.newValue3(op, t, args[1], args[0], args[2])
|
||||||
|
|
@ -1658,6 +1664,12 @@ func opLen4_231(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func opLen4_31(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
||||||
|
return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
||||||
|
return s.newValue4(op, t, args[2], args[1], args[0], args[3])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func plainPanicSimdImm(s *state) {
|
func plainPanicSimdImm(s *state) {
|
||||||
cmp := s.newValue0(ssa.OpConstBool, types.Types[types.TBOOL])
|
cmp := s.newValue0(ssa.OpConstBool, types.Types[types.TBOOL])
|
||||||
cmp.AuxInt = 0
|
cmp.AuxInt = 0
|
||||||
|
|
|
||||||
|
|
@ -993,12 +993,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
||||||
addF(simdPackage, "Int16x8.PairDotProd", opLen2(ssa.OpPairDotProdInt16x8, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Int16x8.PairDotProd", opLen2(ssa.OpPairDotProdInt16x8, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "Int16x16.PairDotProd", opLen2(ssa.OpPairDotProdInt16x16, types.TypeVec256), sys.AMD64)
|
addF(simdPackage, "Int16x16.PairDotProd", opLen2(ssa.OpPairDotProdInt16x16, types.TypeVec256), sys.AMD64)
|
||||||
addF(simdPackage, "Int16x32.PairDotProd", opLen2(ssa.OpPairDotProdInt16x32, types.TypeVec512), sys.AMD64)
|
addF(simdPackage, "Int16x32.PairDotProd", opLen2(ssa.OpPairDotProdInt16x32, types.TypeVec512), sys.AMD64)
|
||||||
addF(simdPackage, "Int32x4.PairDotProdAccumulate", opLen3(ssa.OpPairDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Int16x8.PairDotProdAccumulate", opLen3_31(ssa.OpPairDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "Int32x8.PairDotProdAccumulate", opLen3(ssa.OpPairDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
|
addF(simdPackage, "Int16x16.PairDotProdAccumulate", opLen3_31(ssa.OpPairDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
|
||||||
addF(simdPackage, "Int32x16.PairDotProdAccumulate", opLen3(ssa.OpPairDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
|
addF(simdPackage, "Int16x32.PairDotProdAccumulate", opLen3_31(ssa.OpPairDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
|
||||||
addF(simdPackage, "Int32x4.PairDotProdAccumulateMasked", opLen4(ssa.OpPairDotProdAccumulateMaskedInt32x4, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Int16x8.PairDotProdAccumulateMasked", opLen4_31(ssa.OpPairDotProdAccumulateMaskedInt32x4, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "Int32x8.PairDotProdAccumulateMasked", opLen4(ssa.OpPairDotProdAccumulateMaskedInt32x8, types.TypeVec256), sys.AMD64)
|
addF(simdPackage, "Int16x16.PairDotProdAccumulateMasked", opLen4_31(ssa.OpPairDotProdAccumulateMaskedInt32x8, types.TypeVec256), sys.AMD64)
|
||||||
addF(simdPackage, "Int32x16.PairDotProdAccumulateMasked", opLen4(ssa.OpPairDotProdAccumulateMaskedInt32x16, types.TypeVec512), sys.AMD64)
|
addF(simdPackage, "Int16x32.PairDotProdAccumulateMasked", opLen4_31(ssa.OpPairDotProdAccumulateMaskedInt32x16, types.TypeVec512), sys.AMD64)
|
||||||
addF(simdPackage, "Int16x8.PairDotProdMasked", opLen3(ssa.OpPairDotProdMaskedInt16x8, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Int16x8.PairDotProdMasked", opLen3(ssa.OpPairDotProdMaskedInt16x8, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "Int16x16.PairDotProdMasked", opLen3(ssa.OpPairDotProdMaskedInt16x16, types.TypeVec256), sys.AMD64)
|
addF(simdPackage, "Int16x16.PairDotProdMasked", opLen3(ssa.OpPairDotProdMaskedInt16x16, types.TypeVec256), sys.AMD64)
|
||||||
addF(simdPackage, "Int16x32.PairDotProdMasked", opLen3(ssa.OpPairDotProdMaskedInt16x32, types.TypeVec512), sys.AMD64)
|
addF(simdPackage, "Int16x32.PairDotProdMasked", opLen3(ssa.OpPairDotProdMaskedInt16x32, types.TypeVec512), sys.AMD64)
|
||||||
|
|
@ -1318,12 +1318,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
||||||
addF(simdPackage, "Uint16x8.SaturatedAddMasked", opLen3(ssa.OpSaturatedAddMaskedUint16x8, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Uint16x8.SaturatedAddMasked", opLen3(ssa.OpSaturatedAddMaskedUint16x8, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "Uint16x16.SaturatedAddMasked", opLen3(ssa.OpSaturatedAddMaskedUint16x16, types.TypeVec256), sys.AMD64)
|
addF(simdPackage, "Uint16x16.SaturatedAddMasked", opLen3(ssa.OpSaturatedAddMaskedUint16x16, types.TypeVec256), sys.AMD64)
|
||||||
addF(simdPackage, "Uint16x32.SaturatedAddMasked", opLen3(ssa.OpSaturatedAddMaskedUint16x32, types.TypeVec512), sys.AMD64)
|
addF(simdPackage, "Uint16x32.SaturatedAddMasked", opLen3(ssa.OpSaturatedAddMaskedUint16x32, types.TypeVec512), sys.AMD64)
|
||||||
addF(simdPackage, "Int32x4.SaturatedPairDotProdAccumulate", opLen3(ssa.OpSaturatedPairDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Int16x8.SaturatedPairDotProdAccumulate", opLen3_31(ssa.OpSaturatedPairDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "Int32x8.SaturatedPairDotProdAccumulate", opLen3(ssa.OpSaturatedPairDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
|
addF(simdPackage, "Int16x16.SaturatedPairDotProdAccumulate", opLen3_31(ssa.OpSaturatedPairDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
|
||||||
addF(simdPackage, "Int32x16.SaturatedPairDotProdAccumulate", opLen3(ssa.OpSaturatedPairDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
|
addF(simdPackage, "Int16x32.SaturatedPairDotProdAccumulate", opLen3_31(ssa.OpSaturatedPairDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
|
||||||
addF(simdPackage, "Int32x4.SaturatedPairDotProdAccumulateMasked", opLen4(ssa.OpSaturatedPairDotProdAccumulateMaskedInt32x4, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Int16x8.SaturatedPairDotProdAccumulateMasked", opLen4_31(ssa.OpSaturatedPairDotProdAccumulateMaskedInt32x4, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "Int32x8.SaturatedPairDotProdAccumulateMasked", opLen4(ssa.OpSaturatedPairDotProdAccumulateMaskedInt32x8, types.TypeVec256), sys.AMD64)
|
addF(simdPackage, "Int16x16.SaturatedPairDotProdAccumulateMasked", opLen4_31(ssa.OpSaturatedPairDotProdAccumulateMaskedInt32x8, types.TypeVec256), sys.AMD64)
|
||||||
addF(simdPackage, "Int32x16.SaturatedPairDotProdAccumulateMasked", opLen4(ssa.OpSaturatedPairDotProdAccumulateMaskedInt32x16, types.TypeVec512), sys.AMD64)
|
addF(simdPackage, "Int16x32.SaturatedPairDotProdAccumulateMasked", opLen4_31(ssa.OpSaturatedPairDotProdAccumulateMaskedInt32x16, types.TypeVec512), sys.AMD64)
|
||||||
addF(simdPackage, "Int16x8.SaturatedPairwiseAdd", opLen2(ssa.OpSaturatedPairwiseAddInt16x8, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Int16x8.SaturatedPairwiseAdd", opLen2(ssa.OpSaturatedPairwiseAddInt16x8, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "Int16x16.SaturatedPairwiseAdd", opLen2(ssa.OpSaturatedPairwiseAddInt16x16, types.TypeVec256), sys.AMD64)
|
addF(simdPackage, "Int16x16.SaturatedPairwiseAdd", opLen2(ssa.OpSaturatedPairwiseAddInt16x16, types.TypeVec256), sys.AMD64)
|
||||||
addF(simdPackage, "Int16x8.SaturatedPairwiseSub", opLen2(ssa.OpSaturatedPairwiseSubInt16x8, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Int16x8.SaturatedPairwiseSub", opLen2(ssa.OpSaturatedPairwiseSubInt16x8, types.TypeVec128), sys.AMD64)
|
||||||
|
|
@ -1358,18 +1358,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
||||||
addF(simdPackage, "Uint8x16.SaturatedUnsignedSignedPairDotProdMasked", opLen3(ssa.OpSaturatedUnsignedSignedPairDotProdMaskedUint8x16, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Uint8x16.SaturatedUnsignedSignedPairDotProdMasked", opLen3(ssa.OpSaturatedUnsignedSignedPairDotProdMaskedUint8x16, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "Uint8x32.SaturatedUnsignedSignedPairDotProdMasked", opLen3(ssa.OpSaturatedUnsignedSignedPairDotProdMaskedUint8x32, types.TypeVec256), sys.AMD64)
|
addF(simdPackage, "Uint8x32.SaturatedUnsignedSignedPairDotProdMasked", opLen3(ssa.OpSaturatedUnsignedSignedPairDotProdMaskedUint8x32, types.TypeVec256), sys.AMD64)
|
||||||
addF(simdPackage, "Uint8x64.SaturatedUnsignedSignedPairDotProdMasked", opLen3(ssa.OpSaturatedUnsignedSignedPairDotProdMaskedUint8x64, types.TypeVec512), sys.AMD64)
|
addF(simdPackage, "Uint8x64.SaturatedUnsignedSignedPairDotProdMasked", opLen3(ssa.OpSaturatedUnsignedSignedPairDotProdMaskedUint8x64, types.TypeVec512), sys.AMD64)
|
||||||
addF(simdPackage, "Int32x4.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Int8x16.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3_31(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "Int32x8.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
|
addF(simdPackage, "Int8x32.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3_31(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
|
||||||
addF(simdPackage, "Int32x16.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
|
addF(simdPackage, "Int8x64.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3_31(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
|
||||||
addF(simdPackage, "Uint32x4.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Int8x16.SaturatedUnsignedSignedQuadDotProdAccumulateMasked", opLen4_31(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "Uint32x8.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8, types.TypeVec256), sys.AMD64)
|
addF(simdPackage, "Int8x32.SaturatedUnsignedSignedQuadDotProdAccumulateMasked", opLen4_31(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8, types.TypeVec256), sys.AMD64)
|
||||||
addF(simdPackage, "Uint32x16.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16, types.TypeVec512), sys.AMD64)
|
addF(simdPackage, "Int8x64.SaturatedUnsignedSignedQuadDotProdAccumulateMasked", opLen4_31(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x16, types.TypeVec512), sys.AMD64)
|
||||||
addF(simdPackage, "Int32x4.SaturatedUnsignedSignedQuadDotProdAccumulateMasked", opLen4(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4, types.TypeVec128), sys.AMD64)
|
|
||||||
addF(simdPackage, "Int32x8.SaturatedUnsignedSignedQuadDotProdAccumulateMasked", opLen4(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8, types.TypeVec256), sys.AMD64)
|
|
||||||
addF(simdPackage, "Int32x16.SaturatedUnsignedSignedQuadDotProdAccumulateMasked", opLen4(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x16, types.TypeVec512), sys.AMD64)
|
|
||||||
addF(simdPackage, "Uint32x4.SaturatedUnsignedSignedQuadDotProdAccumulateMasked", opLen4(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x4, types.TypeVec128), sys.AMD64)
|
|
||||||
addF(simdPackage, "Uint32x8.SaturatedUnsignedSignedQuadDotProdAccumulateMasked", opLen4(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x8, types.TypeVec256), sys.AMD64)
|
|
||||||
addF(simdPackage, "Uint32x16.SaturatedUnsignedSignedQuadDotProdAccumulateMasked", opLen4(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x16, types.TypeVec512), sys.AMD64)
|
|
||||||
addF(simdPackage, "Float32x8.Set128", opLen2Imm8(ssa.OpSet128Float32x8, types.TypeVec256, 0), sys.AMD64)
|
addF(simdPackage, "Float32x8.Set128", opLen2Imm8(ssa.OpSet128Float32x8, types.TypeVec256, 0), sys.AMD64)
|
||||||
addF(simdPackage, "Float64x4.Set128", opLen2Imm8(ssa.OpSet128Float64x4, types.TypeVec256, 0), sys.AMD64)
|
addF(simdPackage, "Float64x4.Set128", opLen2Imm8(ssa.OpSet128Float64x4, types.TypeVec256, 0), sys.AMD64)
|
||||||
addF(simdPackage, "Int8x32.Set128", opLen2Imm8(ssa.OpSet128Int8x32, types.TypeVec256, 0), sys.AMD64)
|
addF(simdPackage, "Int8x32.Set128", opLen2Imm8(ssa.OpSet128Int8x32, types.TypeVec256, 0), sys.AMD64)
|
||||||
|
|
@ -1770,18 +1764,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
||||||
addF(simdPackage, "Float64x2.TruncWithPrecisionMasked", opLen2Imm8(ssa.OpTruncWithPrecisionMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
|
addF(simdPackage, "Float64x2.TruncWithPrecisionMasked", opLen2Imm8(ssa.OpTruncWithPrecisionMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
|
||||||
addF(simdPackage, "Float64x4.TruncWithPrecisionMasked", opLen2Imm8(ssa.OpTruncWithPrecisionMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
|
addF(simdPackage, "Float64x4.TruncWithPrecisionMasked", opLen2Imm8(ssa.OpTruncWithPrecisionMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
|
||||||
addF(simdPackage, "Float64x8.TruncWithPrecisionMasked", opLen2Imm8(ssa.OpTruncWithPrecisionMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
|
addF(simdPackage, "Float64x8.TruncWithPrecisionMasked", opLen2Imm8(ssa.OpTruncWithPrecisionMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
|
||||||
addF(simdPackage, "Int32x4.UnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpUnsignedSignedQuadDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Int8x16.UnsignedSignedQuadDotProdAccumulate", opLen3_31(ssa.OpUnsignedSignedQuadDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "Int32x8.UnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpUnsignedSignedQuadDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
|
addF(simdPackage, "Int8x32.UnsignedSignedQuadDotProdAccumulate", opLen3_31(ssa.OpUnsignedSignedQuadDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
|
||||||
addF(simdPackage, "Int32x16.UnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpUnsignedSignedQuadDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
|
addF(simdPackage, "Int8x64.UnsignedSignedQuadDotProdAccumulate", opLen3_31(ssa.OpUnsignedSignedQuadDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
|
||||||
addF(simdPackage, "Uint32x4.UnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpUnsignedSignedQuadDotProdAccumulateUint32x4, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Int8x16.UnsignedSignedQuadDotProdAccumulateMasked", opLen4_31(ssa.OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x4, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "Uint32x8.UnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpUnsignedSignedQuadDotProdAccumulateUint32x8, types.TypeVec256), sys.AMD64)
|
addF(simdPackage, "Int8x32.UnsignedSignedQuadDotProdAccumulateMasked", opLen4_31(ssa.OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x8, types.TypeVec256), sys.AMD64)
|
||||||
addF(simdPackage, "Uint32x16.UnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpUnsignedSignedQuadDotProdAccumulateUint32x16, types.TypeVec512), sys.AMD64)
|
addF(simdPackage, "Int8x64.UnsignedSignedQuadDotProdAccumulateMasked", opLen4_31(ssa.OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x16, types.TypeVec512), sys.AMD64)
|
||||||
addF(simdPackage, "Int32x4.UnsignedSignedQuadDotProdAccumulateMasked", opLen4(ssa.OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x4, types.TypeVec128), sys.AMD64)
|
|
||||||
addF(simdPackage, "Int32x8.UnsignedSignedQuadDotProdAccumulateMasked", opLen4(ssa.OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x8, types.TypeVec256), sys.AMD64)
|
|
||||||
addF(simdPackage, "Int32x16.UnsignedSignedQuadDotProdAccumulateMasked", opLen4(ssa.OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x16, types.TypeVec512), sys.AMD64)
|
|
||||||
addF(simdPackage, "Uint32x4.UnsignedSignedQuadDotProdAccumulateMasked", opLen4(ssa.OpUnsignedSignedQuadDotProdAccumulateMaskedUint32x4, types.TypeVec128), sys.AMD64)
|
|
||||||
addF(simdPackage, "Uint32x8.UnsignedSignedQuadDotProdAccumulateMasked", opLen4(ssa.OpUnsignedSignedQuadDotProdAccumulateMaskedUint32x8, types.TypeVec256), sys.AMD64)
|
|
||||||
addF(simdPackage, "Uint32x16.UnsignedSignedQuadDotProdAccumulateMasked", opLen4(ssa.OpUnsignedSignedQuadDotProdAccumulateMaskedUint32x16, types.TypeVec512), sys.AMD64)
|
|
||||||
addF(simdPackage, "Int8x16.Xor", opLen2(ssa.OpXorInt8x16, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Int8x16.Xor", opLen2(ssa.OpXorInt8x16, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "Int8x32.Xor", opLen2(ssa.OpXorInt8x32, types.TypeVec256), sys.AMD64)
|
addF(simdPackage, "Int8x32.Xor", opLen2(ssa.OpXorInt8x32, types.TypeVec256), sys.AMD64)
|
||||||
addF(simdPackage, "Int16x8.Xor", opLen2(ssa.OpXorInt16x8, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Int16x8.Xor", opLen2(ssa.OpXorInt16x8, types.TypeVec128), sys.AMD64)
|
||||||
|
|
|
||||||
|
|
@ -2115,192 +2115,192 @@ func (x Float64x8) FloorWithPrecisionMasked(prec uint8, mask Mask64x8) Float64x8
|
||||||
|
|
||||||
/* FusedMultiplyAdd */
|
/* FusedMultiplyAdd */
|
||||||
|
|
||||||
// FusedMultiplyAdd performs `(v1 * v2) + v3`.
|
// FusedMultiplyAdd performs (x * y) + z.
|
||||||
//
|
//
|
||||||
// Asm: VFMADD213PS, CPU Feature: AVX512F
|
// Asm: VFMADD213PS, CPU Feature: AVX512F
|
||||||
func (x Float32x4) FusedMultiplyAdd(y Float32x4, z Float32x4) Float32x4
|
func (x Float32x4) FusedMultiplyAdd(y Float32x4, z Float32x4) Float32x4
|
||||||
|
|
||||||
// FusedMultiplyAdd performs `(v1 * v2) + v3`.
|
// FusedMultiplyAdd performs (x * y) + z.
|
||||||
//
|
//
|
||||||
// Asm: VFMADD213PS, CPU Feature: AVX512F
|
// Asm: VFMADD213PS, CPU Feature: AVX512F
|
||||||
func (x Float32x8) FusedMultiplyAdd(y Float32x8, z Float32x8) Float32x8
|
func (x Float32x8) FusedMultiplyAdd(y Float32x8, z Float32x8) Float32x8
|
||||||
|
|
||||||
// FusedMultiplyAdd performs `(v1 * v2) + v3`.
|
// FusedMultiplyAdd performs (x * y) + z.
|
||||||
//
|
//
|
||||||
// Asm: VFMADD213PS, CPU Feature: AVX512F
|
// Asm: VFMADD213PS, CPU Feature: AVX512F
|
||||||
func (x Float32x16) FusedMultiplyAdd(y Float32x16, z Float32x16) Float32x16
|
func (x Float32x16) FusedMultiplyAdd(y Float32x16, z Float32x16) Float32x16
|
||||||
|
|
||||||
// FusedMultiplyAdd performs `(v1 * v2) + v3`.
|
// FusedMultiplyAdd performs (x * y) + z.
|
||||||
//
|
//
|
||||||
// Asm: VFMADD213PD, CPU Feature: AVX512F
|
// Asm: VFMADD213PD, CPU Feature: AVX512F
|
||||||
func (x Float64x2) FusedMultiplyAdd(y Float64x2, z Float64x2) Float64x2
|
func (x Float64x2) FusedMultiplyAdd(y Float64x2, z Float64x2) Float64x2
|
||||||
|
|
||||||
// FusedMultiplyAdd performs `(v1 * v2) + v3`.
|
// FusedMultiplyAdd performs (x * y) + z.
|
||||||
//
|
//
|
||||||
// Asm: VFMADD213PD, CPU Feature: AVX512F
|
// Asm: VFMADD213PD, CPU Feature: AVX512F
|
||||||
func (x Float64x4) FusedMultiplyAdd(y Float64x4, z Float64x4) Float64x4
|
func (x Float64x4) FusedMultiplyAdd(y Float64x4, z Float64x4) Float64x4
|
||||||
|
|
||||||
// FusedMultiplyAdd performs `(v1 * v2) + v3`.
|
// FusedMultiplyAdd performs (x * y) + z.
|
||||||
//
|
//
|
||||||
// Asm: VFMADD213PD, CPU Feature: AVX512F
|
// Asm: VFMADD213PD, CPU Feature: AVX512F
|
||||||
func (x Float64x8) FusedMultiplyAdd(y Float64x8, z Float64x8) Float64x8
|
func (x Float64x8) FusedMultiplyAdd(y Float64x8, z Float64x8) Float64x8
|
||||||
|
|
||||||
/* FusedMultiplyAddMasked */
|
/* FusedMultiplyAddMasked */
|
||||||
|
|
||||||
// FusedMultiplyAddMasked performs `(v1 * v2) + v3`.
|
// FusedMultiplyAddMasked performs (x * y) + z.
|
||||||
//
|
//
|
||||||
// Asm: VFMADD213PS, CPU Feature: AVX512F
|
// Asm: VFMADD213PS, CPU Feature: AVX512F
|
||||||
func (x Float32x4) FusedMultiplyAddMasked(y Float32x4, z Float32x4, mask Mask32x4) Float32x4
|
func (x Float32x4) FusedMultiplyAddMasked(y Float32x4, z Float32x4, mask Mask32x4) Float32x4
|
||||||
|
|
||||||
// FusedMultiplyAddMasked performs `(v1 * v2) + v3`.
|
// FusedMultiplyAddMasked performs (x * y) + z.
|
||||||
//
|
//
|
||||||
// Asm: VFMADD213PS, CPU Feature: AVX512F
|
// Asm: VFMADD213PS, CPU Feature: AVX512F
|
||||||
func (x Float32x8) FusedMultiplyAddMasked(y Float32x8, z Float32x8, mask Mask32x8) Float32x8
|
func (x Float32x8) FusedMultiplyAddMasked(y Float32x8, z Float32x8, mask Mask32x8) Float32x8
|
||||||
|
|
||||||
// FusedMultiplyAddMasked performs `(v1 * v2) + v3`.
|
// FusedMultiplyAddMasked performs (x * y) + z.
|
||||||
//
|
//
|
||||||
// Asm: VFMADD213PS, CPU Feature: AVX512F
|
// Asm: VFMADD213PS, CPU Feature: AVX512F
|
||||||
func (x Float32x16) FusedMultiplyAddMasked(y Float32x16, z Float32x16, mask Mask32x16) Float32x16
|
func (x Float32x16) FusedMultiplyAddMasked(y Float32x16, z Float32x16, mask Mask32x16) Float32x16
|
||||||
|
|
||||||
// FusedMultiplyAddMasked performs `(v1 * v2) + v3`.
|
// FusedMultiplyAddMasked performs (x * y) + z.
|
||||||
//
|
//
|
||||||
// Asm: VFMADD213PD, CPU Feature: AVX512F
|
// Asm: VFMADD213PD, CPU Feature: AVX512F
|
||||||
func (x Float64x2) FusedMultiplyAddMasked(y Float64x2, z Float64x2, mask Mask64x2) Float64x2
|
func (x Float64x2) FusedMultiplyAddMasked(y Float64x2, z Float64x2, mask Mask64x2) Float64x2
|
||||||
|
|
||||||
// FusedMultiplyAddMasked performs `(v1 * v2) + v3`.
|
// FusedMultiplyAddMasked performs (x * y) + z.
|
||||||
//
|
//
|
||||||
// Asm: VFMADD213PD, CPU Feature: AVX512F
|
// Asm: VFMADD213PD, CPU Feature: AVX512F
|
||||||
func (x Float64x4) FusedMultiplyAddMasked(y Float64x4, z Float64x4, mask Mask64x4) Float64x4
|
func (x Float64x4) FusedMultiplyAddMasked(y Float64x4, z Float64x4, mask Mask64x4) Float64x4
|
||||||
|
|
||||||
// FusedMultiplyAddMasked performs `(v1 * v2) + v3`.
|
// FusedMultiplyAddMasked performs (x * y) + z.
|
||||||
//
|
//
|
||||||
// Asm: VFMADD213PD, CPU Feature: AVX512F
|
// Asm: VFMADD213PD, CPU Feature: AVX512F
|
||||||
func (x Float64x8) FusedMultiplyAddMasked(y Float64x8, z Float64x8, mask Mask64x8) Float64x8
|
func (x Float64x8) FusedMultiplyAddMasked(y Float64x8, z Float64x8, mask Mask64x8) Float64x8
|
||||||
|
|
||||||
/* FusedMultiplyAddSub */
|
/* FusedMultiplyAddSub */
|
||||||
|
|
||||||
// FusedMultiplyAddSub performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements.
|
// FusedMultiplyAddSub performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
|
||||||
//
|
//
|
||||||
// Asm: VFMADDSUB213PS, CPU Feature: AVX512F
|
// Asm: VFMADDSUB213PS, CPU Feature: AVX512F
|
||||||
func (x Float32x4) FusedMultiplyAddSub(y Float32x4, z Float32x4) Float32x4
|
func (x Float32x4) FusedMultiplyAddSub(y Float32x4, z Float32x4) Float32x4
|
||||||
|
|
||||||
// FusedMultiplyAddSub performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements.
|
// FusedMultiplyAddSub performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
|
||||||
//
|
//
|
||||||
// Asm: VFMADDSUB213PS, CPU Feature: AVX512F
|
// Asm: VFMADDSUB213PS, CPU Feature: AVX512F
|
||||||
func (x Float32x8) FusedMultiplyAddSub(y Float32x8, z Float32x8) Float32x8
|
func (x Float32x8) FusedMultiplyAddSub(y Float32x8, z Float32x8) Float32x8
|
||||||
|
|
||||||
// FusedMultiplyAddSub performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements.
|
// FusedMultiplyAddSub performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
|
||||||
//
|
//
|
||||||
// Asm: VFMADDSUB213PS, CPU Feature: AVX512F
|
// Asm: VFMADDSUB213PS, CPU Feature: AVX512F
|
||||||
func (x Float32x16) FusedMultiplyAddSub(y Float32x16, z Float32x16) Float32x16
|
func (x Float32x16) FusedMultiplyAddSub(y Float32x16, z Float32x16) Float32x16
|
||||||
|
|
||||||
// FusedMultiplyAddSub performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements.
|
// FusedMultiplyAddSub performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
|
||||||
//
|
//
|
||||||
// Asm: VFMADDSUB213PD, CPU Feature: AVX512F
|
// Asm: VFMADDSUB213PD, CPU Feature: AVX512F
|
||||||
func (x Float64x2) FusedMultiplyAddSub(y Float64x2, z Float64x2) Float64x2
|
func (x Float64x2) FusedMultiplyAddSub(y Float64x2, z Float64x2) Float64x2
|
||||||
|
|
||||||
// FusedMultiplyAddSub performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements.
|
// FusedMultiplyAddSub performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
|
||||||
//
|
//
|
||||||
// Asm: VFMADDSUB213PD, CPU Feature: AVX512F
|
// Asm: VFMADDSUB213PD, CPU Feature: AVX512F
|
||||||
func (x Float64x4) FusedMultiplyAddSub(y Float64x4, z Float64x4) Float64x4
|
func (x Float64x4) FusedMultiplyAddSub(y Float64x4, z Float64x4) Float64x4
|
||||||
|
|
||||||
// FusedMultiplyAddSub performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements.
|
// FusedMultiplyAddSub performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
|
||||||
//
|
//
|
||||||
// Asm: VFMADDSUB213PD, CPU Feature: AVX512F
|
// Asm: VFMADDSUB213PD, CPU Feature: AVX512F
|
||||||
func (x Float64x8) FusedMultiplyAddSub(y Float64x8, z Float64x8) Float64x8
|
func (x Float64x8) FusedMultiplyAddSub(y Float64x8, z Float64x8) Float64x8
|
||||||
|
|
||||||
/* FusedMultiplyAddSubMasked */
|
/* FusedMultiplyAddSubMasked */
|
||||||
|
|
||||||
// FusedMultiplyAddSubMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements.
|
// FusedMultiplyAddSubMasked performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
|
||||||
//
|
//
|
||||||
// Asm: VFMADDSUB213PS, CPU Feature: AVX512F
|
// Asm: VFMADDSUB213PS, CPU Feature: AVX512F
|
||||||
func (x Float32x4) FusedMultiplyAddSubMasked(y Float32x4, z Float32x4, mask Mask32x4) Float32x4
|
func (x Float32x4) FusedMultiplyAddSubMasked(y Float32x4, z Float32x4, mask Mask32x4) Float32x4
|
||||||
|
|
||||||
// FusedMultiplyAddSubMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements.
|
// FusedMultiplyAddSubMasked performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
|
||||||
//
|
//
|
||||||
// Asm: VFMADDSUB213PS, CPU Feature: AVX512F
|
// Asm: VFMADDSUB213PS, CPU Feature: AVX512F
|
||||||
func (x Float32x8) FusedMultiplyAddSubMasked(y Float32x8, z Float32x8, mask Mask32x8) Float32x8
|
func (x Float32x8) FusedMultiplyAddSubMasked(y Float32x8, z Float32x8, mask Mask32x8) Float32x8
|
||||||
|
|
||||||
// FusedMultiplyAddSubMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements.
|
// FusedMultiplyAddSubMasked performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
|
||||||
//
|
//
|
||||||
// Asm: VFMADDSUB213PS, CPU Feature: AVX512F
|
// Asm: VFMADDSUB213PS, CPU Feature: AVX512F
|
||||||
func (x Float32x16) FusedMultiplyAddSubMasked(y Float32x16, z Float32x16, mask Mask32x16) Float32x16
|
func (x Float32x16) FusedMultiplyAddSubMasked(y Float32x16, z Float32x16, mask Mask32x16) Float32x16
|
||||||
|
|
||||||
// FusedMultiplyAddSubMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements.
|
// FusedMultiplyAddSubMasked performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
|
||||||
//
|
//
|
||||||
// Asm: VFMADDSUB213PD, CPU Feature: AVX512F
|
// Asm: VFMADDSUB213PD, CPU Feature: AVX512F
|
||||||
func (x Float64x2) FusedMultiplyAddSubMasked(y Float64x2, z Float64x2, mask Mask64x2) Float64x2
|
func (x Float64x2) FusedMultiplyAddSubMasked(y Float64x2, z Float64x2, mask Mask64x2) Float64x2
|
||||||
|
|
||||||
// FusedMultiplyAddSubMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements.
|
// FusedMultiplyAddSubMasked performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
|
||||||
//
|
//
|
||||||
// Asm: VFMADDSUB213PD, CPU Feature: AVX512F
|
// Asm: VFMADDSUB213PD, CPU Feature: AVX512F
|
||||||
func (x Float64x4) FusedMultiplyAddSubMasked(y Float64x4, z Float64x4, mask Mask64x4) Float64x4
|
func (x Float64x4) FusedMultiplyAddSubMasked(y Float64x4, z Float64x4, mask Mask64x4) Float64x4
|
||||||
|
|
||||||
// FusedMultiplyAddSubMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements.
|
// FusedMultiplyAddSubMasked performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
|
||||||
//
|
//
|
||||||
// Asm: VFMADDSUB213PD, CPU Feature: AVX512F
|
// Asm: VFMADDSUB213PD, CPU Feature: AVX512F
|
||||||
func (x Float64x8) FusedMultiplyAddSubMasked(y Float64x8, z Float64x8, mask Mask64x8) Float64x8
|
func (x Float64x8) FusedMultiplyAddSubMasked(y Float64x8, z Float64x8, mask Mask64x8) Float64x8
|
||||||
|
|
||||||
/* FusedMultiplySubAdd */
|
/* FusedMultiplySubAdd */
|
||||||
|
|
||||||
// FusedMultiplySubAdd performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements.
|
// FusedMultiplySubAdd performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
|
||||||
//
|
//
|
||||||
// Asm: VFMSUBADD213PS, CPU Feature: AVX512F
|
// Asm: VFMSUBADD213PS, CPU Feature: AVX512F
|
||||||
func (x Float32x4) FusedMultiplySubAdd(y Float32x4, z Float32x4) Float32x4
|
func (x Float32x4) FusedMultiplySubAdd(y Float32x4, z Float32x4) Float32x4
|
||||||
|
|
||||||
// FusedMultiplySubAdd performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements.
|
// FusedMultiplySubAdd performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
|
||||||
//
|
//
|
||||||
// Asm: VFMSUBADD213PS, CPU Feature: AVX512F
|
// Asm: VFMSUBADD213PS, CPU Feature: AVX512F
|
||||||
func (x Float32x8) FusedMultiplySubAdd(y Float32x8, z Float32x8) Float32x8
|
func (x Float32x8) FusedMultiplySubAdd(y Float32x8, z Float32x8) Float32x8
|
||||||
|
|
||||||
// FusedMultiplySubAdd performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements.
|
// FusedMultiplySubAdd performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
|
||||||
//
|
//
|
||||||
// Asm: VFMSUBADD213PS, CPU Feature: AVX512F
|
// Asm: VFMSUBADD213PS, CPU Feature: AVX512F
|
||||||
func (x Float32x16) FusedMultiplySubAdd(y Float32x16, z Float32x16) Float32x16
|
func (x Float32x16) FusedMultiplySubAdd(y Float32x16, z Float32x16) Float32x16
|
||||||
|
|
||||||
// FusedMultiplySubAdd performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements.
|
// FusedMultiplySubAdd performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
|
||||||
//
|
//
|
||||||
// Asm: VFMSUBADD213PD, CPU Feature: AVX512F
|
// Asm: VFMSUBADD213PD, CPU Feature: AVX512F
|
||||||
func (x Float64x2) FusedMultiplySubAdd(y Float64x2, z Float64x2) Float64x2
|
func (x Float64x2) FusedMultiplySubAdd(y Float64x2, z Float64x2) Float64x2
|
||||||
|
|
||||||
// FusedMultiplySubAdd performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements.
|
// FusedMultiplySubAdd performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
|
||||||
//
|
//
|
||||||
// Asm: VFMSUBADD213PD, CPU Feature: AVX512F
|
// Asm: VFMSUBADD213PD, CPU Feature: AVX512F
|
||||||
func (x Float64x4) FusedMultiplySubAdd(y Float64x4, z Float64x4) Float64x4
|
func (x Float64x4) FusedMultiplySubAdd(y Float64x4, z Float64x4) Float64x4
|
||||||
|
|
||||||
// FusedMultiplySubAdd performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements.
|
// FusedMultiplySubAdd performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
|
||||||
//
|
//
|
||||||
// Asm: VFMSUBADD213PD, CPU Feature: AVX512F
|
// Asm: VFMSUBADD213PD, CPU Feature: AVX512F
|
||||||
func (x Float64x8) FusedMultiplySubAdd(y Float64x8, z Float64x8) Float64x8
|
func (x Float64x8) FusedMultiplySubAdd(y Float64x8, z Float64x8) Float64x8
|
||||||
|
|
||||||
/* FusedMultiplySubAddMasked */
|
/* FusedMultiplySubAddMasked */
|
||||||
|
|
||||||
// FusedMultiplySubAddMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements.
|
// FusedMultiplySubAddMasked performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
|
||||||
//
|
//
|
||||||
// Asm: VFMSUBADD213PS, CPU Feature: AVX512F
|
// Asm: VFMSUBADD213PS, CPU Feature: AVX512F
|
||||||
func (x Float32x4) FusedMultiplySubAddMasked(y Float32x4, z Float32x4, mask Mask32x4) Float32x4
|
func (x Float32x4) FusedMultiplySubAddMasked(y Float32x4, z Float32x4, mask Mask32x4) Float32x4
|
||||||
|
|
||||||
// FusedMultiplySubAddMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements.
|
// FusedMultiplySubAddMasked performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
|
||||||
//
|
//
|
||||||
// Asm: VFMSUBADD213PS, CPU Feature: AVX512F
|
// Asm: VFMSUBADD213PS, CPU Feature: AVX512F
|
||||||
func (x Float32x8) FusedMultiplySubAddMasked(y Float32x8, z Float32x8, mask Mask32x8) Float32x8
|
func (x Float32x8) FusedMultiplySubAddMasked(y Float32x8, z Float32x8, mask Mask32x8) Float32x8
|
||||||
|
|
||||||
// FusedMultiplySubAddMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements.
|
// FusedMultiplySubAddMasked performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
|
||||||
//
|
//
|
||||||
// Asm: VFMSUBADD213PS, CPU Feature: AVX512F
|
// Asm: VFMSUBADD213PS, CPU Feature: AVX512F
|
||||||
func (x Float32x16) FusedMultiplySubAddMasked(y Float32x16, z Float32x16, mask Mask32x16) Float32x16
|
func (x Float32x16) FusedMultiplySubAddMasked(y Float32x16, z Float32x16, mask Mask32x16) Float32x16
|
||||||
|
|
||||||
// FusedMultiplySubAddMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements.
|
// FusedMultiplySubAddMasked performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
|
||||||
//
|
//
|
||||||
// Asm: VFMSUBADD213PD, CPU Feature: AVX512F
|
// Asm: VFMSUBADD213PD, CPU Feature: AVX512F
|
||||||
func (x Float64x2) FusedMultiplySubAddMasked(y Float64x2, z Float64x2, mask Mask64x2) Float64x2
|
func (x Float64x2) FusedMultiplySubAddMasked(y Float64x2, z Float64x2, mask Mask64x2) Float64x2
|
||||||
|
|
||||||
// FusedMultiplySubAddMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements.
|
// FusedMultiplySubAddMasked performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
|
||||||
//
|
//
|
||||||
// Asm: VFMSUBADD213PD, CPU Feature: AVX512F
|
// Asm: VFMSUBADD213PD, CPU Feature: AVX512F
|
||||||
func (x Float64x4) FusedMultiplySubAddMasked(y Float64x4, z Float64x4, mask Mask64x4) Float64x4
|
func (x Float64x4) FusedMultiplySubAddMasked(y Float64x4, z Float64x4, mask Mask64x4) Float64x4
|
||||||
|
|
||||||
// FusedMultiplySubAddMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements.
|
// FusedMultiplySubAddMasked performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
|
||||||
//
|
//
|
||||||
// Asm: VFMSUBADD213PD, CPU Feature: AVX512F
|
// Asm: VFMSUBADD213PD, CPU Feature: AVX512F
|
||||||
func (x Float64x8) FusedMultiplySubAddMasked(y Float64x8, z Float64x8, mask Mask64x8) Float64x8
|
func (x Float64x8) FusedMultiplySubAddMasked(y Float64x8, z Float64x8, mask Mask64x8) Float64x8
|
||||||
|
|
@ -5373,37 +5373,37 @@ func (x Int16x32) PairDotProd(y Int16x32) Int32x16
|
||||||
|
|
||||||
/* PairDotProdAccumulate */
|
/* PairDotProdAccumulate */
|
||||||
|
|
||||||
// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
|
// PairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z.
|
||||||
//
|
//
|
||||||
// Asm: VPDPWSSD, CPU Feature: AVXVNNI
|
// Asm: VPDPWSSD, CPU Feature: AVXVNNI
|
||||||
func (x Int32x4) PairDotProdAccumulate(y Int16x8, z Int16x8) Int32x4
|
func (x Int16x8) PairDotProdAccumulate(y Int16x8, z Int32x4) Int32x4
|
||||||
|
|
||||||
// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
|
// PairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z.
|
||||||
//
|
//
|
||||||
// Asm: VPDPWSSD, CPU Feature: AVXVNNI
|
// Asm: VPDPWSSD, CPU Feature: AVXVNNI
|
||||||
func (x Int32x8) PairDotProdAccumulate(y Int16x16, z Int16x16) Int32x8
|
func (x Int16x16) PairDotProdAccumulate(y Int16x16, z Int32x8) Int32x8
|
||||||
|
|
||||||
// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
|
// PairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z.
|
||||||
//
|
//
|
||||||
// Asm: VPDPWSSD, CPU Feature: AVX512VNNI
|
// Asm: VPDPWSSD, CPU Feature: AVX512VNNI
|
||||||
func (x Int32x16) PairDotProdAccumulate(y Int16x32, z Int16x32) Int32x16
|
func (x Int16x32) PairDotProdAccumulate(y Int16x32, z Int32x16) Int32x16
|
||||||
|
|
||||||
/* PairDotProdAccumulateMasked */
|
/* PairDotProdAccumulateMasked */
|
||||||
|
|
||||||
// PairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results to x.
|
// PairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z.
|
||||||
//
|
//
|
||||||
// Asm: VPDPWSSD, CPU Feature: AVX512VNNI
|
// Asm: VPDPWSSD, CPU Feature: AVX512VNNI
|
||||||
func (x Int32x4) PairDotProdAccumulateMasked(y Int16x8, z Int16x8, mask Mask32x4) Int32x4
|
func (x Int16x8) PairDotProdAccumulateMasked(y Int16x8, z Int32x4, mask Mask32x4) Int32x4
|
||||||
|
|
||||||
// PairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results to x.
|
// PairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z.
|
||||||
//
|
//
|
||||||
// Asm: VPDPWSSD, CPU Feature: AVX512VNNI
|
// Asm: VPDPWSSD, CPU Feature: AVX512VNNI
|
||||||
func (x Int32x8) PairDotProdAccumulateMasked(y Int16x16, z Int16x16, mask Mask32x8) Int32x8
|
func (x Int16x16) PairDotProdAccumulateMasked(y Int16x16, z Int32x8, mask Mask32x8) Int32x8
|
||||||
|
|
||||||
// PairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results to x.
|
// PairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z.
|
||||||
//
|
//
|
||||||
// Asm: VPDPWSSD, CPU Feature: AVX512VNNI
|
// Asm: VPDPWSSD, CPU Feature: AVX512VNNI
|
||||||
func (x Int32x16) PairDotProdAccumulateMasked(y Int16x32, z Int16x32, mask Mask32x16) Int32x16
|
func (x Int16x32) PairDotProdAccumulateMasked(y Int16x32, z Int32x16, mask Mask32x16) Int32x16
|
||||||
|
|
||||||
/* PairDotProdMasked */
|
/* PairDotProdMasked */
|
||||||
|
|
||||||
|
|
@ -7469,37 +7469,37 @@ func (x Uint16x32) SaturatedAddMasked(y Uint16x32, mask Mask16x32) Uint16x32
|
||||||
|
|
||||||
/* SaturatedPairDotProdAccumulate */
|
/* SaturatedPairDotProdAccumulate */
|
||||||
|
|
||||||
// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
|
// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z.
|
||||||
//
|
//
|
||||||
// Asm: VPDPWSSDS, CPU Feature: AVXVNNI
|
// Asm: VPDPWSSDS, CPU Feature: AVXVNNI
|
||||||
func (x Int32x4) SaturatedPairDotProdAccumulate(y Int16x8, z Int16x8) Int32x4
|
func (x Int16x8) SaturatedPairDotProdAccumulate(y Int16x8, z Int32x4) Int32x4
|
||||||
|
|
||||||
// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
|
// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z.
|
||||||
//
|
//
|
||||||
// Asm: VPDPWSSDS, CPU Feature: AVXVNNI
|
// Asm: VPDPWSSDS, CPU Feature: AVXVNNI
|
||||||
func (x Int32x8) SaturatedPairDotProdAccumulate(y Int16x16, z Int16x16) Int32x8
|
func (x Int16x16) SaturatedPairDotProdAccumulate(y Int16x16, z Int32x8) Int32x8
|
||||||
|
|
||||||
// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
|
// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z.
|
||||||
//
|
//
|
||||||
// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
|
// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
|
||||||
func (x Int32x16) SaturatedPairDotProdAccumulate(y Int16x32, z Int16x32) Int32x16
|
func (x Int16x32) SaturatedPairDotProdAccumulate(y Int16x32, z Int32x16) Int32x16
|
||||||
|
|
||||||
/* SaturatedPairDotProdAccumulateMasked */
|
/* SaturatedPairDotProdAccumulateMasked */
|
||||||
|
|
||||||
// SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results to x.
|
// SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z.
|
||||||
//
|
//
|
||||||
// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
|
// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
|
||||||
func (x Int32x4) SaturatedPairDotProdAccumulateMasked(y Int16x8, z Int16x8, mask Mask32x4) Int32x4
|
func (x Int16x8) SaturatedPairDotProdAccumulateMasked(y Int16x8, z Int32x4, mask Mask32x4) Int32x4
|
||||||
|
|
||||||
// SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results to x.
|
// SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z.
|
||||||
//
|
//
|
||||||
// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
|
// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
|
||||||
func (x Int32x8) SaturatedPairDotProdAccumulateMasked(y Int16x16, z Int16x16, mask Mask32x8) Int32x8
|
func (x Int16x16) SaturatedPairDotProdAccumulateMasked(y Int16x16, z Int32x8, mask Mask32x8) Int32x8
|
||||||
|
|
||||||
// SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results to x.
|
// SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z.
|
||||||
//
|
//
|
||||||
// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
|
// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
|
||||||
func (x Int32x16) SaturatedPairDotProdAccumulateMasked(y Int16x32, z Int16x32, mask Mask32x16) Int32x16
|
func (x Int16x32) SaturatedPairDotProdAccumulateMasked(y Int16x32, z Int32x16, mask Mask32x16) Int32x16
|
||||||
|
|
||||||
/* SaturatedPairwiseAdd */
|
/* SaturatedPairwiseAdd */
|
||||||
|
|
||||||
|
|
@ -7695,67 +7695,37 @@ func (x Uint8x64) SaturatedUnsignedSignedPairDotProdMasked(y Int8x64, mask Mask1
|
||||||
|
|
||||||
/* SaturatedUnsignedSignedQuadDotProdAccumulate */
|
/* SaturatedUnsignedSignedQuadDotProdAccumulate */
|
||||||
|
|
||||||
// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of x and y and then adds z.
|
||||||
//
|
//
|
||||||
// Asm: VPDPBUSDS, CPU Feature: AVXVNNI
|
// Asm: VPDPBUSDS, CPU Feature: AVXVNNI
|
||||||
func (x Int32x4) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int8x16) Int32x4
|
func (x Int8x16) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int32x4) Int32x4
|
||||||
|
|
||||||
// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of x and y and then adds z.
|
||||||
//
|
//
|
||||||
// Asm: VPDPBUSDS, CPU Feature: AVXVNNI
|
// Asm: VPDPBUSDS, CPU Feature: AVXVNNI
|
||||||
func (x Int32x8) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int8x32) Int32x8
|
func (x Int8x32) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int32x8) Int32x8
|
||||||
|
|
||||||
// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of x and y and then adds z.
|
||||||
//
|
//
|
||||||
// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
|
// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
|
||||||
func (x Int32x16) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int8x64) Int32x16
|
func (x Int8x64) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16) Int32x16
|
||||||
|
|
||||||
// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
|
||||||
//
|
|
||||||
// Asm: VPDPBUSDS, CPU Feature: AVXVNNI
|
|
||||||
func (x Uint32x4) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int8x16) Uint32x4
|
|
||||||
|
|
||||||
// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
|
||||||
//
|
|
||||||
// Asm: VPDPBUSDS, CPU Feature: AVXVNNI
|
|
||||||
func (x Uint32x8) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int8x32) Uint32x8
|
|
||||||
|
|
||||||
// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
|
||||||
//
|
|
||||||
// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
|
|
||||||
func (x Uint32x16) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int8x64) Uint32x16
|
|
||||||
|
|
||||||
/* SaturatedUnsignedSignedQuadDotProdAccumulateMasked */
|
/* SaturatedUnsignedSignedQuadDotProdAccumulateMasked */
|
||||||
|
|
||||||
// SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
// SaturatedUnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of x and y and then adds z.
|
||||||
//
|
//
|
||||||
// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
|
// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
|
||||||
func (x Int32x4) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int8x16, mask Mask32x4) Int32x4
|
func (x Int8x16) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int32x4, mask Mask32x4) Int32x4
|
||||||
|
|
||||||
// SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
// SaturatedUnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of x and y and then adds z.
|
||||||
//
|
//
|
||||||
// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
|
// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
|
||||||
func (x Int32x8) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int8x32, mask Mask32x8) Int32x8
|
func (x Int8x32) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int32x8, mask Mask32x8) Int32x8
|
||||||
|
|
||||||
// SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
// SaturatedUnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of x and y and then adds z.
|
||||||
//
|
//
|
||||||
// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
|
// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
|
||||||
func (x Int32x16) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int8x64, mask Mask32x16) Int32x16
|
func (x Int8x64) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int32x16, mask Mask32x16) Int32x16
|
||||||
|
|
||||||
// SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
|
||||||
//
|
|
||||||
// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
|
|
||||||
func (x Uint32x4) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int8x16, mask Mask32x4) Uint32x4
|
|
||||||
|
|
||||||
// SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
|
||||||
//
|
|
||||||
// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
|
|
||||||
func (x Uint32x8) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int8x32, mask Mask32x8) Uint32x8
|
|
||||||
|
|
||||||
// SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
|
||||||
//
|
|
||||||
// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
|
|
||||||
func (x Uint32x16) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int8x64, mask Mask32x16) Uint32x16
|
|
||||||
|
|
||||||
/* Set128 */
|
/* Set128 */
|
||||||
|
|
||||||
|
|
@ -10165,67 +10135,37 @@ func (x Float64x8) TruncWithPrecisionMasked(prec uint8, mask Mask64x8) Float64x8
|
||||||
|
|
||||||
/* UnsignedSignedQuadDotProdAccumulate */
|
/* UnsignedSignedQuadDotProdAccumulate */
|
||||||
|
|
||||||
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of x and y and then adds z.
|
||||||
//
|
//
|
||||||
// Asm: VPDPBUSD, CPU Feature: AVXVNNI
|
// Asm: VPDPBUSD, CPU Feature: AVXVNNI
|
||||||
func (x Int32x4) UnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int8x16) Int32x4
|
func (x Int8x16) UnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int32x4) Int32x4
|
||||||
|
|
||||||
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of x and y and then adds z.
|
||||||
//
|
//
|
||||||
// Asm: VPDPBUSD, CPU Feature: AVXVNNI
|
// Asm: VPDPBUSD, CPU Feature: AVXVNNI
|
||||||
func (x Int32x8) UnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int8x32) Int32x8
|
func (x Int8x32) UnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int32x8) Int32x8
|
||||||
|
|
||||||
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of x and y and then adds z.
|
||||||
//
|
//
|
||||||
// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
|
// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
|
||||||
func (x Int32x16) UnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int8x64) Int32x16
|
func (x Int8x64) UnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16) Int32x16
|
||||||
|
|
||||||
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
|
||||||
//
|
|
||||||
// Asm: VPDPBUSD, CPU Feature: AVXVNNI
|
|
||||||
func (x Uint32x4) UnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int8x16) Uint32x4
|
|
||||||
|
|
||||||
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
|
||||||
//
|
|
||||||
// Asm: VPDPBUSD, CPU Feature: AVXVNNI
|
|
||||||
func (x Uint32x8) UnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int8x32) Uint32x8
|
|
||||||
|
|
||||||
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
|
||||||
//
|
|
||||||
// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
|
|
||||||
func (x Uint32x16) UnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int8x64) Uint32x16
|
|
||||||
|
|
||||||
/* UnsignedSignedQuadDotProdAccumulateMasked */
|
/* UnsignedSignedQuadDotProdAccumulateMasked */
|
||||||
|
|
||||||
// UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
// UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of x and y and then adds z.
|
||||||
//
|
//
|
||||||
// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
|
// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
|
||||||
func (x Int32x4) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int8x16, mask Mask32x4) Int32x4
|
func (x Int8x16) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int32x4, mask Mask32x4) Int32x4
|
||||||
|
|
||||||
// UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
// UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of x and y and then adds z.
|
||||||
//
|
//
|
||||||
// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
|
// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
|
||||||
func (x Int32x8) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int8x32, mask Mask32x8) Int32x8
|
func (x Int8x32) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int32x8, mask Mask32x8) Int32x8
|
||||||
|
|
||||||
// UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
// UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of x and y and then adds z.
|
||||||
//
|
//
|
||||||
// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
|
// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
|
||||||
func (x Int32x16) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int8x64, mask Mask32x16) Int32x16
|
func (x Int8x64) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int32x16, mask Mask32x16) Int32x16
|
||||||
|
|
||||||
// UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
|
||||||
//
|
|
||||||
// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
|
|
||||||
func (x Uint32x4) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int8x16, mask Mask32x4) Uint32x4
|
|
||||||
|
|
||||||
// UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
|
||||||
//
|
|
||||||
// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
|
|
||||||
func (x Uint32x8) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int8x32, mask Mask32x8) Uint32x8
|
|
||||||
|
|
||||||
// UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
|
||||||
//
|
|
||||||
// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
|
|
||||||
func (x Uint32x16) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int8x64, mask Mask32x16) Uint32x16
|
|
||||||
|
|
||||||
/* Xor */
|
/* Xor */
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -202,6 +202,25 @@ func TestAndNot(t *testing.T) {
|
||||||
[]int32{0b10, 0b00, 0b10, 0b00}, "AndNot")
|
[]int32{0b10, 0b00, 0b10, 0b00}, "AndNot")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestPairDotProdAccumulate(t *testing.T) {
|
||||||
|
if !simd.HasAVX512GFNI() {
|
||||||
|
// TODO: this function is actually VNNI, let's implement and call the right check.
|
||||||
|
t.Skip("Test requires HasAVX512GFNI, not available on this hardware")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
x := simd.LoadInt16x8Slice([]int16{2, 2, 2, 2, 2, 2, 2, 2})
|
||||||
|
z := simd.LoadInt32x4Slice([]int32{3, 3, 3, 3})
|
||||||
|
want := []int32{11, 11, 11, 11}
|
||||||
|
got := make([]int32, 4)
|
||||||
|
z = x.PairDotProdAccumulate(x, z)
|
||||||
|
z.StoreSlice(got)
|
||||||
|
for i := range 4 {
|
||||||
|
if got[i] != want[i] {
|
||||||
|
t.Errorf("a and b differ at index %d, got=%d, want=%d", i, got[i], want[i])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// checkInt8Slices ensures that b and a are equal, to the end of b.
|
// checkInt8Slices ensures that b and a are equal, to the end of b.
|
||||||
// also serves to use the slices, to prevent accidental optimization.
|
// also serves to use the slices, to prevent accidental optimization.
|
||||||
func checkInt8Slices(t *testing.T, a, b []int8) {
|
func checkInt8Slices(t *testing.T, a, b []int8) {
|
||||||
|
|
|
||||||
|
|
@ -3294,55 +3294,6 @@ func testInt32x4Compare(t *testing.T, v0 []int32, v1 []int32, want []int32, whic
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func testInt32x4Int16x8Int16x8Int32x4(t *testing.T, v0 []int32, v1 []int16, v2 []int16, want []int32, which string) {
|
|
||||||
t.Helper()
|
|
||||||
var gotv simd.Int32x4
|
|
||||||
got := make([]int32, len(want))
|
|
||||||
vec0 := simd.LoadInt32x4Slice(v0)
|
|
||||||
vec1 := simd.LoadInt16x8Slice(v1)
|
|
||||||
vec2 := simd.LoadInt16x8Slice(v2)
|
|
||||||
switch which {
|
|
||||||
case "PairDotProdAccumulate":
|
|
||||||
gotv = vec0.PairDotProdAccumulate(vec1, vec2)
|
|
||||||
case "SaturatedPairDotProdAccumulate":
|
|
||||||
gotv = vec0.SaturatedPairDotProdAccumulate(vec1, vec2)
|
|
||||||
|
|
||||||
default:
|
|
||||||
t.Errorf("Unknown method: Int32x4.%s", which)
|
|
||||||
}
|
|
||||||
gotv.StoreSlice(got)
|
|
||||||
for i := range len(want) {
|
|
||||||
if got[i] != want[i] {
|
|
||||||
t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func testInt32x4Int16x8Int16x8Mask32x4Int32x4(t *testing.T, v0 []int32, v1 []int16, v2 []int16, v3 []int32, want []int32, which string) {
|
|
||||||
t.Helper()
|
|
||||||
var gotv simd.Int32x4
|
|
||||||
got := make([]int32, len(want))
|
|
||||||
vec0 := simd.LoadInt32x4Slice(v0)
|
|
||||||
vec1 := simd.LoadInt16x8Slice(v1)
|
|
||||||
vec2 := simd.LoadInt16x8Slice(v2)
|
|
||||||
vec3 := simd.LoadInt32x4Slice(v3)
|
|
||||||
switch which {
|
|
||||||
case "PairDotProdAccumulateMasked":
|
|
||||||
gotv = vec0.PairDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x4())
|
|
||||||
case "SaturatedPairDotProdAccumulateMasked":
|
|
||||||
gotv = vec0.SaturatedPairDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x4())
|
|
||||||
|
|
||||||
default:
|
|
||||||
t.Errorf("Unknown method: Int32x4.%s", which)
|
|
||||||
}
|
|
||||||
gotv.StoreSlice(got)
|
|
||||||
for i := range len(want) {
|
|
||||||
if got[i] != want[i] {
|
|
||||||
t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func testInt32x4Mask32x4Int32x4(t *testing.T, v0 []int32, v1 []int32, want []int32, which string) {
|
func testInt32x4Mask32x4Int32x4(t *testing.T, v0 []int32, v1 []int32, want []int32, which string) {
|
||||||
t.Helper()
|
t.Helper()
|
||||||
var gotv simd.Int32x4
|
var gotv simd.Int32x4
|
||||||
|
|
@ -3445,55 +3396,6 @@ func testInt32x4TernaryMasked(t *testing.T, v0 []int32, v1 []int32, v2 []int32,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func testInt32x4Uint8x16Int8x16Int32x4(t *testing.T, v0 []int32, v1 []uint8, v2 []int8, want []int32, which string) {
|
|
||||||
t.Helper()
|
|
||||||
var gotv simd.Int32x4
|
|
||||||
got := make([]int32, len(want))
|
|
||||||
vec0 := simd.LoadInt32x4Slice(v0)
|
|
||||||
vec1 := simd.LoadUint8x16Slice(v1)
|
|
||||||
vec2 := simd.LoadInt8x16Slice(v2)
|
|
||||||
switch which {
|
|
||||||
case "SaturatedUnsignedSignedQuadDotProdAccumulate":
|
|
||||||
gotv = vec0.SaturatedUnsignedSignedQuadDotProdAccumulate(vec1, vec2)
|
|
||||||
case "UnsignedSignedQuadDotProdAccumulate":
|
|
||||||
gotv = vec0.UnsignedSignedQuadDotProdAccumulate(vec1, vec2)
|
|
||||||
|
|
||||||
default:
|
|
||||||
t.Errorf("Unknown method: Int32x4.%s", which)
|
|
||||||
}
|
|
||||||
gotv.StoreSlice(got)
|
|
||||||
for i := range len(want) {
|
|
||||||
if got[i] != want[i] {
|
|
||||||
t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func testInt32x4Uint8x16Int8x16Mask32x4Int32x4(t *testing.T, v0 []int32, v1 []uint8, v2 []int8, v3 []int32, want []int32, which string) {
|
|
||||||
t.Helper()
|
|
||||||
var gotv simd.Int32x4
|
|
||||||
got := make([]int32, len(want))
|
|
||||||
vec0 := simd.LoadInt32x4Slice(v0)
|
|
||||||
vec1 := simd.LoadUint8x16Slice(v1)
|
|
||||||
vec2 := simd.LoadInt8x16Slice(v2)
|
|
||||||
vec3 := simd.LoadInt32x4Slice(v3)
|
|
||||||
switch which {
|
|
||||||
case "SaturatedUnsignedSignedQuadDotProdAccumulateMasked":
|
|
||||||
gotv = vec0.SaturatedUnsignedSignedQuadDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x4())
|
|
||||||
case "UnsignedSignedQuadDotProdAccumulateMasked":
|
|
||||||
gotv = vec0.UnsignedSignedQuadDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x4())
|
|
||||||
|
|
||||||
default:
|
|
||||||
t.Errorf("Unknown method: Int32x4.%s", which)
|
|
||||||
}
|
|
||||||
gotv.StoreSlice(got)
|
|
||||||
for i := range len(want) {
|
|
||||||
if got[i] != want[i] {
|
|
||||||
t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func testInt32x4Unary(t *testing.T, v0 []int32, want []int32, which string) {
|
func testInt32x4Unary(t *testing.T, v0 []int32, want []int32, which string) {
|
||||||
t.Helper()
|
t.Helper()
|
||||||
var gotv simd.Int32x4
|
var gotv simd.Int32x4
|
||||||
|
|
@ -3688,55 +3590,6 @@ func testInt32x8Compare(t *testing.T, v0 []int32, v1 []int32, want []int32, whic
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func testInt32x8Int16x16Int16x16Int32x8(t *testing.T, v0 []int32, v1 []int16, v2 []int16, want []int32, which string) {
|
|
||||||
t.Helper()
|
|
||||||
var gotv simd.Int32x8
|
|
||||||
got := make([]int32, len(want))
|
|
||||||
vec0 := simd.LoadInt32x8Slice(v0)
|
|
||||||
vec1 := simd.LoadInt16x16Slice(v1)
|
|
||||||
vec2 := simd.LoadInt16x16Slice(v2)
|
|
||||||
switch which {
|
|
||||||
case "PairDotProdAccumulate":
|
|
||||||
gotv = vec0.PairDotProdAccumulate(vec1, vec2)
|
|
||||||
case "SaturatedPairDotProdAccumulate":
|
|
||||||
gotv = vec0.SaturatedPairDotProdAccumulate(vec1, vec2)
|
|
||||||
|
|
||||||
default:
|
|
||||||
t.Errorf("Unknown method: Int32x8.%s", which)
|
|
||||||
}
|
|
||||||
gotv.StoreSlice(got)
|
|
||||||
for i := range len(want) {
|
|
||||||
if got[i] != want[i] {
|
|
||||||
t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func testInt32x8Int16x16Int16x16Mask32x8Int32x8(t *testing.T, v0 []int32, v1 []int16, v2 []int16, v3 []int32, want []int32, which string) {
|
|
||||||
t.Helper()
|
|
||||||
var gotv simd.Int32x8
|
|
||||||
got := make([]int32, len(want))
|
|
||||||
vec0 := simd.LoadInt32x8Slice(v0)
|
|
||||||
vec1 := simd.LoadInt16x16Slice(v1)
|
|
||||||
vec2 := simd.LoadInt16x16Slice(v2)
|
|
||||||
vec3 := simd.LoadInt32x8Slice(v3)
|
|
||||||
switch which {
|
|
||||||
case "PairDotProdAccumulateMasked":
|
|
||||||
gotv = vec0.PairDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x8())
|
|
||||||
case "SaturatedPairDotProdAccumulateMasked":
|
|
||||||
gotv = vec0.SaturatedPairDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x8())
|
|
||||||
|
|
||||||
default:
|
|
||||||
t.Errorf("Unknown method: Int32x8.%s", which)
|
|
||||||
}
|
|
||||||
gotv.StoreSlice(got)
|
|
||||||
for i := range len(want) {
|
|
||||||
if got[i] != want[i] {
|
|
||||||
t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func testInt32x8Mask32x8Int32x8(t *testing.T, v0 []int32, v1 []int32, want []int32, which string) {
|
func testInt32x8Mask32x8Int32x8(t *testing.T, v0 []int32, v1 []int32, want []int32, which string) {
|
||||||
t.Helper()
|
t.Helper()
|
||||||
var gotv simd.Int32x8
|
var gotv simd.Int32x8
|
||||||
|
|
@ -3839,55 +3692,6 @@ func testInt32x8TernaryMasked(t *testing.T, v0 []int32, v1 []int32, v2 []int32,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func testInt32x8Uint8x32Int8x32Int32x8(t *testing.T, v0 []int32, v1 []uint8, v2 []int8, want []int32, which string) {
|
|
||||||
t.Helper()
|
|
||||||
var gotv simd.Int32x8
|
|
||||||
got := make([]int32, len(want))
|
|
||||||
vec0 := simd.LoadInt32x8Slice(v0)
|
|
||||||
vec1 := simd.LoadUint8x32Slice(v1)
|
|
||||||
vec2 := simd.LoadInt8x32Slice(v2)
|
|
||||||
switch which {
|
|
||||||
case "SaturatedUnsignedSignedQuadDotProdAccumulate":
|
|
||||||
gotv = vec0.SaturatedUnsignedSignedQuadDotProdAccumulate(vec1, vec2)
|
|
||||||
case "UnsignedSignedQuadDotProdAccumulate":
|
|
||||||
gotv = vec0.UnsignedSignedQuadDotProdAccumulate(vec1, vec2)
|
|
||||||
|
|
||||||
default:
|
|
||||||
t.Errorf("Unknown method: Int32x8.%s", which)
|
|
||||||
}
|
|
||||||
gotv.StoreSlice(got)
|
|
||||||
for i := range len(want) {
|
|
||||||
if got[i] != want[i] {
|
|
||||||
t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func testInt32x8Uint8x32Int8x32Mask32x8Int32x8(t *testing.T, v0 []int32, v1 []uint8, v2 []int8, v3 []int32, want []int32, which string) {
|
|
||||||
t.Helper()
|
|
||||||
var gotv simd.Int32x8
|
|
||||||
got := make([]int32, len(want))
|
|
||||||
vec0 := simd.LoadInt32x8Slice(v0)
|
|
||||||
vec1 := simd.LoadUint8x32Slice(v1)
|
|
||||||
vec2 := simd.LoadInt8x32Slice(v2)
|
|
||||||
vec3 := simd.LoadInt32x8Slice(v3)
|
|
||||||
switch which {
|
|
||||||
case "SaturatedUnsignedSignedQuadDotProdAccumulateMasked":
|
|
||||||
gotv = vec0.SaturatedUnsignedSignedQuadDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x8())
|
|
||||||
case "UnsignedSignedQuadDotProdAccumulateMasked":
|
|
||||||
gotv = vec0.UnsignedSignedQuadDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x8())
|
|
||||||
|
|
||||||
default:
|
|
||||||
t.Errorf("Unknown method: Int32x8.%s", which)
|
|
||||||
}
|
|
||||||
gotv.StoreSlice(got)
|
|
||||||
for i := range len(want) {
|
|
||||||
if got[i] != want[i] {
|
|
||||||
t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func testInt32x8Unary(t *testing.T, v0 []int32, want []int32, which string) {
|
func testInt32x8Unary(t *testing.T, v0 []int32, want []int32, which string) {
|
||||||
t.Helper()
|
t.Helper()
|
||||||
var gotv simd.Int32x8
|
var gotv simd.Int32x8
|
||||||
|
|
@ -4055,55 +3859,6 @@ func testInt32x16Compare(t *testing.T, v0 []int32, v1 []int32, want []int32, whi
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func testInt32x16Int16x32Int16x32Int32x16(t *testing.T, v0 []int32, v1 []int16, v2 []int16, want []int32, which string) {
|
|
||||||
t.Helper()
|
|
||||||
var gotv simd.Int32x16
|
|
||||||
got := make([]int32, len(want))
|
|
||||||
vec0 := simd.LoadInt32x16Slice(v0)
|
|
||||||
vec1 := simd.LoadInt16x32Slice(v1)
|
|
||||||
vec2 := simd.LoadInt16x32Slice(v2)
|
|
||||||
switch which {
|
|
||||||
case "PairDotProdAccumulate":
|
|
||||||
gotv = vec0.PairDotProdAccumulate(vec1, vec2)
|
|
||||||
case "SaturatedPairDotProdAccumulate":
|
|
||||||
gotv = vec0.SaturatedPairDotProdAccumulate(vec1, vec2)
|
|
||||||
|
|
||||||
default:
|
|
||||||
t.Errorf("Unknown method: Int32x16.%s", which)
|
|
||||||
}
|
|
||||||
gotv.StoreSlice(got)
|
|
||||||
for i := range len(want) {
|
|
||||||
if got[i] != want[i] {
|
|
||||||
t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func testInt32x16Int16x32Int16x32Mask32x16Int32x16(t *testing.T, v0 []int32, v1 []int16, v2 []int16, v3 []int32, want []int32, which string) {
|
|
||||||
t.Helper()
|
|
||||||
var gotv simd.Int32x16
|
|
||||||
got := make([]int32, len(want))
|
|
||||||
vec0 := simd.LoadInt32x16Slice(v0)
|
|
||||||
vec1 := simd.LoadInt16x32Slice(v1)
|
|
||||||
vec2 := simd.LoadInt16x32Slice(v2)
|
|
||||||
vec3 := simd.LoadInt32x16Slice(v3)
|
|
||||||
switch which {
|
|
||||||
case "PairDotProdAccumulateMasked":
|
|
||||||
gotv = vec0.PairDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x16())
|
|
||||||
case "SaturatedPairDotProdAccumulateMasked":
|
|
||||||
gotv = vec0.SaturatedPairDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x16())
|
|
||||||
|
|
||||||
default:
|
|
||||||
t.Errorf("Unknown method: Int32x16.%s", which)
|
|
||||||
}
|
|
||||||
gotv.StoreSlice(got)
|
|
||||||
for i := range len(want) {
|
|
||||||
if got[i] != want[i] {
|
|
||||||
t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func testInt32x16Mask32x16Int32x16(t *testing.T, v0 []int32, v1 []int32, want []int32, which string) {
|
func testInt32x16Mask32x16Int32x16(t *testing.T, v0 []int32, v1 []int32, want []int32, which string) {
|
||||||
t.Helper()
|
t.Helper()
|
||||||
var gotv simd.Int32x16
|
var gotv simd.Int32x16
|
||||||
|
|
@ -4206,55 +3961,6 @@ func testInt32x16TernaryMasked(t *testing.T, v0 []int32, v1 []int32, v2 []int32,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func testInt32x16Uint8x64Int8x64Int32x16(t *testing.T, v0 []int32, v1 []uint8, v2 []int8, want []int32, which string) {
|
|
||||||
t.Helper()
|
|
||||||
var gotv simd.Int32x16
|
|
||||||
got := make([]int32, len(want))
|
|
||||||
vec0 := simd.LoadInt32x16Slice(v0)
|
|
||||||
vec1 := simd.LoadUint8x64Slice(v1)
|
|
||||||
vec2 := simd.LoadInt8x64Slice(v2)
|
|
||||||
switch which {
|
|
||||||
case "SaturatedUnsignedSignedQuadDotProdAccumulate":
|
|
||||||
gotv = vec0.SaturatedUnsignedSignedQuadDotProdAccumulate(vec1, vec2)
|
|
||||||
case "UnsignedSignedQuadDotProdAccumulate":
|
|
||||||
gotv = vec0.UnsignedSignedQuadDotProdAccumulate(vec1, vec2)
|
|
||||||
|
|
||||||
default:
|
|
||||||
t.Errorf("Unknown method: Int32x16.%s", which)
|
|
||||||
}
|
|
||||||
gotv.StoreSlice(got)
|
|
||||||
for i := range len(want) {
|
|
||||||
if got[i] != want[i] {
|
|
||||||
t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func testInt32x16Uint8x64Int8x64Mask32x16Int32x16(t *testing.T, v0 []int32, v1 []uint8, v2 []int8, v3 []int32, want []int32, which string) {
|
|
||||||
t.Helper()
|
|
||||||
var gotv simd.Int32x16
|
|
||||||
got := make([]int32, len(want))
|
|
||||||
vec0 := simd.LoadInt32x16Slice(v0)
|
|
||||||
vec1 := simd.LoadUint8x64Slice(v1)
|
|
||||||
vec2 := simd.LoadInt8x64Slice(v2)
|
|
||||||
vec3 := simd.LoadInt32x16Slice(v3)
|
|
||||||
switch which {
|
|
||||||
case "SaturatedUnsignedSignedQuadDotProdAccumulateMasked":
|
|
||||||
gotv = vec0.SaturatedUnsignedSignedQuadDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x16())
|
|
||||||
case "UnsignedSignedQuadDotProdAccumulateMasked":
|
|
||||||
gotv = vec0.UnsignedSignedQuadDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x16())
|
|
||||||
|
|
||||||
default:
|
|
||||||
t.Errorf("Unknown method: Int32x16.%s", which)
|
|
||||||
}
|
|
||||||
gotv.StoreSlice(got)
|
|
||||||
for i := range len(want) {
|
|
||||||
if got[i] != want[i] {
|
|
||||||
t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func testInt32x16Unary(t *testing.T, v0 []int32, want []int32, which string) {
|
func testInt32x16Unary(t *testing.T, v0 []int32, want []int32, which string) {
|
||||||
t.Helper()
|
t.Helper()
|
||||||
var gotv simd.Int32x16
|
var gotv simd.Int32x16
|
||||||
|
|
@ -6880,55 +6586,6 @@ func testUint32x4TernaryMasked(t *testing.T, v0 []uint32, v1 []uint32, v2 []uint
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func testUint32x4Uint8x16Int8x16Mask32x4Uint32x4(t *testing.T, v0 []uint32, v1 []uint8, v2 []int8, v3 []int32, want []uint32, which string) {
|
|
||||||
t.Helper()
|
|
||||||
var gotv simd.Uint32x4
|
|
||||||
got := make([]uint32, len(want))
|
|
||||||
vec0 := simd.LoadUint32x4Slice(v0)
|
|
||||||
vec1 := simd.LoadUint8x16Slice(v1)
|
|
||||||
vec2 := simd.LoadInt8x16Slice(v2)
|
|
||||||
vec3 := simd.LoadInt32x4Slice(v3)
|
|
||||||
switch which {
|
|
||||||
case "SaturatedUnsignedSignedQuadDotProdAccumulateMasked":
|
|
||||||
gotv = vec0.SaturatedUnsignedSignedQuadDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x4())
|
|
||||||
case "UnsignedSignedQuadDotProdAccumulateMasked":
|
|
||||||
gotv = vec0.UnsignedSignedQuadDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x4())
|
|
||||||
|
|
||||||
default:
|
|
||||||
t.Errorf("Unknown method: Uint32x4.%s", which)
|
|
||||||
}
|
|
||||||
gotv.StoreSlice(got)
|
|
||||||
for i := range len(want) {
|
|
||||||
if got[i] != want[i] {
|
|
||||||
t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func testUint32x4Uint8x16Int8x16Uint32x4(t *testing.T, v0 []uint32, v1 []uint8, v2 []int8, want []uint32, which string) {
|
|
||||||
t.Helper()
|
|
||||||
var gotv simd.Uint32x4
|
|
||||||
got := make([]uint32, len(want))
|
|
||||||
vec0 := simd.LoadUint32x4Slice(v0)
|
|
||||||
vec1 := simd.LoadUint8x16Slice(v1)
|
|
||||||
vec2 := simd.LoadInt8x16Slice(v2)
|
|
||||||
switch which {
|
|
||||||
case "SaturatedUnsignedSignedQuadDotProdAccumulate":
|
|
||||||
gotv = vec0.SaturatedUnsignedSignedQuadDotProdAccumulate(vec1, vec2)
|
|
||||||
case "UnsignedSignedQuadDotProdAccumulate":
|
|
||||||
gotv = vec0.UnsignedSignedQuadDotProdAccumulate(vec1, vec2)
|
|
||||||
|
|
||||||
default:
|
|
||||||
t.Errorf("Unknown method: Uint32x4.%s", which)
|
|
||||||
}
|
|
||||||
gotv.StoreSlice(got)
|
|
||||||
for i := range len(want) {
|
|
||||||
if got[i] != want[i] {
|
|
||||||
t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func testUint32x4Unary(t *testing.T, v0 []uint32, want []uint32, which string) {
|
func testUint32x4Unary(t *testing.T, v0 []uint32, want []uint32, which string) {
|
||||||
t.Helper()
|
t.Helper()
|
||||||
var gotv simd.Uint32x4
|
var gotv simd.Uint32x4
|
||||||
|
|
@ -7215,55 +6872,6 @@ func testUint32x8TernaryMasked(t *testing.T, v0 []uint32, v1 []uint32, v2 []uint
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func testUint32x8Uint8x32Int8x32Mask32x8Uint32x8(t *testing.T, v0 []uint32, v1 []uint8, v2 []int8, v3 []int32, want []uint32, which string) {
|
|
||||||
t.Helper()
|
|
||||||
var gotv simd.Uint32x8
|
|
||||||
got := make([]uint32, len(want))
|
|
||||||
vec0 := simd.LoadUint32x8Slice(v0)
|
|
||||||
vec1 := simd.LoadUint8x32Slice(v1)
|
|
||||||
vec2 := simd.LoadInt8x32Slice(v2)
|
|
||||||
vec3 := simd.LoadInt32x8Slice(v3)
|
|
||||||
switch which {
|
|
||||||
case "SaturatedUnsignedSignedQuadDotProdAccumulateMasked":
|
|
||||||
gotv = vec0.SaturatedUnsignedSignedQuadDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x8())
|
|
||||||
case "UnsignedSignedQuadDotProdAccumulateMasked":
|
|
||||||
gotv = vec0.UnsignedSignedQuadDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x8())
|
|
||||||
|
|
||||||
default:
|
|
||||||
t.Errorf("Unknown method: Uint32x8.%s", which)
|
|
||||||
}
|
|
||||||
gotv.StoreSlice(got)
|
|
||||||
for i := range len(want) {
|
|
||||||
if got[i] != want[i] {
|
|
||||||
t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func testUint32x8Uint8x32Int8x32Uint32x8(t *testing.T, v0 []uint32, v1 []uint8, v2 []int8, want []uint32, which string) {
|
|
||||||
t.Helper()
|
|
||||||
var gotv simd.Uint32x8
|
|
||||||
got := make([]uint32, len(want))
|
|
||||||
vec0 := simd.LoadUint32x8Slice(v0)
|
|
||||||
vec1 := simd.LoadUint8x32Slice(v1)
|
|
||||||
vec2 := simd.LoadInt8x32Slice(v2)
|
|
||||||
switch which {
|
|
||||||
case "SaturatedUnsignedSignedQuadDotProdAccumulate":
|
|
||||||
gotv = vec0.SaturatedUnsignedSignedQuadDotProdAccumulate(vec1, vec2)
|
|
||||||
case "UnsignedSignedQuadDotProdAccumulate":
|
|
||||||
gotv = vec0.UnsignedSignedQuadDotProdAccumulate(vec1, vec2)
|
|
||||||
|
|
||||||
default:
|
|
||||||
t.Errorf("Unknown method: Uint32x8.%s", which)
|
|
||||||
}
|
|
||||||
gotv.StoreSlice(got)
|
|
||||||
for i := range len(want) {
|
|
||||||
if got[i] != want[i] {
|
|
||||||
t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func testUint32x8Unary(t *testing.T, v0 []uint32, want []uint32, which string) {
|
func testUint32x8Unary(t *testing.T, v0 []uint32, want []uint32, which string) {
|
||||||
t.Helper()
|
t.Helper()
|
||||||
var gotv simd.Uint32x8
|
var gotv simd.Uint32x8
|
||||||
|
|
@ -7525,55 +7133,6 @@ func testUint32x16TernaryMasked(t *testing.T, v0 []uint32, v1 []uint32, v2 []uin
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func testUint32x16Uint8x64Int8x64Mask32x16Uint32x16(t *testing.T, v0 []uint32, v1 []uint8, v2 []int8, v3 []int32, want []uint32, which string) {
|
|
||||||
t.Helper()
|
|
||||||
var gotv simd.Uint32x16
|
|
||||||
got := make([]uint32, len(want))
|
|
||||||
vec0 := simd.LoadUint32x16Slice(v0)
|
|
||||||
vec1 := simd.LoadUint8x64Slice(v1)
|
|
||||||
vec2 := simd.LoadInt8x64Slice(v2)
|
|
||||||
vec3 := simd.LoadInt32x16Slice(v3)
|
|
||||||
switch which {
|
|
||||||
case "SaturatedUnsignedSignedQuadDotProdAccumulateMasked":
|
|
||||||
gotv = vec0.SaturatedUnsignedSignedQuadDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x16())
|
|
||||||
case "UnsignedSignedQuadDotProdAccumulateMasked":
|
|
||||||
gotv = vec0.UnsignedSignedQuadDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x16())
|
|
||||||
|
|
||||||
default:
|
|
||||||
t.Errorf("Unknown method: Uint32x16.%s", which)
|
|
||||||
}
|
|
||||||
gotv.StoreSlice(got)
|
|
||||||
for i := range len(want) {
|
|
||||||
if got[i] != want[i] {
|
|
||||||
t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func testUint32x16Uint8x64Int8x64Uint32x16(t *testing.T, v0 []uint32, v1 []uint8, v2 []int8, want []uint32, which string) {
|
|
||||||
t.Helper()
|
|
||||||
var gotv simd.Uint32x16
|
|
||||||
got := make([]uint32, len(want))
|
|
||||||
vec0 := simd.LoadUint32x16Slice(v0)
|
|
||||||
vec1 := simd.LoadUint8x64Slice(v1)
|
|
||||||
vec2 := simd.LoadInt8x64Slice(v2)
|
|
||||||
switch which {
|
|
||||||
case "SaturatedUnsignedSignedQuadDotProdAccumulate":
|
|
||||||
gotv = vec0.SaturatedUnsignedSignedQuadDotProdAccumulate(vec1, vec2)
|
|
||||||
case "UnsignedSignedQuadDotProdAccumulate":
|
|
||||||
gotv = vec0.UnsignedSignedQuadDotProdAccumulate(vec1, vec2)
|
|
||||||
|
|
||||||
default:
|
|
||||||
t.Errorf("Unknown method: Uint32x16.%s", which)
|
|
||||||
}
|
|
||||||
gotv.StoreSlice(got)
|
|
||||||
for i := range len(want) {
|
|
||||||
if got[i] != want[i] {
|
|
||||||
t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func testUint32x16Unary(t *testing.T, v0 []uint32, want []uint32, which string) {
|
func testUint32x16Unary(t *testing.T, v0 []uint32, want []uint32, which string) {
|
||||||
t.Helper()
|
t.Helper()
|
||||||
var gotv simd.Uint32x16
|
var gotv simd.Uint32x16
|
||||||
|
|
@ -8430,6 +7989,8 @@ func testUint64x8UnaryMasked(t *testing.T, v0 []uint64, v1 []int64, want []uint6
|
||||||
// GaloisFieldAffineTransformMasked
|
// GaloisFieldAffineTransformMasked
|
||||||
// Get128
|
// Get128
|
||||||
// GetElem
|
// GetElem
|
||||||
|
// PairDotProdAccumulate
|
||||||
|
// PairDotProdAccumulateMasked
|
||||||
// Permute
|
// Permute
|
||||||
// Permute2
|
// Permute2
|
||||||
// Permute2Masked
|
// Permute2Masked
|
||||||
|
|
@ -8440,6 +8001,10 @@ func testUint64x8UnaryMasked(t *testing.T, v0 []uint64, v1 []int64, want []uint6
|
||||||
// RotateAllRightMasked
|
// RotateAllRightMasked
|
||||||
// RoundWithPrecision
|
// RoundWithPrecision
|
||||||
// RoundWithPrecisionMasked
|
// RoundWithPrecisionMasked
|
||||||
|
// SaturatedPairDotProdAccumulate
|
||||||
|
// SaturatedPairDotProdAccumulateMasked
|
||||||
|
// SaturatedUnsignedSignedQuadDotProdAccumulate
|
||||||
|
// SaturatedUnsignedSignedQuadDotProdAccumulateMasked
|
||||||
// Set128
|
// Set128
|
||||||
// SetElem
|
// SetElem
|
||||||
// ShiftAllLeft
|
// ShiftAllLeft
|
||||||
|
|
@ -8452,3 +8017,5 @@ func testUint64x8UnaryMasked(t *testing.T, v0 []uint64, v1 []int64, want []uint6
|
||||||
// ShiftAllRightMasked
|
// ShiftAllRightMasked
|
||||||
// TruncWithPrecision
|
// TruncWithPrecision
|
||||||
// TruncWithPrecisionMasked
|
// TruncWithPrecisionMasked
|
||||||
|
// UnsignedSignedQuadDotProdAccumulate
|
||||||
|
// UnsignedSignedQuadDotProdAccumulateMasked
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue