mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
[dev.simd] cmd/compile: add more dot products
This CL is generated by CL 680215.

Change-Id: Ie085e65e0473a8e96170702d7265d379ec8812ba
Reviewed-on: https://go-review.googlesource.com/c/go/+/681298
Reviewed-by: David Chase <drchase@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
parent
3df41c856e
commit
ded6e0ac71
8 changed files with 1439 additions and 0 deletions
|
|
@ -679,6 +679,34 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
|||
ssa.OpAMD64VPCMPBMasked512:
|
||||
p = simdFp2k1k1Imm8(s, v)
|
||||
|
||||
case ssa.OpAMD64VPDPWSSD128,
|
||||
ssa.OpAMD64VPDPWSSD256,
|
||||
ssa.OpAMD64VPDPWSSD512,
|
||||
ssa.OpAMD64VPDPWSSDS128,
|
||||
ssa.OpAMD64VPDPWSSDS256,
|
||||
ssa.OpAMD64VPDPWSSDS512,
|
||||
ssa.OpAMD64VPDPBUSDS128,
|
||||
ssa.OpAMD64VPDPBUSDS256,
|
||||
ssa.OpAMD64VPDPBUSDS512,
|
||||
ssa.OpAMD64VPDPBUSD128,
|
||||
ssa.OpAMD64VPDPBUSD256,
|
||||
ssa.OpAMD64VPDPBUSD512:
|
||||
p = simdFp31ResultInArg0(s, v)
|
||||
|
||||
case ssa.OpAMD64VPDPWSSDMasked512,
|
||||
ssa.OpAMD64VPDPWSSDMasked128,
|
||||
ssa.OpAMD64VPDPWSSDMasked256,
|
||||
ssa.OpAMD64VPDPWSSDSMasked512,
|
||||
ssa.OpAMD64VPDPWSSDSMasked128,
|
||||
ssa.OpAMD64VPDPWSSDSMasked256,
|
||||
ssa.OpAMD64VPDPBUSDSMasked512,
|
||||
ssa.OpAMD64VPDPBUSDSMasked128,
|
||||
ssa.OpAMD64VPDPBUSDSMasked256,
|
||||
ssa.OpAMD64VPDPBUSDMasked512,
|
||||
ssa.OpAMD64VPDPBUSDMasked128,
|
||||
ssa.OpAMD64VPDPBUSDMasked256:
|
||||
p = simdFp3k1fp1ResultInArg0(s, v)
|
||||
|
||||
default:
|
||||
// Unknown reg shape
|
||||
return false
|
||||
|
|
@ -884,6 +912,9 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
|||
ssa.OpAMD64VPMADDWDMasked256,
|
||||
ssa.OpAMD64VPMADDWDMasked512,
|
||||
ssa.OpAMD64VPMADDWDMasked128,
|
||||
ssa.OpAMD64VPDPWSSDMasked512,
|
||||
ssa.OpAMD64VPDPWSSDMasked128,
|
||||
ssa.OpAMD64VPDPWSSDMasked256,
|
||||
ssa.OpAMD64VPOPCNTWMasked256,
|
||||
ssa.OpAMD64VPOPCNTWMasked512,
|
||||
ssa.OpAMD64VPOPCNTWMasked128,
|
||||
|
|
@ -902,6 +933,9 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
|||
ssa.OpAMD64VPADDSBMasked128,
|
||||
ssa.OpAMD64VPADDSBMasked256,
|
||||
ssa.OpAMD64VPADDSBMasked512,
|
||||
ssa.OpAMD64VPDPWSSDSMasked512,
|
||||
ssa.OpAMD64VPDPWSSDSMasked128,
|
||||
ssa.OpAMD64VPDPWSSDSMasked256,
|
||||
ssa.OpAMD64VPSUBSWMasked256,
|
||||
ssa.OpAMD64VPSUBSWMasked512,
|
||||
ssa.OpAMD64VPSUBSWMasked128,
|
||||
|
|
@ -911,6 +945,9 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
|||
ssa.OpAMD64VPMADDUBSWMasked256,
|
||||
ssa.OpAMD64VPMADDUBSWMasked512,
|
||||
ssa.OpAMD64VPMADDUBSWMasked128,
|
||||
ssa.OpAMD64VPDPBUSDSMasked512,
|
||||
ssa.OpAMD64VPDPBUSDSMasked128,
|
||||
ssa.OpAMD64VPDPBUSDSMasked256,
|
||||
ssa.OpAMD64VSQRTPSMasked512,
|
||||
ssa.OpAMD64VSQRTPSMasked128,
|
||||
ssa.OpAMD64VSQRTPSMasked256,
|
||||
|
|
@ -929,6 +966,9 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
|||
ssa.OpAMD64VPSUBBMasked128,
|
||||
ssa.OpAMD64VPSUBBMasked256,
|
||||
ssa.OpAMD64VPSUBBMasked512,
|
||||
ssa.OpAMD64VPDPBUSDMasked512,
|
||||
ssa.OpAMD64VPDPBUSDMasked128,
|
||||
ssa.OpAMD64VPDPBUSDMasked256,
|
||||
ssa.OpAMD64VXORPSMasked512,
|
||||
ssa.OpAMD64VXORPSMasked128,
|
||||
ssa.OpAMD64VXORPSMasked256,
|
||||
|
|
|
|||
|
|
@ -833,6 +833,9 @@
|
|||
(MaskedPairDotProdInt16x16 x y mask) => (VPMADDWDMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(MaskedPairDotProdInt16x32 x y mask) => (VPMADDWDMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
|
||||
(MaskedPairDotProdInt16x8 x y mask) => (VPMADDWDMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(MaskedPairDotProdAccumulateInt32x16 x y z mask) => (VPDPWSSDMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
|
||||
(MaskedPairDotProdAccumulateInt32x4 x y z mask) => (VPDPWSSDMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(MaskedPairDotProdAccumulateInt32x8 x y z mask) => (VPDPWSSDMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(MaskedPopCountInt16x16 x mask) => (VPOPCNTWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(MaskedPopCountInt16x32 x mask) => (VPOPCNTWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
|
||||
(MaskedPopCountInt16x8 x mask) => (VPOPCNTWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
|
|
@ -881,6 +884,9 @@
|
|||
(MaskedSaturatedAddUint8x16 x y mask) => (VPADDSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(MaskedSaturatedAddUint8x32 x y mask) => (VPADDSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
|
||||
(MaskedSaturatedAddUint8x64 x y mask) => (VPADDSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
|
||||
(MaskedSaturatedPairDotProdAccumulateInt32x16 x y z mask) => (VPDPWSSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
|
||||
(MaskedSaturatedPairDotProdAccumulateInt32x4 x y z mask) => (VPDPWSSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(MaskedSaturatedPairDotProdAccumulateInt32x8 x y z mask) => (VPDPWSSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(MaskedSaturatedSubInt16x16 x y mask) => (VPSUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(MaskedSaturatedSubInt16x32 x y mask) => (VPSUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
|
||||
(MaskedSaturatedSubInt16x8 x y mask) => (VPSUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
|
|
@ -896,6 +902,12 @@
|
|||
(MaskedSaturatedUnsignedSignedPairDotProdUint16x16 x y mask) => (VPMADDUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(MaskedSaturatedUnsignedSignedPairDotProdUint16x32 x y mask) => (VPMADDUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
|
||||
(MaskedSaturatedUnsignedSignedPairDotProdUint16x8 x y mask) => (VPMADDUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16 x y z mask) => (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
|
||||
(MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4 x y z mask) => (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8 x y z mask) => (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16 x y z mask) => (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
|
||||
(MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4 x y z mask) => (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8 x y z mask) => (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(MaskedSqrtFloat32x16 x mask) => (VSQRTPSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
|
||||
(MaskedSqrtFloat32x4 x mask) => (VSQRTPSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(MaskedSqrtFloat32x8 x mask) => (VSQRTPSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
|
|
@ -944,6 +956,12 @@
|
|||
(MaskedTruncWithPrecisionFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+3] x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(MaskedTruncWithPrecisionFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+3] x (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(MaskedTruncWithPrecisionFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+3] x (VPMOVVec64x8ToM <types.TypeMask> mask))
|
||||
(MaskedUnsignedSignedQuadDotProdAccumulateInt32x16 x y z mask) => (VPDPBUSDMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
|
||||
(MaskedUnsignedSignedQuadDotProdAccumulateInt32x4 x y z mask) => (VPDPBUSDMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(MaskedUnsignedSignedQuadDotProdAccumulateInt32x8 x y z mask) => (VPDPBUSDMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(MaskedUnsignedSignedQuadDotProdAccumulateUint32x16 x y z mask) => (VPDPBUSDMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
|
||||
(MaskedUnsignedSignedQuadDotProdAccumulateUint32x4 x y z mask) => (VPDPBUSDMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(MaskedUnsignedSignedQuadDotProdAccumulateUint32x8 x y z mask) => (VPDPBUSDMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(MaskedXorFloat32x16 x y mask) => (VXORPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
|
||||
(MaskedXorFloat32x4 x y mask) => (VXORPSMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(MaskedXorFloat32x8 x y mask) => (VXORPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
|
|
@ -1118,6 +1136,9 @@
|
|||
(PairDotProdInt16x16 ...) => (VPMADDWD256 ...)
|
||||
(PairDotProdInt16x32 ...) => (VPMADDWD512 ...)
|
||||
(PairDotProdInt16x8 ...) => (VPMADDWD128 ...)
|
||||
(PairDotProdAccumulateInt32x16 ...) => (VPDPWSSD512 ...)
|
||||
(PairDotProdAccumulateInt32x4 ...) => (VPDPWSSD128 ...)
|
||||
(PairDotProdAccumulateInt32x8 ...) => (VPDPWSSD256 ...)
|
||||
(PairwiseAddFloat32x4 ...) => (VHADDPS128 ...)
|
||||
(PairwiseAddFloat32x8 ...) => (VHADDPS256 ...)
|
||||
(PairwiseAddFloat64x2 ...) => (VHADDPD128 ...)
|
||||
|
|
@ -1194,6 +1215,9 @@
|
|||
(SaturatedAddUint8x16 ...) => (VPADDSB128 ...)
|
||||
(SaturatedAddUint8x32 ...) => (VPADDSB256 ...)
|
||||
(SaturatedAddUint8x64 ...) => (VPADDSB512 ...)
|
||||
(SaturatedPairDotProdAccumulateInt32x16 ...) => (VPDPWSSDS512 ...)
|
||||
(SaturatedPairDotProdAccumulateInt32x4 ...) => (VPDPWSSDS128 ...)
|
||||
(SaturatedPairDotProdAccumulateInt32x8 ...) => (VPDPWSSDS256 ...)
|
||||
(SaturatedPairwiseAddInt16x16 ...) => (VPHADDSW256 ...)
|
||||
(SaturatedPairwiseAddInt16x8 ...) => (VPHADDSW128 ...)
|
||||
(SaturatedPairwiseSubInt16x16 ...) => (VPHSUBSW256 ...)
|
||||
|
|
@ -1215,6 +1239,12 @@
|
|||
(SaturatedUnsignedSignedPairDotProdUint16x8 ...) => (VPMADDUBSW128 ...)
|
||||
(SaturatedUnsignedSignedPairDotProdUint8x16 ...) => (VPMADDUBSW128 ...)
|
||||
(SaturatedUnsignedSignedPairDotProdUint8x32 ...) => (VPMADDUBSW256 ...)
|
||||
(SaturatedUnsignedSignedQuadDotProdAccumulateInt32x16 ...) => (VPDPBUSDS512 ...)
|
||||
(SaturatedUnsignedSignedQuadDotProdAccumulateInt32x4 ...) => (VPDPBUSDS128 ...)
|
||||
(SaturatedUnsignedSignedQuadDotProdAccumulateInt32x8 ...) => (VPDPBUSDS256 ...)
|
||||
(SaturatedUnsignedSignedQuadDotProdAccumulateUint32x16 ...) => (VPDPBUSDS512 ...)
|
||||
(SaturatedUnsignedSignedQuadDotProdAccumulateUint32x4 ...) => (VPDPBUSDS128 ...)
|
||||
(SaturatedUnsignedSignedQuadDotProdAccumulateUint32x8 ...) => (VPDPBUSDS256 ...)
|
||||
(SignInt16x16 ...) => (VPSIGNW256 ...)
|
||||
(SignInt16x8 ...) => (VPSIGNW128 ...)
|
||||
(SignInt32x4 ...) => (VPSIGND128 ...)
|
||||
|
|
@ -1273,6 +1303,12 @@
|
|||
(TruncWithPrecisionFloat64x2 [a] x) => (VRNDSCALEPD128 [a+3] x)
|
||||
(TruncWithPrecisionFloat64x4 [a] x) => (VRNDSCALEPD256 [a+3] x)
|
||||
(TruncWithPrecisionFloat64x8 [a] x) => (VRNDSCALEPD512 [a+3] x)
|
||||
(UnsignedSignedQuadDotProdAccumulateInt32x16 ...) => (VPDPBUSD512 ...)
|
||||
(UnsignedSignedQuadDotProdAccumulateInt32x4 ...) => (VPDPBUSD128 ...)
|
||||
(UnsignedSignedQuadDotProdAccumulateInt32x8 ...) => (VPDPBUSD256 ...)
|
||||
(UnsignedSignedQuadDotProdAccumulateUint32x16 ...) => (VPDPBUSD512 ...)
|
||||
(UnsignedSignedQuadDotProdAccumulateUint32x4 ...) => (VPDPBUSD128 ...)
|
||||
(UnsignedSignedQuadDotProdAccumulateUint32x8 ...) => (VPDPBUSD256 ...)
|
||||
(XorFloat32x16 ...) => (VXORPS512 ...)
|
||||
(XorFloat32x4 ...) => (VXORPS128 ...)
|
||||
(XorFloat32x8 ...) => (VXORPS256 ...)
|
||||
|
|
|
|||
|
|
@ -283,15 +283,23 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
|
|||
{name: "VPMINSDMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMINSD", commutative: true, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPMULLDMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMULLD", commutative: true, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPORDMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPORD", commutative: true, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPDPWSSDMasked512", argLength: 4, reg: fp3k1fp1, asm: "VPDPWSSD", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPOPCNTDMasked512", argLength: 2, reg: fp1k1fp1, asm: "VPOPCNTD", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPDPWSSDSMasked512", argLength: 4, reg: fp3k1fp1, asm: "VPDPWSSDS", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPDPBUSDSMasked512", argLength: 4, reg: fp3k1fp1, asm: "VPDPBUSDS", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPSUBDMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPSUBD", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPDPBUSDMasked512", argLength: 4, reg: fp3k1fp1, asm: "VPDPBUSD", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPXORDMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPXORD", commutative: true, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPMAXSD512", argLength: 2, reg: fp21, asm: "VPMAXSD", commutative: true, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPMINSD512", argLength: 2, reg: fp21, asm: "VPMINSD", commutative: true, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPMULLD512", argLength: 2, reg: fp21, asm: "VPMULLD", commutative: true, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPORD512", argLength: 2, reg: fp21, asm: "VPORD", commutative: true, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPDPWSSD512", argLength: 3, reg: fp31, asm: "VPDPWSSD", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPOPCNTD512", argLength: 1, reg: fp11, asm: "VPOPCNTD", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPDPWSSDS512", argLength: 3, reg: fp31, asm: "VPDPWSSDS", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPDPBUSDS512", argLength: 3, reg: fp31, asm: "VPDPBUSDS", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPSUBD512", argLength: 2, reg: fp21, asm: "VPSUBD", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPDPBUSD512", argLength: 3, reg: fp31, asm: "VPDPBUSD", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPXORD512", argLength: 2, reg: fp21, asm: "VPXORD", commutative: true, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPABSD128", argLength: 1, reg: fp11, asm: "VPABSD", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPADDD128", argLength: 2, reg: fp21, asm: "VPADDD", commutative: true, typ: "Vec128", resultInArg0: false},
|
||||
|
|
@ -307,18 +315,26 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
|
|||
{name: "VPMINSDMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMINSD", commutative: true, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPMULLDMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMULLD", commutative: true, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPORDMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPORD", commutative: true, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPDPWSSDMasked128", argLength: 4, reg: fp3k1fp1, asm: "VPDPWSSD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPOPCNTDMasked128", argLength: 2, reg: fp1k1fp1, asm: "VPOPCNTD", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPDPWSSDSMasked128", argLength: 4, reg: fp3k1fp1, asm: "VPDPWSSDS", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPDPBUSDSMasked128", argLength: 4, reg: fp3k1fp1, asm: "VPDPBUSDS", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPSUBDMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPSUBD", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPDPBUSDMasked128", argLength: 4, reg: fp3k1fp1, asm: "VPDPBUSD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPXORDMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPXORD", commutative: true, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPMAXSD128", argLength: 2, reg: fp21, asm: "VPMAXSD", commutative: true, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPMINSD128", argLength: 2, reg: fp21, asm: "VPMINSD", commutative: true, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPMULDQ128", argLength: 2, reg: fp21, asm: "VPMULDQ", commutative: true, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPMULLD128", argLength: 2, reg: fp21, asm: "VPMULLD", commutative: true, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPDPWSSD128", argLength: 3, reg: fp31, asm: "VPDPWSSD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPHADDD128", argLength: 2, reg: fp21, asm: "VPHADDD", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPHSUBD128", argLength: 2, reg: fp21, asm: "VPHSUBD", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPOPCNTD128", argLength: 1, reg: fp11, asm: "VPOPCNTD", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPDPWSSDS128", argLength: 3, reg: fp31, asm: "VPDPWSSDS", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPDPBUSDS128", argLength: 3, reg: fp31, asm: "VPDPBUSDS", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPSIGND128", argLength: 2, reg: fp21, asm: "VPSIGND", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPSUBD128", argLength: 2, reg: fp21, asm: "VPSUBD", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPDPBUSD128", argLength: 3, reg: fp31, asm: "VPDPBUSD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPABSD256", argLength: 1, reg: fp11, asm: "VPABSD", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPADDD256", argLength: 2, reg: fp21, asm: "VPADDD", commutative: true, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPCMPEQD256", argLength: 2, reg: fp21, asm: "VPCMPEQD", commutative: true, typ: "Vec256", resultInArg0: false},
|
||||
|
|
@ -333,18 +349,26 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
|
|||
{name: "VPMINSDMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMINSD", commutative: true, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPMULLDMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMULLD", commutative: true, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPORDMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPORD", commutative: true, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPDPWSSDMasked256", argLength: 4, reg: fp3k1fp1, asm: "VPDPWSSD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPOPCNTDMasked256", argLength: 2, reg: fp1k1fp1, asm: "VPOPCNTD", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPDPWSSDSMasked256", argLength: 4, reg: fp3k1fp1, asm: "VPDPWSSDS", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPDPBUSDSMasked256", argLength: 4, reg: fp3k1fp1, asm: "VPDPBUSDS", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPSUBDMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPSUBD", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPDPBUSDMasked256", argLength: 4, reg: fp3k1fp1, asm: "VPDPBUSD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPXORDMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPXORD", commutative: true, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPMAXSD256", argLength: 2, reg: fp21, asm: "VPMAXSD", commutative: true, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPMINSD256", argLength: 2, reg: fp21, asm: "VPMINSD", commutative: true, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPMULDQ256", argLength: 2, reg: fp21, asm: "VPMULDQ", commutative: true, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPMULLD256", argLength: 2, reg: fp21, asm: "VPMULLD", commutative: true, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPDPWSSD256", argLength: 3, reg: fp31, asm: "VPDPWSSD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPHADDD256", argLength: 2, reg: fp21, asm: "VPHADDD", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPHSUBD256", argLength: 2, reg: fp21, asm: "VPHSUBD", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPOPCNTD256", argLength: 1, reg: fp11, asm: "VPOPCNTD", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPDPWSSDS256", argLength: 3, reg: fp31, asm: "VPDPWSSDS", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPDPBUSDS256", argLength: 3, reg: fp31, asm: "VPDPBUSDS", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPSIGND256", argLength: 2, reg: fp21, asm: "VPSIGND", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPSUBD256", argLength: 2, reg: fp21, asm: "VPSUBD", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPDPBUSD256", argLength: 3, reg: fp31, asm: "VPDPBUSD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPABSQ128", argLength: 1, reg: fp11, asm: "VPABSQ", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPADDQ128", argLength: 2, reg: fp21, asm: "VPADDQ", commutative: true, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPCMPEQQ128", argLength: 2, reg: fp21, asm: "VPCMPEQQ", commutative: true, typ: "Vec128", resultInArg0: false},
|
||||
|
|
|
|||
|
|
@ -427,16 +427,24 @@ func simdGenericOps() []opData {
|
|||
{name: "MaskedMulLowInt32x16", argLength: 3, commutative: true},
|
||||
{name: "MaskedNotEqualInt32x16", argLength: 3, commutative: true},
|
||||
{name: "MaskedOrInt32x16", argLength: 3, commutative: true},
|
||||
{name: "MaskedPairDotProdAccumulateInt32x16", argLength: 4, commutative: false},
|
||||
{name: "MaskedPopCountInt32x16", argLength: 2, commutative: false},
|
||||
{name: "MaskedSaturatedPairDotProdAccumulateInt32x16", argLength: 4, commutative: false},
|
||||
{name: "MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16", argLength: 4, commutative: false},
|
||||
{name: "MaskedSubInt32x16", argLength: 3, commutative: false},
|
||||
{name: "MaskedUnsignedSignedQuadDotProdAccumulateInt32x16", argLength: 4, commutative: false},
|
||||
{name: "MaskedXorInt32x16", argLength: 3, commutative: true},
|
||||
{name: "MaxInt32x16", argLength: 2, commutative: true},
|
||||
{name: "MinInt32x16", argLength: 2, commutative: true},
|
||||
{name: "MulLowInt32x16", argLength: 2, commutative: true},
|
||||
{name: "NotEqualInt32x16", argLength: 2, commutative: true},
|
||||
{name: "OrInt32x16", argLength: 2, commutative: true},
|
||||
{name: "PairDotProdAccumulateInt32x16", argLength: 3, commutative: false},
|
||||
{name: "PopCountInt32x16", argLength: 1, commutative: false},
|
||||
{name: "SaturatedPairDotProdAccumulateInt32x16", argLength: 3, commutative: false},
|
||||
{name: "SaturatedUnsignedSignedQuadDotProdAccumulateInt32x16", argLength: 3, commutative: false},
|
||||
{name: "SubInt32x16", argLength: 2, commutative: false},
|
||||
{name: "UnsignedSignedQuadDotProdAccumulateInt32x16", argLength: 3, commutative: false},
|
||||
{name: "XorInt32x16", argLength: 2, commutative: true},
|
||||
{name: "AbsoluteInt32x4", argLength: 1, commutative: false},
|
||||
{name: "AddInt32x4", argLength: 2, commutative: true},
|
||||
|
|
@ -461,8 +469,12 @@ func simdGenericOps() []opData {
|
|||
{name: "MaskedMulLowInt32x4", argLength: 3, commutative: true},
|
||||
{name: "MaskedNotEqualInt32x4", argLength: 3, commutative: true},
|
||||
{name: "MaskedOrInt32x4", argLength: 3, commutative: true},
|
||||
{name: "MaskedPairDotProdAccumulateInt32x4", argLength: 4, commutative: false},
|
||||
{name: "MaskedPopCountInt32x4", argLength: 2, commutative: false},
|
||||
{name: "MaskedSaturatedPairDotProdAccumulateInt32x4", argLength: 4, commutative: false},
|
||||
{name: "MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4", argLength: 4, commutative: false},
|
||||
{name: "MaskedSubInt32x4", argLength: 3, commutative: false},
|
||||
{name: "MaskedUnsignedSignedQuadDotProdAccumulateInt32x4", argLength: 4, commutative: false},
|
||||
{name: "MaskedXorInt32x4", argLength: 3, commutative: true},
|
||||
{name: "MaxInt32x4", argLength: 2, commutative: true},
|
||||
{name: "MinInt32x4", argLength: 2, commutative: true},
|
||||
|
|
@ -470,11 +482,15 @@ func simdGenericOps() []opData {
|
|||
{name: "MulLowInt32x4", argLength: 2, commutative: true},
|
||||
{name: "NotEqualInt32x4", argLength: 2, commutative: true},
|
||||
{name: "OrInt32x4", argLength: 2, commutative: true},
|
||||
{name: "PairDotProdAccumulateInt32x4", argLength: 3, commutative: false},
|
||||
{name: "PairwiseAddInt32x4", argLength: 2, commutative: false},
|
||||
{name: "PairwiseSubInt32x4", argLength: 2, commutative: false},
|
||||
{name: "PopCountInt32x4", argLength: 1, commutative: false},
|
||||
{name: "SaturatedPairDotProdAccumulateInt32x4", argLength: 3, commutative: false},
|
||||
{name: "SaturatedUnsignedSignedQuadDotProdAccumulateInt32x4", argLength: 3, commutative: false},
|
||||
{name: "SignInt32x4", argLength: 2, commutative: false},
|
||||
{name: "SubInt32x4", argLength: 2, commutative: false},
|
||||
{name: "UnsignedSignedQuadDotProdAccumulateInt32x4", argLength: 3, commutative: false},
|
||||
{name: "XorInt32x4", argLength: 2, commutative: true},
|
||||
{name: "AbsoluteInt32x8", argLength: 1, commutative: false},
|
||||
{name: "AddInt32x8", argLength: 2, commutative: true},
|
||||
|
|
@ -499,8 +515,12 @@ func simdGenericOps() []opData {
|
|||
{name: "MaskedMulLowInt32x8", argLength: 3, commutative: true},
|
||||
{name: "MaskedNotEqualInt32x8", argLength: 3, commutative: true},
|
||||
{name: "MaskedOrInt32x8", argLength: 3, commutative: true},
|
||||
{name: "MaskedPairDotProdAccumulateInt32x8", argLength: 4, commutative: false},
|
||||
{name: "MaskedPopCountInt32x8", argLength: 2, commutative: false},
|
||||
{name: "MaskedSaturatedPairDotProdAccumulateInt32x8", argLength: 4, commutative: false},
|
||||
{name: "MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8", argLength: 4, commutative: false},
|
||||
{name: "MaskedSubInt32x8", argLength: 3, commutative: false},
|
||||
{name: "MaskedUnsignedSignedQuadDotProdAccumulateInt32x8", argLength: 4, commutative: false},
|
||||
{name: "MaskedXorInt32x8", argLength: 3, commutative: true},
|
||||
{name: "MaxInt32x8", argLength: 2, commutative: true},
|
||||
{name: "MinInt32x8", argLength: 2, commutative: true},
|
||||
|
|
@ -508,11 +528,15 @@ func simdGenericOps() []opData {
|
|||
{name: "MulLowInt32x8", argLength: 2, commutative: true},
|
||||
{name: "NotEqualInt32x8", argLength: 2, commutative: true},
|
||||
{name: "OrInt32x8", argLength: 2, commutative: true},
|
||||
{name: "PairDotProdAccumulateInt32x8", argLength: 3, commutative: false},
|
||||
{name: "PairwiseAddInt32x8", argLength: 2, commutative: false},
|
||||
{name: "PairwiseSubInt32x8", argLength: 2, commutative: false},
|
||||
{name: "PopCountInt32x8", argLength: 1, commutative: false},
|
||||
{name: "SaturatedPairDotProdAccumulateInt32x8", argLength: 3, commutative: false},
|
||||
{name: "SaturatedUnsignedSignedQuadDotProdAccumulateInt32x8", argLength: 3, commutative: false},
|
||||
{name: "SignInt32x8", argLength: 2, commutative: false},
|
||||
{name: "SubInt32x8", argLength: 2, commutative: false},
|
||||
{name: "UnsignedSignedQuadDotProdAccumulateInt32x8", argLength: 3, commutative: false},
|
||||
{name: "XorInt32x8", argLength: 2, commutative: true},
|
||||
{name: "AbsoluteInt64x2", argLength: 1, commutative: false},
|
||||
{name: "AddInt64x2", argLength: 2, commutative: true},
|
||||
|
|
@ -845,14 +869,18 @@ func simdGenericOps() []opData {
|
|||
{name: "MaskedNotEqualUint32x16", argLength: 3, commutative: true},
|
||||
{name: "MaskedOrUint32x16", argLength: 3, commutative: true},
|
||||
{name: "MaskedPopCountUint32x16", argLength: 2, commutative: false},
|
||||
{name: "MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16", argLength: 4, commutative: false},
|
||||
{name: "MaskedSubUint32x16", argLength: 3, commutative: false},
|
||||
{name: "MaskedUnsignedSignedQuadDotProdAccumulateUint32x16", argLength: 4, commutative: false},
|
||||
{name: "MaskedXorUint32x16", argLength: 3, commutative: true},
|
||||
{name: "MaxUint32x16", argLength: 2, commutative: true},
|
||||
{name: "MinUint32x16", argLength: 2, commutative: true},
|
||||
{name: "NotEqualUint32x16", argLength: 2, commutative: true},
|
||||
{name: "OrUint32x16", argLength: 2, commutative: true},
|
||||
{name: "PopCountUint32x16", argLength: 1, commutative: false},
|
||||
{name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x16", argLength: 3, commutative: false},
|
||||
{name: "SubUint32x16", argLength: 2, commutative: false},
|
||||
{name: "UnsignedSignedQuadDotProdAccumulateUint32x16", argLength: 3, commutative: false},
|
||||
{name: "XorUint32x16", argLength: 2, commutative: true},
|
||||
{name: "AddUint32x4", argLength: 2, commutative: true},
|
||||
{name: "AndUint32x4", argLength: 2, commutative: true},
|
||||
|
|
@ -875,7 +903,9 @@ func simdGenericOps() []opData {
|
|||
{name: "MaskedNotEqualUint32x4", argLength: 3, commutative: true},
|
||||
{name: "MaskedOrUint32x4", argLength: 3, commutative: true},
|
||||
{name: "MaskedPopCountUint32x4", argLength: 2, commutative: false},
|
||||
{name: "MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4", argLength: 4, commutative: false},
|
||||
{name: "MaskedSubUint32x4", argLength: 3, commutative: false},
|
||||
{name: "MaskedUnsignedSignedQuadDotProdAccumulateUint32x4", argLength: 4, commutative: false},
|
||||
{name: "MaskedXorUint32x4", argLength: 3, commutative: true},
|
||||
{name: "MaxUint32x4", argLength: 2, commutative: true},
|
||||
{name: "MinUint32x4", argLength: 2, commutative: true},
|
||||
|
|
@ -885,7 +915,9 @@ func simdGenericOps() []opData {
|
|||
{name: "PairwiseAddUint32x4", argLength: 2, commutative: false},
|
||||
{name: "PairwiseSubUint32x4", argLength: 2, commutative: false},
|
||||
{name: "PopCountUint32x4", argLength: 1, commutative: false},
|
||||
{name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x4", argLength: 3, commutative: false},
|
||||
{name: "SubUint32x4", argLength: 2, commutative: false},
|
||||
{name: "UnsignedSignedQuadDotProdAccumulateUint32x4", argLength: 3, commutative: false},
|
||||
{name: "XorUint32x4", argLength: 2, commutative: true},
|
||||
{name: "AddUint32x8", argLength: 2, commutative: true},
|
||||
{name: "AndUint32x8", argLength: 2, commutative: true},
|
||||
|
|
@ -908,7 +940,9 @@ func simdGenericOps() []opData {
|
|||
{name: "MaskedNotEqualUint32x8", argLength: 3, commutative: true},
|
||||
{name: "MaskedOrUint32x8", argLength: 3, commutative: true},
|
||||
{name: "MaskedPopCountUint32x8", argLength: 2, commutative: false},
|
||||
{name: "MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8", argLength: 4, commutative: false},
|
||||
{name: "MaskedSubUint32x8", argLength: 3, commutative: false},
|
||||
{name: "MaskedUnsignedSignedQuadDotProdAccumulateUint32x8", argLength: 4, commutative: false},
|
||||
{name: "MaskedXorUint32x8", argLength: 3, commutative: true},
|
||||
{name: "MaxUint32x8", argLength: 2, commutative: true},
|
||||
{name: "MinUint32x8", argLength: 2, commutative: true},
|
||||
|
|
@ -918,7 +952,9 @@ func simdGenericOps() []opData {
|
|||
{name: "PairwiseAddUint32x8", argLength: 2, commutative: false},
|
||||
{name: "PairwiseSubUint32x8", argLength: 2, commutative: false},
|
||||
{name: "PopCountUint32x8", argLength: 1, commutative: false},
|
||||
{name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x8", argLength: 3, commutative: false},
|
||||
{name: "SubUint32x8", argLength: 2, commutative: false},
|
||||
{name: "UnsignedSignedQuadDotProdAccumulateUint32x8", argLength: 3, commutative: false},
|
||||
{name: "XorUint32x8", argLength: 2, commutative: true},
|
||||
{name: "AddUint64x2", argLength: 2, commutative: true},
|
||||
{name: "AndUint64x2", argLength: 2, commutative: true},
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -2696,6 +2696,12 @@ func rewriteValueAMD64(v *Value) bool {
|
|||
return rewriteValueAMD64_OpMaskedOrUint64x4(v)
|
||||
case OpMaskedOrUint64x8:
|
||||
return rewriteValueAMD64_OpMaskedOrUint64x8(v)
|
||||
case OpMaskedPairDotProdAccumulateInt32x16:
|
||||
return rewriteValueAMD64_OpMaskedPairDotProdAccumulateInt32x16(v)
|
||||
case OpMaskedPairDotProdAccumulateInt32x4:
|
||||
return rewriteValueAMD64_OpMaskedPairDotProdAccumulateInt32x4(v)
|
||||
case OpMaskedPairDotProdAccumulateInt32x8:
|
||||
return rewriteValueAMD64_OpMaskedPairDotProdAccumulateInt32x8(v)
|
||||
case OpMaskedPairDotProdInt16x16:
|
||||
return rewriteValueAMD64_OpMaskedPairDotProdInt16x16(v)
|
||||
case OpMaskedPairDotProdInt16x32:
|
||||
|
|
@ -2798,6 +2804,12 @@ func rewriteValueAMD64(v *Value) bool {
|
|||
return rewriteValueAMD64_OpMaskedSaturatedAddUint8x32(v)
|
||||
case OpMaskedSaturatedAddUint8x64:
|
||||
return rewriteValueAMD64_OpMaskedSaturatedAddUint8x64(v)
|
||||
case OpMaskedSaturatedPairDotProdAccumulateInt32x16:
|
||||
return rewriteValueAMD64_OpMaskedSaturatedPairDotProdAccumulateInt32x16(v)
|
||||
case OpMaskedSaturatedPairDotProdAccumulateInt32x4:
|
||||
return rewriteValueAMD64_OpMaskedSaturatedPairDotProdAccumulateInt32x4(v)
|
||||
case OpMaskedSaturatedPairDotProdAccumulateInt32x8:
|
||||
return rewriteValueAMD64_OpMaskedSaturatedPairDotProdAccumulateInt32x8(v)
|
||||
case OpMaskedSaturatedSubInt16x16:
|
||||
return rewriteValueAMD64_OpMaskedSaturatedSubInt16x16(v)
|
||||
case OpMaskedSaturatedSubInt16x32:
|
||||
|
|
@ -2828,6 +2840,18 @@ func rewriteValueAMD64(v *Value) bool {
|
|||
return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint16x32(v)
|
||||
case OpMaskedSaturatedUnsignedSignedPairDotProdUint16x8:
|
||||
return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint16x8(v)
|
||||
case OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16:
|
||||
return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16(v)
|
||||
case OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4:
|
||||
return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4(v)
|
||||
case OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8:
|
||||
return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8(v)
|
||||
case OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16:
|
||||
return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16(v)
|
||||
case OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4:
|
||||
return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4(v)
|
||||
case OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8:
|
||||
return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8(v)
|
||||
case OpMaskedSqrtFloat32x16:
|
||||
return rewriteValueAMD64_OpMaskedSqrtFloat32x16(v)
|
||||
case OpMaskedSqrtFloat32x4:
|
||||
|
|
@ -2924,6 +2948,18 @@ func rewriteValueAMD64(v *Value) bool {
|
|||
return rewriteValueAMD64_OpMaskedTruncWithPrecisionFloat64x4(v)
|
||||
case OpMaskedTruncWithPrecisionFloat64x8:
|
||||
return rewriteValueAMD64_OpMaskedTruncWithPrecisionFloat64x8(v)
|
||||
case OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x16:
|
||||
return rewriteValueAMD64_OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x16(v)
|
||||
case OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x4:
|
||||
return rewriteValueAMD64_OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x4(v)
|
||||
case OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x8:
|
||||
return rewriteValueAMD64_OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x8(v)
|
||||
case OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x16:
|
||||
return rewriteValueAMD64_OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x16(v)
|
||||
case OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x4:
|
||||
return rewriteValueAMD64_OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x4(v)
|
||||
case OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x8:
|
||||
return rewriteValueAMD64_OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x8(v)
|
||||
case OpMaskedXorFloat32x16:
|
||||
return rewriteValueAMD64_OpMaskedXorFloat32x16(v)
|
||||
case OpMaskedXorFloat32x4:
|
||||
|
|
@ -3490,6 +3526,15 @@ func rewriteValueAMD64(v *Value) bool {
|
|||
case OpOrUint8x32:
|
||||
v.Op = OpAMD64VPOR256
|
||||
return true
|
||||
case OpPairDotProdAccumulateInt32x16:
|
||||
v.Op = OpAMD64VPDPWSSD512
|
||||
return true
|
||||
case OpPairDotProdAccumulateInt32x4:
|
||||
v.Op = OpAMD64VPDPWSSD128
|
||||
return true
|
||||
case OpPairDotProdAccumulateInt32x8:
|
||||
v.Op = OpAMD64VPDPWSSD256
|
||||
return true
|
||||
case OpPairDotProdInt16x16:
|
||||
v.Op = OpAMD64VPMADDWD256
|
||||
return true
|
||||
|
|
@ -3813,6 +3858,15 @@ func rewriteValueAMD64(v *Value) bool {
|
|||
case OpSaturatedAddUint8x64:
|
||||
v.Op = OpAMD64VPADDSB512
|
||||
return true
|
||||
case OpSaturatedPairDotProdAccumulateInt32x16:
|
||||
v.Op = OpAMD64VPDPWSSDS512
|
||||
return true
|
||||
case OpSaturatedPairDotProdAccumulateInt32x4:
|
||||
v.Op = OpAMD64VPDPWSSDS128
|
||||
return true
|
||||
case OpSaturatedPairDotProdAccumulateInt32x8:
|
||||
v.Op = OpAMD64VPDPWSSDS256
|
||||
return true
|
||||
case OpSaturatedPairwiseAddInt16x16:
|
||||
v.Op = OpAMD64VPHADDSW256
|
||||
return true
|
||||
|
|
@ -3876,6 +3930,24 @@ func rewriteValueAMD64(v *Value) bool {
|
|||
case OpSaturatedUnsignedSignedPairDotProdUint8x32:
|
||||
v.Op = OpAMD64VPMADDUBSW256
|
||||
return true
|
||||
case OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16:
|
||||
v.Op = OpAMD64VPDPBUSDS512
|
||||
return true
|
||||
case OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4:
|
||||
v.Op = OpAMD64VPDPBUSDS128
|
||||
return true
|
||||
case OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8:
|
||||
v.Op = OpAMD64VPDPBUSDS256
|
||||
return true
|
||||
case OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16:
|
||||
v.Op = OpAMD64VPDPBUSDS512
|
||||
return true
|
||||
case OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4:
|
||||
v.Op = OpAMD64VPDPBUSDS128
|
||||
return true
|
||||
case OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8:
|
||||
v.Op = OpAMD64VPDPBUSDS256
|
||||
return true
|
||||
case OpSelect0:
|
||||
return rewriteValueAMD64_OpSelect0(v)
|
||||
case OpSelect1:
|
||||
|
|
@ -4119,6 +4191,24 @@ func rewriteValueAMD64(v *Value) bool {
|
|||
return rewriteValueAMD64_OpTruncWithPrecisionFloat64x4(v)
|
||||
case OpTruncWithPrecisionFloat64x8:
|
||||
return rewriteValueAMD64_OpTruncWithPrecisionFloat64x8(v)
|
||||
case OpUnsignedSignedQuadDotProdAccumulateInt32x16:
|
||||
v.Op = OpAMD64VPDPBUSD512
|
||||
return true
|
||||
case OpUnsignedSignedQuadDotProdAccumulateInt32x4:
|
||||
v.Op = OpAMD64VPDPBUSD128
|
||||
return true
|
||||
case OpUnsignedSignedQuadDotProdAccumulateInt32x8:
|
||||
v.Op = OpAMD64VPDPBUSD256
|
||||
return true
|
||||
case OpUnsignedSignedQuadDotProdAccumulateUint32x16:
|
||||
v.Op = OpAMD64VPDPBUSD512
|
||||
return true
|
||||
case OpUnsignedSignedQuadDotProdAccumulateUint32x4:
|
||||
v.Op = OpAMD64VPDPBUSD128
|
||||
return true
|
||||
case OpUnsignedSignedQuadDotProdAccumulateUint32x8:
|
||||
v.Op = OpAMD64VPDPBUSD256
|
||||
return true
|
||||
case OpWB:
|
||||
v.Op = OpAMD64LoweredWB
|
||||
return true
|
||||
|
|
@ -42772,6 +42862,66 @@ func rewriteValueAMD64_OpMaskedOrUint64x8(v *Value) bool {
|
|||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpMaskedPairDotProdAccumulateInt32x16(v *Value) bool {
|
||||
v_3 := v.Args[3]
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (MaskedPairDotProdAccumulateInt32x16 x y z mask)
|
||||
// result: (VPDPWSSDMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
|
||||
for {
|
||||
x := v_0
|
||||
y := v_1
|
||||
z := v_2
|
||||
mask := v_3
|
||||
v.reset(OpAMD64VPDPWSSDMasked512)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg4(x, y, z, v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpMaskedPairDotProdAccumulateInt32x4(v *Value) bool {
|
||||
v_3 := v.Args[3]
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (MaskedPairDotProdAccumulateInt32x4 x y z mask)
|
||||
// result: (VPDPWSSDMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
for {
|
||||
x := v_0
|
||||
y := v_1
|
||||
z := v_2
|
||||
mask := v_3
|
||||
v.reset(OpAMD64VPDPWSSDMasked128)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg4(x, y, z, v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpMaskedPairDotProdAccumulateInt32x8(v *Value) bool {
|
||||
v_3 := v.Args[3]
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (MaskedPairDotProdAccumulateInt32x8 x y z mask)
|
||||
// result: (VPDPWSSDMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
for {
|
||||
x := v_0
|
||||
y := v_1
|
||||
z := v_2
|
||||
mask := v_3
|
||||
v.reset(OpAMD64VPDPWSSDMasked256)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg4(x, y, z, v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpMaskedPairDotProdInt16x16(v *Value) bool {
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
|
|
@ -43642,6 +43792,66 @@ func rewriteValueAMD64_OpMaskedSaturatedAddUint8x64(v *Value) bool {
|
|||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpMaskedSaturatedPairDotProdAccumulateInt32x16(v *Value) bool {
|
||||
v_3 := v.Args[3]
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (MaskedSaturatedPairDotProdAccumulateInt32x16 x y z mask)
|
||||
// result: (VPDPWSSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
|
||||
for {
|
||||
x := v_0
|
||||
y := v_1
|
||||
z := v_2
|
||||
mask := v_3
|
||||
v.reset(OpAMD64VPDPWSSDSMasked512)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg4(x, y, z, v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpMaskedSaturatedPairDotProdAccumulateInt32x4(v *Value) bool {
|
||||
v_3 := v.Args[3]
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (MaskedSaturatedPairDotProdAccumulateInt32x4 x y z mask)
|
||||
// result: (VPDPWSSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
for {
|
||||
x := v_0
|
||||
y := v_1
|
||||
z := v_2
|
||||
mask := v_3
|
||||
v.reset(OpAMD64VPDPWSSDSMasked128)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg4(x, y, z, v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpMaskedSaturatedPairDotProdAccumulateInt32x8(v *Value) bool {
|
||||
v_3 := v.Args[3]
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (MaskedSaturatedPairDotProdAccumulateInt32x8 x y z mask)
|
||||
// result: (VPDPWSSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
for {
|
||||
x := v_0
|
||||
y := v_1
|
||||
z := v_2
|
||||
mask := v_3
|
||||
v.reset(OpAMD64VPDPWSSDSMasked256)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg4(x, y, z, v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpMaskedSaturatedSubInt16x16(v *Value) bool {
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
|
|
@ -43912,6 +44122,126 @@ func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint16x8(v *Val
|
|||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16(v *Value) bool {
|
||||
v_3 := v.Args[3]
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16 x y z mask)
|
||||
// result: (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
|
||||
for {
|
||||
x := v_0
|
||||
y := v_1
|
||||
z := v_2
|
||||
mask := v_3
|
||||
v.reset(OpAMD64VPDPBUSDSMasked512)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg4(x, y, z, v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4(v *Value) bool {
|
||||
v_3 := v.Args[3]
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4 x y z mask)
|
||||
// result: (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
for {
|
||||
x := v_0
|
||||
y := v_1
|
||||
z := v_2
|
||||
mask := v_3
|
||||
v.reset(OpAMD64VPDPBUSDSMasked128)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg4(x, y, z, v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8(v *Value) bool {
|
||||
v_3 := v.Args[3]
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8 x y z mask)
|
||||
// result: (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
for {
|
||||
x := v_0
|
||||
y := v_1
|
||||
z := v_2
|
||||
mask := v_3
|
||||
v.reset(OpAMD64VPDPBUSDSMasked256)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg4(x, y, z, v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16(v *Value) bool {
|
||||
v_3 := v.Args[3]
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16 x y z mask)
|
||||
// result: (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
|
||||
for {
|
||||
x := v_0
|
||||
y := v_1
|
||||
z := v_2
|
||||
mask := v_3
|
||||
v.reset(OpAMD64VPDPBUSDSMasked512)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg4(x, y, z, v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4(v *Value) bool {
|
||||
v_3 := v.Args[3]
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4 x y z mask)
|
||||
// result: (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
for {
|
||||
x := v_0
|
||||
y := v_1
|
||||
z := v_2
|
||||
mask := v_3
|
||||
v.reset(OpAMD64VPDPBUSDSMasked128)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg4(x, y, z, v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8(v *Value) bool {
|
||||
v_3 := v.Args[3]
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8 x y z mask)
|
||||
// result: (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
for {
|
||||
x := v_0
|
||||
y := v_1
|
||||
z := v_2
|
||||
mask := v_3
|
||||
v.reset(OpAMD64VPDPBUSDSMasked256)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg4(x, y, z, v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpMaskedSqrtFloat32x16(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
|
|
@ -44764,6 +45094,126 @@ func rewriteValueAMD64_OpMaskedTruncWithPrecisionFloat64x8(v *Value) bool {
|
|||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x16(v *Value) bool {
|
||||
v_3 := v.Args[3]
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (MaskedUnsignedSignedQuadDotProdAccumulateInt32x16 x y z mask)
|
||||
// result: (VPDPBUSDMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
|
||||
for {
|
||||
x := v_0
|
||||
y := v_1
|
||||
z := v_2
|
||||
mask := v_3
|
||||
v.reset(OpAMD64VPDPBUSDMasked512)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg4(x, y, z, v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x4(v *Value) bool {
|
||||
v_3 := v.Args[3]
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (MaskedUnsignedSignedQuadDotProdAccumulateInt32x4 x y z mask)
|
||||
// result: (VPDPBUSDMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
for {
|
||||
x := v_0
|
||||
y := v_1
|
||||
z := v_2
|
||||
mask := v_3
|
||||
v.reset(OpAMD64VPDPBUSDMasked128)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg4(x, y, z, v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x8(v *Value) bool {
|
||||
v_3 := v.Args[3]
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (MaskedUnsignedSignedQuadDotProdAccumulateInt32x8 x y z mask)
|
||||
// result: (VPDPBUSDMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
for {
|
||||
x := v_0
|
||||
y := v_1
|
||||
z := v_2
|
||||
mask := v_3
|
||||
v.reset(OpAMD64VPDPBUSDMasked256)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg4(x, y, z, v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x16(v *Value) bool {
|
||||
v_3 := v.Args[3]
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (MaskedUnsignedSignedQuadDotProdAccumulateUint32x16 x y z mask)
|
||||
// result: (VPDPBUSDMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
|
||||
for {
|
||||
x := v_0
|
||||
y := v_1
|
||||
z := v_2
|
||||
mask := v_3
|
||||
v.reset(OpAMD64VPDPBUSDMasked512)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg4(x, y, z, v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x4(v *Value) bool {
|
||||
v_3 := v.Args[3]
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (MaskedUnsignedSignedQuadDotProdAccumulateUint32x4 x y z mask)
|
||||
// result: (VPDPBUSDMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
for {
|
||||
x := v_0
|
||||
y := v_1
|
||||
z := v_2
|
||||
mask := v_3
|
||||
v.reset(OpAMD64VPDPBUSDMasked128)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg4(x, y, z, v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x8(v *Value) bool {
|
||||
v_3 := v.Args[3]
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (MaskedUnsignedSignedQuadDotProdAccumulateUint32x8 x y z mask)
|
||||
// result: (VPDPBUSDMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
for {
|
||||
x := v_0
|
||||
y := v_1
|
||||
z := v_2
|
||||
mask := v_3
|
||||
v.reset(OpAMD64VPDPBUSDMasked256)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg4(x, y, z, v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpMaskedXorFloat32x16(v *Value) bool {
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
|
|
|
|||
|
|
@ -833,6 +833,10 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
|||
addF(simdPackage, "Int32x16.MaskedOr", opLen3(ssa.OpMaskedOrInt32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int32x16.MaskedSub", opLen3(ssa.OpMaskedSubInt32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int32x16.MaskedXor", opLen3(ssa.OpMaskedXorInt32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int32x16.PairDotProdAccumulate", opLen3(ssa.OpPairDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int32x16.SaturatedPairDotProdAccumulate", opLen3(ssa.OpSaturatedPairDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int32x16.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int32x16.UnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpUnsignedSignedQuadDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.MaskedAdd", opLen3(ssa.OpMaskedAddInt32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.MaskedAnd", opLen3(ssa.OpMaskedAndInt32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.MaskedAndNot", opLen3(ssa.OpMaskedAndNotInt32x4, types.TypeVec128), sys.AMD64)
|
||||
|
|
@ -848,6 +852,10 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
|||
addF(simdPackage, "Int32x4.MaskedOr", opLen3(ssa.OpMaskedOrInt32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.MaskedSub", opLen3(ssa.OpMaskedSubInt32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.MaskedXor", opLen3(ssa.OpMaskedXorInt32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.PairDotProdAccumulate", opLen3(ssa.OpPairDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.SaturatedPairDotProdAccumulate", opLen3(ssa.OpSaturatedPairDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.UnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpUnsignedSignedQuadDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int32x8.MaskedAdd", opLen3(ssa.OpMaskedAddInt32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int32x8.MaskedAnd", opLen3(ssa.OpMaskedAndInt32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int32x8.MaskedAndNot", opLen3(ssa.OpMaskedAndNotInt32x8, types.TypeVec256), sys.AMD64)
|
||||
|
|
@ -863,6 +871,10 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
|||
addF(simdPackage, "Int32x8.MaskedOr", opLen3(ssa.OpMaskedOrInt32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int32x8.MaskedSub", opLen3(ssa.OpMaskedSubInt32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int32x8.MaskedXor", opLen3(ssa.OpMaskedXorInt32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int32x8.PairDotProdAccumulate", opLen3(ssa.OpPairDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int32x8.SaturatedPairDotProdAccumulate", opLen3(ssa.OpSaturatedPairDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int32x8.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int32x8.UnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpUnsignedSignedQuadDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int64x2.MaskedAdd", opLen3(ssa.OpMaskedAddInt64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int64x2.MaskedAnd", opLen3(ssa.OpMaskedAndInt64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int64x2.MaskedAndNot", opLen3(ssa.OpMaskedAndNotInt64x2, types.TypeVec128), sys.AMD64)
|
||||
|
|
@ -1006,6 +1018,8 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
|||
addF(simdPackage, "Uint32x16.MaskedOr", opLen3(ssa.OpMaskedOrUint32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x16.MaskedSub", opLen3(ssa.OpMaskedSubUint32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x16.MaskedXor", opLen3(ssa.OpMaskedXorUint32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x16.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x16.UnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpUnsignedSignedQuadDotProdAccumulateUint32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x4.MaskedAdd", opLen3(ssa.OpMaskedAddUint32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x4.MaskedAnd", opLen3(ssa.OpMaskedAndUint32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x4.MaskedAndNot", opLen3(ssa.OpMaskedAndNotUint32x4, types.TypeVec128), sys.AMD64)
|
||||
|
|
@ -1020,6 +1034,8 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
|||
addF(simdPackage, "Uint32x4.MaskedOr", opLen3(ssa.OpMaskedOrUint32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x4.MaskedSub", opLen3(ssa.OpMaskedSubUint32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x4.MaskedXor", opLen3(ssa.OpMaskedXorUint32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x4.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x4.UnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpUnsignedSignedQuadDotProdAccumulateUint32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x8.MaskedAdd", opLen3(ssa.OpMaskedAddUint32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x8.MaskedAnd", opLen3(ssa.OpMaskedAndUint32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x8.MaskedAndNot", opLen3(ssa.OpMaskedAndNotUint32x8, types.TypeVec256), sys.AMD64)
|
||||
|
|
@ -1034,6 +1050,8 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
|||
addF(simdPackage, "Uint32x8.MaskedOr", opLen3(ssa.OpMaskedOrUint32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x8.MaskedSub", opLen3(ssa.OpMaskedSubUint32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x8.MaskedXor", opLen3(ssa.OpMaskedXorUint32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x8.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x8.UnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpUnsignedSignedQuadDotProdAccumulateUint32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x2.MaskedAdd", opLen3(ssa.OpMaskedAddUint64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x2.MaskedAnd", opLen3(ssa.OpMaskedAndUint64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x2.MaskedAndNot", opLen3(ssa.OpMaskedAndNotUint64x2, types.TypeVec128), sys.AMD64)
|
||||
|
|
@ -1118,6 +1136,24 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
|||
addF(simdPackage, "Uint8x64.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddUint8x64, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x64.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubUint8x64, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x64.MaskedSub", opLen3(ssa.OpMaskedSubUint8x64, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int32x16.MaskedPairDotProdAccumulate", opLen4(ssa.OpMaskedPairDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int32x16.MaskedSaturatedPairDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedPairDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int32x16.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int32x16.MaskedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.MaskedPairDotProdAccumulate", opLen4(ssa.OpMaskedPairDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.MaskedSaturatedPairDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedPairDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.MaskedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int32x8.MaskedPairDotProdAccumulate", opLen4(ssa.OpMaskedPairDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int32x8.MaskedSaturatedPairDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedPairDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int32x8.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int32x8.MaskedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x16.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x16.MaskedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x4.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x4.MaskedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x8.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x8.MaskedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float32x16.CeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpCeilSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.CeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpCeilSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.CeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpCeilSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
|
||||
|
|
|
|||
|
|
@ -766,6 +766,7 @@ func (x Float64x2) AndNot(y Float64x2) Float64x2
|
|||
func (x Float64x2) Div(y Float64x2) Float64x2
|
||||
|
||||
// DotProdBroadcast multiplies all elements and broadcasts the sum.
|
||||
// Const Immediate = 127.
|
||||
//
|
||||
// Asm: VDPPD, CPU Feature: AVX
|
||||
func (x Float64x2) DotProdBroadcast(y Float64x2) Float64x2
|
||||
|
|
@ -4437,6 +4438,26 @@ func (x Int32x16) MaskedSub(y Int32x16, z Mask32x16) Int32x16
|
|||
// Asm: VPXORD, CPU Feature: AVX512EVEX
|
||||
func (x Int32x16) MaskedXor(y Int32x16, z Mask32x16) Int32x16
|
||||
|
||||
// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
|
||||
//
|
||||
// Asm: VPDPWSSD, CPU Feature: AVX512EVEX
|
||||
func (x Int32x16) PairDotProdAccumulate(y Int16x32, z Int32x16) Int32x16
|
||||
|
||||
// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
|
||||
//
|
||||
// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX
|
||||
func (x Int32x16) SaturatedPairDotProdAccumulate(y Int16x32, z Int32x16) Int32x16
|
||||
|
||||
// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
||||
//
|
||||
// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
|
||||
func (x Int32x16) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16) Int32x16
|
||||
|
||||
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
||||
//
|
||||
// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
|
||||
func (x Int32x16) UnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16) Int32x16
|
||||
|
||||
// Add adds corresponding elements of two vectors.
|
||||
//
|
||||
// Asm: VPADDD, CPU Feature: AVX512EVEX
|
||||
|
|
@ -4518,6 +4539,26 @@ func (x Int32x4) MaskedSub(y Int32x4, z Mask32x4) Int32x4
|
|||
// Asm: VPXORD, CPU Feature: AVX512EVEX
|
||||
func (x Int32x4) MaskedXor(y Int32x4, z Mask32x4) Int32x4
|
||||
|
||||
// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
|
||||
//
|
||||
// Asm: VPDPWSSD, CPU Feature: AVX_VNNI
|
||||
func (x Int32x4) PairDotProdAccumulate(y Int32x4, z Int32x4) Int32x4
|
||||
|
||||
// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
|
||||
//
|
||||
// Asm: VPDPWSSDS, CPU Feature: AVX_VNNI
|
||||
func (x Int32x4) SaturatedPairDotProdAccumulate(y Int32x4, z Int32x4) Int32x4
|
||||
|
||||
// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
||||
//
|
||||
// Asm: VPDPBUSDS, CPU Feature: AVX_VNNI
|
||||
func (x Int32x4) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint32x4, z Int32x4) Int32x4
|
||||
|
||||
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
||||
//
|
||||
// Asm: VPDPBUSD, CPU Feature: AVX_VNNI
|
||||
func (x Int32x4) UnsignedSignedQuadDotProdAccumulate(y Uint32x4, z Int32x4) Int32x4
|
||||
|
||||
// Add adds corresponding elements of two vectors.
|
||||
//
|
||||
// Asm: VPADDD, CPU Feature: AVX512EVEX
|
||||
|
|
@ -4599,6 +4640,26 @@ func (x Int32x8) MaskedSub(y Int32x8, z Mask32x8) Int32x8
|
|||
// Asm: VPXORD, CPU Feature: AVX512EVEX
|
||||
func (x Int32x8) MaskedXor(y Int32x8, z Mask32x8) Int32x8
|
||||
|
||||
// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
|
||||
//
|
||||
// Asm: VPDPWSSD, CPU Feature: AVX_VNNI
|
||||
func (x Int32x8) PairDotProdAccumulate(y Int32x8, z Int32x8) Int32x8
|
||||
|
||||
// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
|
||||
//
|
||||
// Asm: VPDPWSSDS, CPU Feature: AVX_VNNI
|
||||
func (x Int32x8) SaturatedPairDotProdAccumulate(y Int32x8, z Int32x8) Int32x8
|
||||
|
||||
// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
||||
//
|
||||
// Asm: VPDPBUSDS, CPU Feature: AVX_VNNI
|
||||
func (x Int32x8) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint32x8, z Int32x8) Int32x8
|
||||
|
||||
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
||||
//
|
||||
// Asm: VPDPBUSD, CPU Feature: AVX_VNNI
|
||||
func (x Int32x8) UnsignedSignedQuadDotProdAccumulate(y Uint32x8, z Int32x8) Int32x8
|
||||
|
||||
// Add adds corresponding elements of two vectors.
|
||||
//
|
||||
// Asm: VPADDQ, CPU Feature: AVX512EVEX
|
||||
|
|
@ -5380,6 +5441,16 @@ func (x Uint32x16) MaskedSub(y Uint32x16, z Mask32x16) Uint32x16
|
|||
// Asm: VPXORD, CPU Feature: AVX512EVEX
|
||||
func (x Uint32x16) MaskedXor(y Uint32x16, z Mask32x16) Uint32x16
|
||||
|
||||
// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
||||
//
|
||||
// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
|
||||
func (x Uint32x16) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16) Uint32x16
|
||||
|
||||
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
||||
//
|
||||
// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
|
||||
func (x Uint32x16) UnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16) Uint32x16
|
||||
|
||||
// Add adds corresponding elements of two vectors.
|
||||
//
|
||||
// Asm: VPADDD, CPU Feature: AVX512EVEX
|
||||
|
|
@ -5456,6 +5527,16 @@ func (x Uint32x4) MaskedSub(y Uint32x4, z Mask32x4) Uint32x4
|
|||
// Asm: VPXORD, CPU Feature: AVX512EVEX
|
||||
func (x Uint32x4) MaskedXor(y Uint32x4, z Mask32x4) Uint32x4
|
||||
|
||||
// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
||||
//
|
||||
// Asm: VPDPBUSDS, CPU Feature: AVX_VNNI
|
||||
func (x Uint32x4) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint32x4, z Int32x4) Uint32x4
|
||||
|
||||
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
||||
//
|
||||
// Asm: VPDPBUSD, CPU Feature: AVX_VNNI
|
||||
func (x Uint32x4) UnsignedSignedQuadDotProdAccumulate(y Uint32x4, z Int32x4) Uint32x4
|
||||
|
||||
// Add adds corresponding elements of two vectors.
|
||||
//
|
||||
// Asm: VPADDD, CPU Feature: AVX512EVEX
|
||||
|
|
@ -5532,6 +5613,16 @@ func (x Uint32x8) MaskedSub(y Uint32x8, z Mask32x8) Uint32x8
|
|||
// Asm: VPXORD, CPU Feature: AVX512EVEX
|
||||
func (x Uint32x8) MaskedXor(y Uint32x8, z Mask32x8) Uint32x8
|
||||
|
||||
// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
||||
//
|
||||
// Asm: VPDPBUSDS, CPU Feature: AVX_VNNI
|
||||
func (x Uint32x8) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint32x8, z Int32x8) Uint32x8
|
||||
|
||||
// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
||||
//
|
||||
// Asm: VPDPBUSD, CPU Feature: AVX_VNNI
|
||||
func (x Uint32x8) UnsignedSignedQuadDotProdAccumulate(y Uint32x8, z Int32x8) Uint32x8
|
||||
|
||||
// Add adds corresponding elements of two vectors.
|
||||
//
|
||||
// Asm: VPADDQ, CPU Feature: AVX512EVEX
|
||||
|
|
@ -5991,6 +6082,96 @@ func (x Uint8x64) MaskedSaturatedSub(y Uint8x64, z Mask8x64) Uint8x64
|
|||
// Asm: VPSUBB, CPU Feature: AVX512EVEX
|
||||
func (x Uint8x64) MaskedSub(y Uint8x64, z Mask8x64) Uint8x64
|
||||
|
||||
// MaskedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
|
||||
//
|
||||
// Asm: VPDPWSSD, CPU Feature: AVX512EVEX
|
||||
func (x Int32x16) MaskedPairDotProdAccumulate(y Int16x32, z Int32x16, u Mask32x16) Int32x16
|
||||
|
||||
// MaskedSaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
|
||||
//
|
||||
// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX
|
||||
func (x Int32x16) MaskedSaturatedPairDotProdAccumulate(y Int16x32, z Int32x16, u Mask32x16) Int32x16
|
||||
|
||||
// MaskedSaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
||||
//
|
||||
// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
|
||||
func (x Int32x16) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16, u Mask32x16) Int32x16
|
||||
|
||||
// MaskedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
||||
//
|
||||
// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
|
||||
func (x Int32x16) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16, u Mask32x16) Int32x16
|
||||
|
||||
// MaskedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
|
||||
//
|
||||
// Asm: VPDPWSSD, CPU Feature: AVX512EVEX
|
||||
func (x Int32x4) MaskedPairDotProdAccumulate(y Int16x8, z Int32x4, u Mask32x4) Int32x4
|
||||
|
||||
// MaskedSaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
|
||||
//
|
||||
// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX
|
||||
func (x Int32x4) MaskedSaturatedPairDotProdAccumulate(y Int16x8, z Int32x4, u Mask32x4) Int32x4
|
||||
|
||||
// MaskedSaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
||||
//
|
||||
// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
|
||||
func (x Int32x4) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int32x4, u Mask32x4) Int32x4
|
||||
|
||||
// MaskedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
||||
//
|
||||
// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
|
||||
func (x Int32x4) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int32x4, u Mask32x4) Int32x4
|
||||
|
||||
// MaskedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
|
||||
//
|
||||
// Asm: VPDPWSSD, CPU Feature: AVX512EVEX
|
||||
func (x Int32x8) MaskedPairDotProdAccumulate(y Int16x16, z Int32x8, u Mask32x8) Int32x8
|
||||
|
||||
// MaskedSaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
|
||||
//
|
||||
// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX
|
||||
func (x Int32x8) MaskedSaturatedPairDotProdAccumulate(y Int16x16, z Int32x8, u Mask32x8) Int32x8
|
||||
|
||||
// MaskedSaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
||||
//
|
||||
// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
|
||||
func (x Int32x8) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int32x8, u Mask32x8) Int32x8
|
||||
|
||||
// MaskedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
||||
//
|
||||
// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
|
||||
func (x Int32x8) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int32x8, u Mask32x8) Int32x8
|
||||
|
||||
// MaskedSaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
||||
//
|
||||
// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
|
||||
func (x Uint32x16) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16, u Mask32x16) Uint32x16
|
||||
|
||||
// MaskedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
||||
//
|
||||
// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
|
||||
func (x Uint32x16) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16, u Mask32x16) Uint32x16
|
||||
|
||||
// MaskedSaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
||||
//
|
||||
// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
|
||||
func (x Uint32x4) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int32x4, u Mask32x4) Uint32x4
|
||||
|
||||
// MaskedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
||||
//
|
||||
// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
|
||||
func (x Uint32x4) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int32x4, u Mask32x4) Uint32x4
|
||||
|
||||
// MaskedSaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
||||
//
|
||||
// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
|
||||
func (x Uint32x8) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int32x8, u Mask32x8) Uint32x8
|
||||
|
||||
// MaskedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
|
||||
//
|
||||
// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
|
||||
func (x Uint32x8) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int32x8, u Mask32x8) Uint32x8
|
||||
|
||||
// CeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions.
|
||||
// Const Immediate = 10.
|
||||
//
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue