[dev.simd] cmd/compile, simd: API interface fixes

- Absolute -> Abs
- ApproximateReciprocal -> Reciprocal
  - Other derived APIs also changed.
- Round -> RoundToEven
  - Other derived APIs also changed.
- Drop DotProdBroadcast
- Fused(Mul|Add)(Mul|Add)? -> remove the "Fused"
- MulEvenWiden -> remove 64-bit
- MulLow -> Mul, add uint
- PairDotProd -> DotProdPairs
  - Make AddDotProdPairs machine ops only; the peepholes will be in
    another CL at dev.simd.
- PopCount -> OnesCount
- Saturated* -> *Saturated
- Fix (Add|Sub)Saturated uint mappings.
- UnsignedSignedQuadDotProdAccumulate -> AddDotProdQuadruple
  - The "DotProdQuadruple" instruction does not exist, so there are no
    peepholes for this.
This CL is generated by CL 694095.

Change-Id: If4110cc04ab96240cf56f2348d35ed2a719687de
Reviewed-on: https://go-review.googlesource.com/c/go/+/694115
Reviewed-by: David Chase <drchase@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
Junyang Shao 2025-08-07 17:05:50 +00:00
parent b226bcc4a9
commit 8eb5f6020e
11 changed files with 5058 additions and 5238 deletions

View file

@ -24,18 +24,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPABSQ128, ssa.OpAMD64VPABSQ128,
ssa.OpAMD64VPABSQ256, ssa.OpAMD64VPABSQ256,
ssa.OpAMD64VPABSQ512, ssa.OpAMD64VPABSQ512,
ssa.OpAMD64VRCPPS128,
ssa.OpAMD64VRCPPS256,
ssa.OpAMD64VRCP14PS512,
ssa.OpAMD64VRCP14PD128,
ssa.OpAMD64VRCP14PD256,
ssa.OpAMD64VRCP14PD512,
ssa.OpAMD64VRSQRTPS128,
ssa.OpAMD64VRSQRTPS256,
ssa.OpAMD64VRSQRT14PS512,
ssa.OpAMD64VRSQRT14PD128,
ssa.OpAMD64VRSQRT14PD256,
ssa.OpAMD64VRSQRT14PD512,
ssa.OpAMD64VCVTTPS2DQ128, ssa.OpAMD64VCVTTPS2DQ128,
ssa.OpAMD64VCVTTPS2DQ256, ssa.OpAMD64VCVTTPS2DQ256,
ssa.OpAMD64VCVTTPS2DQ512, ssa.OpAMD64VCVTTPS2DQ512,
@ -54,6 +42,18 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPOPCNTQ128, ssa.OpAMD64VPOPCNTQ128,
ssa.OpAMD64VPOPCNTQ256, ssa.OpAMD64VPOPCNTQ256,
ssa.OpAMD64VPOPCNTQ512, ssa.OpAMD64VPOPCNTQ512,
ssa.OpAMD64VRCPPS128,
ssa.OpAMD64VRCPPS256,
ssa.OpAMD64VRCP14PS512,
ssa.OpAMD64VRCP14PD128,
ssa.OpAMD64VRCP14PD256,
ssa.OpAMD64VRCP14PD512,
ssa.OpAMD64VRSQRTPS128,
ssa.OpAMD64VRSQRTPS256,
ssa.OpAMD64VRSQRT14PS512,
ssa.OpAMD64VRSQRT14PD128,
ssa.OpAMD64VRSQRT14PD256,
ssa.OpAMD64VRSQRT14PD512,
ssa.OpAMD64VSQRTPS128, ssa.OpAMD64VSQRTPS128,
ssa.OpAMD64VSQRTPS256, ssa.OpAMD64VSQRTPS256,
ssa.OpAMD64VSQRTPS512, ssa.OpAMD64VSQRTPS512,
@ -96,6 +96,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPADDSW128, ssa.OpAMD64VPADDSW128,
ssa.OpAMD64VPADDSW256, ssa.OpAMD64VPADDSW256,
ssa.OpAMD64VPADDSW512, ssa.OpAMD64VPADDSW512,
ssa.OpAMD64VPADDUSB128,
ssa.OpAMD64VPADDUSB256,
ssa.OpAMD64VPADDUSB512,
ssa.OpAMD64VPADDUSW128,
ssa.OpAMD64VPADDUSW256,
ssa.OpAMD64VPADDUSW512,
ssa.OpAMD64VADDSUBPS128, ssa.OpAMD64VADDSUBPS128,
ssa.OpAMD64VADDSUBPS256, ssa.OpAMD64VADDSUBPS256,
ssa.OpAMD64VADDSUBPD128, ssa.OpAMD64VADDSUBPD128,
@ -114,12 +120,24 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPAVGW128, ssa.OpAMD64VPAVGW128,
ssa.OpAMD64VPAVGW256, ssa.OpAMD64VPAVGW256,
ssa.OpAMD64VPAVGW512, ssa.OpAMD64VPAVGW512,
ssa.OpAMD64VPSIGNB128,
ssa.OpAMD64VPSIGNB256,
ssa.OpAMD64VPSIGNW128,
ssa.OpAMD64VPSIGNW256,
ssa.OpAMD64VPSIGND128,
ssa.OpAMD64VPSIGND256,
ssa.OpAMD64VDIVPS128, ssa.OpAMD64VDIVPS128,
ssa.OpAMD64VDIVPS256, ssa.OpAMD64VDIVPS256,
ssa.OpAMD64VDIVPS512, ssa.OpAMD64VDIVPS512,
ssa.OpAMD64VDIVPD128, ssa.OpAMD64VDIVPD128,
ssa.OpAMD64VDIVPD256, ssa.OpAMD64VDIVPD256,
ssa.OpAMD64VDIVPD512, ssa.OpAMD64VDIVPD512,
ssa.OpAMD64VPMADDWD128,
ssa.OpAMD64VPMADDWD256,
ssa.OpAMD64VPMADDWD512,
ssa.OpAMD64VPMADDUBSW128,
ssa.OpAMD64VPMADDUBSW256,
ssa.OpAMD64VPMADDUBSW512,
ssa.OpAMD64VPCMPEQB128, ssa.OpAMD64VPCMPEQB128,
ssa.OpAMD64VPCMPEQB256, ssa.OpAMD64VPCMPEQB256,
ssa.OpAMD64VPCMPEQW128, ssa.OpAMD64VPCMPEQW128,
@ -216,23 +234,15 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPMULLQ512, ssa.OpAMD64VPMULLQ512,
ssa.OpAMD64VPMULDQ128, ssa.OpAMD64VPMULDQ128,
ssa.OpAMD64VPMULDQ256, ssa.OpAMD64VPMULDQ256,
ssa.OpAMD64VPMULDQ512,
ssa.OpAMD64VPMULUDQ128, ssa.OpAMD64VPMULUDQ128,
ssa.OpAMD64VPMULUDQ256, ssa.OpAMD64VPMULUDQ256,
ssa.OpAMD64VPMULUDQ512,
ssa.OpAMD64VPMULHW128,
ssa.OpAMD64VPMULHW256,
ssa.OpAMD64VPMULHW512,
ssa.OpAMD64VPMULHUW128, ssa.OpAMD64VPMULHUW128,
ssa.OpAMD64VPMULHUW256, ssa.OpAMD64VPMULHUW256,
ssa.OpAMD64VPMULHUW512, ssa.OpAMD64VPMULHW512,
ssa.OpAMD64VPOR128, ssa.OpAMD64VPOR128,
ssa.OpAMD64VPOR256, ssa.OpAMD64VPOR256,
ssa.OpAMD64VPORD512, ssa.OpAMD64VPORD512,
ssa.OpAMD64VPORQ512, ssa.OpAMD64VPORQ512,
ssa.OpAMD64VPMADDWD128,
ssa.OpAMD64VPMADDWD256,
ssa.OpAMD64VPMADDWD512,
ssa.OpAMD64VPERMB128, ssa.OpAMD64VPERMB128,
ssa.OpAMD64VPERMB256, ssa.OpAMD64VPERMB256,
ssa.OpAMD64VPERMB512, ssa.OpAMD64VPERMB512,
@ -259,9 +269,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPRORVQ128, ssa.OpAMD64VPRORVQ128,
ssa.OpAMD64VPRORVQ256, ssa.OpAMD64VPRORVQ256,
ssa.OpAMD64VPRORVQ512, ssa.OpAMD64VPRORVQ512,
ssa.OpAMD64VPMADDUBSW128,
ssa.OpAMD64VPMADDUBSW256,
ssa.OpAMD64VPMADDUBSW512,
ssa.OpAMD64VSCALEFPS128, ssa.OpAMD64VSCALEFPS128,
ssa.OpAMD64VSCALEFPS256, ssa.OpAMD64VSCALEFPS256,
ssa.OpAMD64VSCALEFPS512, ssa.OpAMD64VSCALEFPS512,
@ -295,12 +302,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPSRLVQ128, ssa.OpAMD64VPSRLVQ128,
ssa.OpAMD64VPSRLVQ256, ssa.OpAMD64VPSRLVQ256,
ssa.OpAMD64VPSRLVQ512, ssa.OpAMD64VPSRLVQ512,
ssa.OpAMD64VPSIGNB128,
ssa.OpAMD64VPSIGNB256,
ssa.OpAMD64VPSIGNW128,
ssa.OpAMD64VPSIGNW256,
ssa.OpAMD64VPSIGND128,
ssa.OpAMD64VPSIGND256,
ssa.OpAMD64VSUBPS128, ssa.OpAMD64VSUBPS128,
ssa.OpAMD64VSUBPS256, ssa.OpAMD64VSUBPS256,
ssa.OpAMD64VSUBPS512, ssa.OpAMD64VSUBPS512,
@ -335,6 +336,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPSUBSW128, ssa.OpAMD64VPSUBSW128,
ssa.OpAMD64VPSUBSW256, ssa.OpAMD64VPSUBSW256,
ssa.OpAMD64VPSUBSW512, ssa.OpAMD64VPSUBSW512,
ssa.OpAMD64VPSUBUSB128,
ssa.OpAMD64VPSUBUSB256,
ssa.OpAMD64VPSUBUSB512,
ssa.OpAMD64VPSUBUSW128,
ssa.OpAMD64VPSUBUSW256,
ssa.OpAMD64VPSUBUSW512,
ssa.OpAMD64VPXOR128, ssa.OpAMD64VPXOR128,
ssa.OpAMD64VPXOR256, ssa.OpAMD64VPXOR256,
ssa.OpAMD64VPXORD512, ssa.OpAMD64VPXORD512,
@ -375,6 +382,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPADDSWMasked128, ssa.OpAMD64VPADDSWMasked128,
ssa.OpAMD64VPADDSWMasked256, ssa.OpAMD64VPADDSWMasked256,
ssa.OpAMD64VPADDSWMasked512, ssa.OpAMD64VPADDSWMasked512,
ssa.OpAMD64VPADDUSBMasked128,
ssa.OpAMD64VPADDUSBMasked256,
ssa.OpAMD64VPADDUSBMasked512,
ssa.OpAMD64VPADDUSWMasked128,
ssa.OpAMD64VPADDUSWMasked256,
ssa.OpAMD64VPADDUSWMasked512,
ssa.OpAMD64VPANDDMasked128, ssa.OpAMD64VPANDDMasked128,
ssa.OpAMD64VPANDDMasked256, ssa.OpAMD64VPANDDMasked256,
ssa.OpAMD64VPANDDMasked512, ssa.OpAMD64VPANDDMasked512,
@ -399,6 +412,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VDIVPDMasked128, ssa.OpAMD64VDIVPDMasked128,
ssa.OpAMD64VDIVPDMasked256, ssa.OpAMD64VDIVPDMasked256,
ssa.OpAMD64VDIVPDMasked512, ssa.OpAMD64VDIVPDMasked512,
ssa.OpAMD64VPMADDWDMasked128,
ssa.OpAMD64VPMADDWDMasked256,
ssa.OpAMD64VPMADDWDMasked512,
ssa.OpAMD64VPMADDUBSWMasked128,
ssa.OpAMD64VPMADDUBSWMasked256,
ssa.OpAMD64VPMADDUBSWMasked512,
ssa.OpAMD64VGF2P8MULBMasked128, ssa.OpAMD64VGF2P8MULBMasked128,
ssa.OpAMD64VGF2P8MULBMasked256, ssa.OpAMD64VGF2P8MULBMasked256,
ssa.OpAMD64VGF2P8MULBMasked512, ssa.OpAMD64VGF2P8MULBMasked512,
@ -462,17 +481,8 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPMINUQMasked128, ssa.OpAMD64VPMINUQMasked128,
ssa.OpAMD64VPMINUQMasked256, ssa.OpAMD64VPMINUQMasked256,
ssa.OpAMD64VPMINUQMasked512, ssa.OpAMD64VPMINUQMasked512,
ssa.OpAMD64VPMULDQMasked128,
ssa.OpAMD64VPMULDQMasked256,
ssa.OpAMD64VPMULDQMasked512,
ssa.OpAMD64VPMULUDQMasked128,
ssa.OpAMD64VPMULUDQMasked256,
ssa.OpAMD64VPMULUDQMasked512,
ssa.OpAMD64VPMULHWMasked128,
ssa.OpAMD64VPMULHWMasked256,
ssa.OpAMD64VPMULHWMasked512,
ssa.OpAMD64VPMULHUWMasked128, ssa.OpAMD64VPMULHUWMasked128,
ssa.OpAMD64VPMULHUWMasked256, ssa.OpAMD64VPMULHWMasked256,
ssa.OpAMD64VPMULHUWMasked512, ssa.OpAMD64VPMULHUWMasked512,
ssa.OpAMD64VMULPSMasked128, ssa.OpAMD64VMULPSMasked128,
ssa.OpAMD64VMULPSMasked256, ssa.OpAMD64VMULPSMasked256,
@ -495,9 +505,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPORQMasked128, ssa.OpAMD64VPORQMasked128,
ssa.OpAMD64VPORQMasked256, ssa.OpAMD64VPORQMasked256,
ssa.OpAMD64VPORQMasked512, ssa.OpAMD64VPORQMasked512,
ssa.OpAMD64VPMADDWDMasked128,
ssa.OpAMD64VPMADDWDMasked256,
ssa.OpAMD64VPMADDWDMasked512,
ssa.OpAMD64VPERMBMasked128, ssa.OpAMD64VPERMBMasked128,
ssa.OpAMD64VPERMBMasked256, ssa.OpAMD64VPERMBMasked256,
ssa.OpAMD64VPERMBMasked512, ssa.OpAMD64VPERMBMasked512,
@ -524,9 +531,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPRORVQMasked128, ssa.OpAMD64VPRORVQMasked128,
ssa.OpAMD64VPRORVQMasked256, ssa.OpAMD64VPRORVQMasked256,
ssa.OpAMD64VPRORVQMasked512, ssa.OpAMD64VPRORVQMasked512,
ssa.OpAMD64VPMADDUBSWMasked128,
ssa.OpAMD64VPMADDUBSWMasked256,
ssa.OpAMD64VPMADDUBSWMasked512,
ssa.OpAMD64VSCALEFPSMasked128, ssa.OpAMD64VSCALEFPSMasked128,
ssa.OpAMD64VSCALEFPSMasked256, ssa.OpAMD64VSCALEFPSMasked256,
ssa.OpAMD64VSCALEFPSMasked512, ssa.OpAMD64VSCALEFPSMasked512,
@ -584,6 +588,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPSUBSWMasked128, ssa.OpAMD64VPSUBSWMasked128,
ssa.OpAMD64VPSUBSWMasked256, ssa.OpAMD64VPSUBSWMasked256,
ssa.OpAMD64VPSUBSWMasked512, ssa.OpAMD64VPSUBSWMasked512,
ssa.OpAMD64VPSUBUSBMasked128,
ssa.OpAMD64VPSUBUSBMasked256,
ssa.OpAMD64VPSUBUSBMasked512,
ssa.OpAMD64VPSUBUSWMasked128,
ssa.OpAMD64VPSUBUSWMasked256,
ssa.OpAMD64VPSUBUSWMasked512,
ssa.OpAMD64VPXORDMasked128, ssa.OpAMD64VPXORDMasked128,
ssa.OpAMD64VPXORDMasked256, ssa.OpAMD64VPXORDMasked256,
ssa.OpAMD64VPXORDMasked512, ssa.OpAMD64VPXORDMasked512,
@ -608,18 +618,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPABSQMasked128, ssa.OpAMD64VPABSQMasked128,
ssa.OpAMD64VPABSQMasked256, ssa.OpAMD64VPABSQMasked256,
ssa.OpAMD64VPABSQMasked512, ssa.OpAMD64VPABSQMasked512,
ssa.OpAMD64VRCP14PSMasked128,
ssa.OpAMD64VRCP14PSMasked256,
ssa.OpAMD64VRCP14PSMasked512,
ssa.OpAMD64VRCP14PDMasked128,
ssa.OpAMD64VRCP14PDMasked256,
ssa.OpAMD64VRCP14PDMasked512,
ssa.OpAMD64VRSQRT14PSMasked128,
ssa.OpAMD64VRSQRT14PSMasked256,
ssa.OpAMD64VRSQRT14PSMasked512,
ssa.OpAMD64VRSQRT14PDMasked128,
ssa.OpAMD64VRSQRT14PDMasked256,
ssa.OpAMD64VRSQRT14PDMasked512,
ssa.OpAMD64VCOMPRESSPSMasked128, ssa.OpAMD64VCOMPRESSPSMasked128,
ssa.OpAMD64VCOMPRESSPSMasked256, ssa.OpAMD64VCOMPRESSPSMasked256,
ssa.OpAMD64VCOMPRESSPSMasked512, ssa.OpAMD64VCOMPRESSPSMasked512,
@ -674,6 +672,18 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPOPCNTQMasked128, ssa.OpAMD64VPOPCNTQMasked128,
ssa.OpAMD64VPOPCNTQMasked256, ssa.OpAMD64VPOPCNTQMasked256,
ssa.OpAMD64VPOPCNTQMasked512, ssa.OpAMD64VPOPCNTQMasked512,
ssa.OpAMD64VRCP14PSMasked128,
ssa.OpAMD64VRCP14PSMasked256,
ssa.OpAMD64VRCP14PSMasked512,
ssa.OpAMD64VRCP14PDMasked128,
ssa.OpAMD64VRCP14PDMasked256,
ssa.OpAMD64VRCP14PDMasked512,
ssa.OpAMD64VRSQRT14PSMasked128,
ssa.OpAMD64VRSQRT14PSMasked256,
ssa.OpAMD64VRSQRT14PSMasked512,
ssa.OpAMD64VRSQRT14PDMasked128,
ssa.OpAMD64VRSQRT14PDMasked256,
ssa.OpAMD64VRSQRT14PDMasked512,
ssa.OpAMD64VSQRTPSMasked128, ssa.OpAMD64VSQRTPSMasked128,
ssa.OpAMD64VSQRTPSMasked256, ssa.OpAMD64VSQRTPSMasked256,
ssa.OpAMD64VSQRTPSMasked512, ssa.OpAMD64VSQRTPSMasked512,
@ -800,10 +810,7 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPSRAQMasked512const: ssa.OpAMD64VPSRAQMasked512const:
p = simdVkvImm8(s, v) p = simdVkvImm8(s, v)
case ssa.OpAMD64VDPPS128, case ssa.OpAMD64VCMPPS128,
ssa.OpAMD64VDPPS256,
ssa.OpAMD64VDPPD128,
ssa.OpAMD64VCMPPS128,
ssa.OpAMD64VCMPPS256, ssa.OpAMD64VCMPPS256,
ssa.OpAMD64VCMPPD128, ssa.OpAMD64VCMPPD128,
ssa.OpAMD64VCMPPD256, ssa.OpAMD64VCMPPD256,
@ -900,6 +907,15 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
case ssa.OpAMD64VPDPWSSD128, case ssa.OpAMD64VPDPWSSD128,
ssa.OpAMD64VPDPWSSD256, ssa.OpAMD64VPDPWSSD256,
ssa.OpAMD64VPDPWSSD512, ssa.OpAMD64VPDPWSSD512,
ssa.OpAMD64VPDPWSSDS128,
ssa.OpAMD64VPDPWSSDS256,
ssa.OpAMD64VPDPWSSDS512,
ssa.OpAMD64VPDPBUSD128,
ssa.OpAMD64VPDPBUSD256,
ssa.OpAMD64VPDPBUSD512,
ssa.OpAMD64VPDPBUSDS128,
ssa.OpAMD64VPDPBUSDS256,
ssa.OpAMD64VPDPBUSDS512,
ssa.OpAMD64VFMADD213PS128, ssa.OpAMD64VFMADD213PS128,
ssa.OpAMD64VFMADD213PS256, ssa.OpAMD64VFMADD213PS256,
ssa.OpAMD64VFMADD213PS512, ssa.OpAMD64VFMADD213PS512,
@ -936,12 +952,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPERMI2Q256, ssa.OpAMD64VPERMI2Q256,
ssa.OpAMD64VPERMI2PD512, ssa.OpAMD64VPERMI2PD512,
ssa.OpAMD64VPERMI2Q512, ssa.OpAMD64VPERMI2Q512,
ssa.OpAMD64VPDPWSSDS128,
ssa.OpAMD64VPDPWSSDS256,
ssa.OpAMD64VPDPWSSDS512,
ssa.OpAMD64VPDPBUSDS128,
ssa.OpAMD64VPDPBUSDS256,
ssa.OpAMD64VPDPBUSDS512,
ssa.OpAMD64VPSHLDVW128, ssa.OpAMD64VPSHLDVW128,
ssa.OpAMD64VPSHLDVW256, ssa.OpAMD64VPSHLDVW256,
ssa.OpAMD64VPSHLDVW512, ssa.OpAMD64VPSHLDVW512,
@ -959,15 +969,21 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPSHRDVD512, ssa.OpAMD64VPSHRDVD512,
ssa.OpAMD64VPSHRDVQ128, ssa.OpAMD64VPSHRDVQ128,
ssa.OpAMD64VPSHRDVQ256, ssa.OpAMD64VPSHRDVQ256,
ssa.OpAMD64VPSHRDVQ512, ssa.OpAMD64VPSHRDVQ512:
ssa.OpAMD64VPDPBUSD128,
ssa.OpAMD64VPDPBUSD256,
ssa.OpAMD64VPDPBUSD512:
p = simdV31ResultInArg0(s, v) p = simdV31ResultInArg0(s, v)
case ssa.OpAMD64VPDPWSSDMasked128, case ssa.OpAMD64VPDPWSSDMasked128,
ssa.OpAMD64VPDPWSSDMasked256, ssa.OpAMD64VPDPWSSDMasked256,
ssa.OpAMD64VPDPWSSDMasked512, ssa.OpAMD64VPDPWSSDMasked512,
ssa.OpAMD64VPDPWSSDSMasked128,
ssa.OpAMD64VPDPWSSDSMasked256,
ssa.OpAMD64VPDPWSSDSMasked512,
ssa.OpAMD64VPDPBUSDMasked128,
ssa.OpAMD64VPDPBUSDMasked256,
ssa.OpAMD64VPDPBUSDMasked512,
ssa.OpAMD64VPDPBUSDSMasked128,
ssa.OpAMD64VPDPBUSDSMasked256,
ssa.OpAMD64VPDPBUSDSMasked512,
ssa.OpAMD64VFMADD213PSMasked128, ssa.OpAMD64VFMADD213PSMasked128,
ssa.OpAMD64VFMADD213PSMasked256, ssa.OpAMD64VFMADD213PSMasked256,
ssa.OpAMD64VFMADD213PSMasked512, ssa.OpAMD64VFMADD213PSMasked512,
@ -1004,12 +1020,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPERMI2QMasked256, ssa.OpAMD64VPERMI2QMasked256,
ssa.OpAMD64VPERMI2PDMasked512, ssa.OpAMD64VPERMI2PDMasked512,
ssa.OpAMD64VPERMI2QMasked512, ssa.OpAMD64VPERMI2QMasked512,
ssa.OpAMD64VPDPWSSDSMasked128,
ssa.OpAMD64VPDPWSSDSMasked256,
ssa.OpAMD64VPDPWSSDSMasked512,
ssa.OpAMD64VPDPBUSDSMasked128,
ssa.OpAMD64VPDPBUSDSMasked256,
ssa.OpAMD64VPDPBUSDSMasked512,
ssa.OpAMD64VPSHLDVWMasked128, ssa.OpAMD64VPSHLDVWMasked128,
ssa.OpAMD64VPSHLDVWMasked256, ssa.OpAMD64VPSHLDVWMasked256,
ssa.OpAMD64VPSHLDVWMasked512, ssa.OpAMD64VPSHLDVWMasked512,
@ -1027,10 +1037,7 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPSHRDVDMasked512, ssa.OpAMD64VPSHRDVDMasked512,
ssa.OpAMD64VPSHRDVQMasked128, ssa.OpAMD64VPSHRDVQMasked128,
ssa.OpAMD64VPSHRDVQMasked256, ssa.OpAMD64VPSHRDVQMasked256,
ssa.OpAMD64VPSHRDVQMasked512, ssa.OpAMD64VPSHRDVQMasked512:
ssa.OpAMD64VPDPBUSDMasked128,
ssa.OpAMD64VPDPBUSDMasked256,
ssa.OpAMD64VPDPBUSDMasked512:
p = simdV3kvResultInArg0(s, v) p = simdV3kvResultInArg0(s, v)
case ssa.OpAMD64VPSLLW128, case ssa.OpAMD64VPSLLW128,
@ -1151,6 +1158,15 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPDPWSSDMasked128, ssa.OpAMD64VPDPWSSDMasked128,
ssa.OpAMD64VPDPWSSDMasked256, ssa.OpAMD64VPDPWSSDMasked256,
ssa.OpAMD64VPDPWSSDMasked512, ssa.OpAMD64VPDPWSSDMasked512,
ssa.OpAMD64VPDPWSSDSMasked128,
ssa.OpAMD64VPDPWSSDSMasked256,
ssa.OpAMD64VPDPWSSDSMasked512,
ssa.OpAMD64VPDPBUSDMasked128,
ssa.OpAMD64VPDPBUSDMasked256,
ssa.OpAMD64VPDPBUSDMasked512,
ssa.OpAMD64VPDPBUSDSMasked128,
ssa.OpAMD64VPDPBUSDSMasked256,
ssa.OpAMD64VPDPBUSDSMasked512,
ssa.OpAMD64VADDPSMasked128, ssa.OpAMD64VADDPSMasked128,
ssa.OpAMD64VADDPSMasked256, ssa.OpAMD64VADDPSMasked256,
ssa.OpAMD64VADDPSMasked512, ssa.OpAMD64VADDPSMasked512,
@ -1175,6 +1191,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPADDSWMasked128, ssa.OpAMD64VPADDSWMasked128,
ssa.OpAMD64VPADDSWMasked256, ssa.OpAMD64VPADDSWMasked256,
ssa.OpAMD64VPADDSWMasked512, ssa.OpAMD64VPADDSWMasked512,
ssa.OpAMD64VPADDUSBMasked128,
ssa.OpAMD64VPADDUSBMasked256,
ssa.OpAMD64VPADDUSBMasked512,
ssa.OpAMD64VPADDUSWMasked128,
ssa.OpAMD64VPADDUSWMasked256,
ssa.OpAMD64VPADDUSWMasked512,
ssa.OpAMD64VPANDDMasked128, ssa.OpAMD64VPANDDMasked128,
ssa.OpAMD64VPANDDMasked256, ssa.OpAMD64VPANDDMasked256,
ssa.OpAMD64VPANDDMasked512, ssa.OpAMD64VPANDDMasked512,
@ -1187,18 +1209,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPANDNQMasked128, ssa.OpAMD64VPANDNQMasked128,
ssa.OpAMD64VPANDNQMasked256, ssa.OpAMD64VPANDNQMasked256,
ssa.OpAMD64VPANDNQMasked512, ssa.OpAMD64VPANDNQMasked512,
ssa.OpAMD64VRCP14PSMasked128,
ssa.OpAMD64VRCP14PSMasked256,
ssa.OpAMD64VRCP14PSMasked512,
ssa.OpAMD64VRCP14PDMasked128,
ssa.OpAMD64VRCP14PDMasked256,
ssa.OpAMD64VRCP14PDMasked512,
ssa.OpAMD64VRSQRT14PSMasked128,
ssa.OpAMD64VRSQRT14PSMasked256,
ssa.OpAMD64VRSQRT14PSMasked512,
ssa.OpAMD64VRSQRT14PDMasked128,
ssa.OpAMD64VRSQRT14PDMasked256,
ssa.OpAMD64VRSQRT14PDMasked512,
ssa.OpAMD64VPAVGBMasked128, ssa.OpAMD64VPAVGBMasked128,
ssa.OpAMD64VPAVGBMasked256, ssa.OpAMD64VPAVGBMasked256,
ssa.OpAMD64VPAVGBMasked512, ssa.OpAMD64VPAVGBMasked512,
@ -1247,6 +1257,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VDIVPDMasked128, ssa.OpAMD64VDIVPDMasked128,
ssa.OpAMD64VDIVPDMasked256, ssa.OpAMD64VDIVPDMasked256,
ssa.OpAMD64VDIVPDMasked512, ssa.OpAMD64VDIVPDMasked512,
ssa.OpAMD64VPMADDWDMasked128,
ssa.OpAMD64VPMADDWDMasked256,
ssa.OpAMD64VPMADDWDMasked512,
ssa.OpAMD64VPMADDUBSWMasked128,
ssa.OpAMD64VPMADDUBSWMasked256,
ssa.OpAMD64VPMADDUBSWMasked512,
ssa.OpAMD64VEXPANDPSMasked128, ssa.OpAMD64VEXPANDPSMasked128,
ssa.OpAMD64VEXPANDPSMasked256, ssa.OpAMD64VEXPANDPSMasked256,
ssa.OpAMD64VEXPANDPSMasked512, ssa.OpAMD64VEXPANDPSMasked512,
@ -1265,24 +1281,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPEXPANDQMasked128, ssa.OpAMD64VPEXPANDQMasked128,
ssa.OpAMD64VPEXPANDQMasked256, ssa.OpAMD64VPEXPANDQMasked256,
ssa.OpAMD64VPEXPANDQMasked512, ssa.OpAMD64VPEXPANDQMasked512,
ssa.OpAMD64VFMADD213PSMasked128,
ssa.OpAMD64VFMADD213PSMasked256,
ssa.OpAMD64VFMADD213PSMasked512,
ssa.OpAMD64VFMADD213PDMasked128,
ssa.OpAMD64VFMADD213PDMasked256,
ssa.OpAMD64VFMADD213PDMasked512,
ssa.OpAMD64VFMADDSUB213PSMasked128,
ssa.OpAMD64VFMADDSUB213PSMasked256,
ssa.OpAMD64VFMADDSUB213PSMasked512,
ssa.OpAMD64VFMADDSUB213PDMasked128,
ssa.OpAMD64VFMADDSUB213PDMasked256,
ssa.OpAMD64VFMADDSUB213PDMasked512,
ssa.OpAMD64VFMSUBADD213PSMasked128,
ssa.OpAMD64VFMSUBADD213PSMasked256,
ssa.OpAMD64VFMSUBADD213PSMasked512,
ssa.OpAMD64VFMSUBADD213PDMasked128,
ssa.OpAMD64VFMSUBADD213PDMasked256,
ssa.OpAMD64VFMSUBADD213PDMasked512,
ssa.OpAMD64VGF2P8AFFINEINVQBMasked128, ssa.OpAMD64VGF2P8AFFINEINVQBMasked128,
ssa.OpAMD64VGF2P8AFFINEINVQBMasked256, ssa.OpAMD64VGF2P8AFFINEINVQBMasked256,
ssa.OpAMD64VGF2P8AFFINEINVQBMasked512, ssa.OpAMD64VGF2P8AFFINEINVQBMasked512,
@ -1352,17 +1350,20 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPMINUQMasked128, ssa.OpAMD64VPMINUQMasked128,
ssa.OpAMD64VPMINUQMasked256, ssa.OpAMD64VPMINUQMasked256,
ssa.OpAMD64VPMINUQMasked512, ssa.OpAMD64VPMINUQMasked512,
ssa.OpAMD64VPMULDQMasked128, ssa.OpAMD64VFMADD213PSMasked128,
ssa.OpAMD64VPMULDQMasked256, ssa.OpAMD64VFMADD213PSMasked256,
ssa.OpAMD64VPMULDQMasked512, ssa.OpAMD64VFMADD213PSMasked512,
ssa.OpAMD64VPMULUDQMasked128, ssa.OpAMD64VFMADD213PDMasked128,
ssa.OpAMD64VPMULUDQMasked256, ssa.OpAMD64VFMADD213PDMasked256,
ssa.OpAMD64VPMULUDQMasked512, ssa.OpAMD64VFMADD213PDMasked512,
ssa.OpAMD64VPMULHWMasked128, ssa.OpAMD64VFMADDSUB213PSMasked128,
ssa.OpAMD64VPMULHWMasked256, ssa.OpAMD64VFMADDSUB213PSMasked256,
ssa.OpAMD64VPMULHWMasked512, ssa.OpAMD64VFMADDSUB213PSMasked512,
ssa.OpAMD64VFMADDSUB213PDMasked128,
ssa.OpAMD64VFMADDSUB213PDMasked256,
ssa.OpAMD64VFMADDSUB213PDMasked512,
ssa.OpAMD64VPMULHUWMasked128, ssa.OpAMD64VPMULHUWMasked128,
ssa.OpAMD64VPMULHUWMasked256, ssa.OpAMD64VPMULHWMasked256,
ssa.OpAMD64VPMULHUWMasked512, ssa.OpAMD64VPMULHUWMasked512,
ssa.OpAMD64VMULPSMasked128, ssa.OpAMD64VMULPSMasked128,
ssa.OpAMD64VMULPSMasked256, ssa.OpAMD64VMULPSMasked256,
@ -1379,15 +1380,30 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPMULLQMasked128, ssa.OpAMD64VPMULLQMasked128,
ssa.OpAMD64VPMULLQMasked256, ssa.OpAMD64VPMULLQMasked256,
ssa.OpAMD64VPMULLQMasked512, ssa.OpAMD64VPMULLQMasked512,
ssa.OpAMD64VFMSUBADD213PSMasked128,
ssa.OpAMD64VFMSUBADD213PSMasked256,
ssa.OpAMD64VFMSUBADD213PSMasked512,
ssa.OpAMD64VFMSUBADD213PDMasked128,
ssa.OpAMD64VFMSUBADD213PDMasked256,
ssa.OpAMD64VFMSUBADD213PDMasked512,
ssa.OpAMD64VPOPCNTBMasked128,
ssa.OpAMD64VPOPCNTBMasked256,
ssa.OpAMD64VPOPCNTBMasked512,
ssa.OpAMD64VPOPCNTWMasked128,
ssa.OpAMD64VPOPCNTWMasked256,
ssa.OpAMD64VPOPCNTWMasked512,
ssa.OpAMD64VPOPCNTDMasked128,
ssa.OpAMD64VPOPCNTDMasked256,
ssa.OpAMD64VPOPCNTDMasked512,
ssa.OpAMD64VPOPCNTQMasked128,
ssa.OpAMD64VPOPCNTQMasked256,
ssa.OpAMD64VPOPCNTQMasked512,
ssa.OpAMD64VPORDMasked128, ssa.OpAMD64VPORDMasked128,
ssa.OpAMD64VPORDMasked256, ssa.OpAMD64VPORDMasked256,
ssa.OpAMD64VPORDMasked512, ssa.OpAMD64VPORDMasked512,
ssa.OpAMD64VPORQMasked128, ssa.OpAMD64VPORQMasked128,
ssa.OpAMD64VPORQMasked256, ssa.OpAMD64VPORQMasked256,
ssa.OpAMD64VPORQMasked512, ssa.OpAMD64VPORQMasked512,
ssa.OpAMD64VPMADDWDMasked128,
ssa.OpAMD64VPMADDWDMasked256,
ssa.OpAMD64VPMADDWDMasked512,
ssa.OpAMD64VPERMI2BMasked128, ssa.OpAMD64VPERMI2BMasked128,
ssa.OpAMD64VPERMI2BMasked256, ssa.OpAMD64VPERMI2BMasked256,
ssa.OpAMD64VPERMI2BMasked512, ssa.OpAMD64VPERMI2BMasked512,
@ -1420,18 +1436,18 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPERMQMasked256, ssa.OpAMD64VPERMQMasked256,
ssa.OpAMD64VPERMPDMasked512, ssa.OpAMD64VPERMPDMasked512,
ssa.OpAMD64VPERMQMasked512, ssa.OpAMD64VPERMQMasked512,
ssa.OpAMD64VPOPCNTBMasked128, ssa.OpAMD64VRCP14PSMasked128,
ssa.OpAMD64VPOPCNTBMasked256, ssa.OpAMD64VRCP14PSMasked256,
ssa.OpAMD64VPOPCNTBMasked512, ssa.OpAMD64VRCP14PSMasked512,
ssa.OpAMD64VPOPCNTWMasked128, ssa.OpAMD64VRCP14PDMasked128,
ssa.OpAMD64VPOPCNTWMasked256, ssa.OpAMD64VRCP14PDMasked256,
ssa.OpAMD64VPOPCNTWMasked512, ssa.OpAMD64VRCP14PDMasked512,
ssa.OpAMD64VPOPCNTDMasked128, ssa.OpAMD64VRSQRT14PSMasked128,
ssa.OpAMD64VPOPCNTDMasked256, ssa.OpAMD64VRSQRT14PSMasked256,
ssa.OpAMD64VPOPCNTDMasked512, ssa.OpAMD64VRSQRT14PSMasked512,
ssa.OpAMD64VPOPCNTQMasked128, ssa.OpAMD64VRSQRT14PDMasked128,
ssa.OpAMD64VPOPCNTQMasked256, ssa.OpAMD64VRSQRT14PDMasked256,
ssa.OpAMD64VPOPCNTQMasked512, ssa.OpAMD64VRSQRT14PDMasked512,
ssa.OpAMD64VPROLDMasked128, ssa.OpAMD64VPROLDMasked128,
ssa.OpAMD64VPROLDMasked256, ssa.OpAMD64VPROLDMasked256,
ssa.OpAMD64VPROLDMasked512, ssa.OpAMD64VPROLDMasked512,
@ -1456,15 +1472,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPRORVQMasked128, ssa.OpAMD64VPRORVQMasked128,
ssa.OpAMD64VPRORVQMasked256, ssa.OpAMD64VPRORVQMasked256,
ssa.OpAMD64VPRORVQMasked512, ssa.OpAMD64VPRORVQMasked512,
ssa.OpAMD64VPDPWSSDSMasked128,
ssa.OpAMD64VPDPWSSDSMasked256,
ssa.OpAMD64VPDPWSSDSMasked512,
ssa.OpAMD64VPMADDUBSWMasked128,
ssa.OpAMD64VPMADDUBSWMasked256,
ssa.OpAMD64VPMADDUBSWMasked512,
ssa.OpAMD64VPDPBUSDSMasked128,
ssa.OpAMD64VPDPBUSDSMasked256,
ssa.OpAMD64VPDPBUSDSMasked512,
ssa.OpAMD64VSCALEFPSMasked128, ssa.OpAMD64VSCALEFPSMasked128,
ssa.OpAMD64VSCALEFPSMasked256, ssa.OpAMD64VSCALEFPSMasked256,
ssa.OpAMD64VSCALEFPSMasked512, ssa.OpAMD64VSCALEFPSMasked512,
@ -1591,9 +1598,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPSUBSWMasked128, ssa.OpAMD64VPSUBSWMasked128,
ssa.OpAMD64VPSUBSWMasked256, ssa.OpAMD64VPSUBSWMasked256,
ssa.OpAMD64VPSUBSWMasked512, ssa.OpAMD64VPSUBSWMasked512,
ssa.OpAMD64VPDPBUSDMasked128, ssa.OpAMD64VPSUBUSBMasked128,
ssa.OpAMD64VPDPBUSDMasked256, ssa.OpAMD64VPSUBUSBMasked256,
ssa.OpAMD64VPDPBUSDMasked512, ssa.OpAMD64VPSUBUSBMasked512,
ssa.OpAMD64VPSUBUSWMasked128,
ssa.OpAMD64VPSUBUSWMasked256,
ssa.OpAMD64VPSUBUSWMasked512,
ssa.OpAMD64VPXORDMasked128, ssa.OpAMD64VPXORDMasked128,
ssa.OpAMD64VPXORDMasked256, ssa.OpAMD64VPXORDMasked256,
ssa.OpAMD64VPXORDMasked512, ssa.OpAMD64VPXORDMasked512,

View file

@ -1,29 +1,29 @@
// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. // Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT.
(AbsoluteInt8x16 ...) => (VPABSB128 ...) (AbsInt8x16 ...) => (VPABSB128 ...)
(AbsoluteInt8x32 ...) => (VPABSB256 ...) (AbsInt8x32 ...) => (VPABSB256 ...)
(AbsoluteInt8x64 ...) => (VPABSB512 ...) (AbsInt8x64 ...) => (VPABSB512 ...)
(AbsoluteInt16x8 ...) => (VPABSW128 ...) (AbsInt16x8 ...) => (VPABSW128 ...)
(AbsoluteInt16x16 ...) => (VPABSW256 ...) (AbsInt16x16 ...) => (VPABSW256 ...)
(AbsoluteInt16x32 ...) => (VPABSW512 ...) (AbsInt16x32 ...) => (VPABSW512 ...)
(AbsoluteInt32x4 ...) => (VPABSD128 ...) (AbsInt32x4 ...) => (VPABSD128 ...)
(AbsoluteInt32x8 ...) => (VPABSD256 ...) (AbsInt32x8 ...) => (VPABSD256 ...)
(AbsoluteInt32x16 ...) => (VPABSD512 ...) (AbsInt32x16 ...) => (VPABSD512 ...)
(AbsoluteInt64x2 ...) => (VPABSQ128 ...) (AbsInt64x2 ...) => (VPABSQ128 ...)
(AbsoluteInt64x4 ...) => (VPABSQ256 ...) (AbsInt64x4 ...) => (VPABSQ256 ...)
(AbsoluteInt64x8 ...) => (VPABSQ512 ...) (AbsInt64x8 ...) => (VPABSQ512 ...)
(AbsoluteMaskedInt8x16 x mask) => (VPABSBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask)) (AbsMaskedInt8x16 x mask) => (VPABSBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
(AbsoluteMaskedInt8x32 x mask) => (VPABSBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask)) (AbsMaskedInt8x32 x mask) => (VPABSBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
(AbsoluteMaskedInt8x64 x mask) => (VPABSBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask)) (AbsMaskedInt8x64 x mask) => (VPABSBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
(AbsoluteMaskedInt16x8 x mask) => (VPABSWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask)) (AbsMaskedInt16x8 x mask) => (VPABSWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
(AbsoluteMaskedInt16x16 x mask) => (VPABSWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask)) (AbsMaskedInt16x16 x mask) => (VPABSWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
(AbsoluteMaskedInt16x32 x mask) => (VPABSWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask)) (AbsMaskedInt16x32 x mask) => (VPABSWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
(AbsoluteMaskedInt32x4 x mask) => (VPABSDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask)) (AbsMaskedInt32x4 x mask) => (VPABSDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
(AbsoluteMaskedInt32x8 x mask) => (VPABSDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask)) (AbsMaskedInt32x8 x mask) => (VPABSDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
(AbsoluteMaskedInt32x16 x mask) => (VPABSDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask)) (AbsMaskedInt32x16 x mask) => (VPABSDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
(AbsoluteMaskedInt64x2 x mask) => (VPABSQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask)) (AbsMaskedInt64x2 x mask) => (VPABSQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
(AbsoluteMaskedInt64x4 x mask) => (VPABSQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask)) (AbsMaskedInt64x4 x mask) => (VPABSQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
(AbsoluteMaskedInt64x8 x mask) => (VPABSQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask)) (AbsMaskedInt64x8 x mask) => (VPABSQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
(AddFloat32x4 ...) => (VADDPS128 ...) (AddFloat32x4 ...) => (VADDPS128 ...)
(AddFloat32x8 ...) => (VADDPS256 ...) (AddFloat32x8 ...) => (VADDPS256 ...)
(AddFloat32x16 ...) => (VADDPS512 ...) (AddFloat32x16 ...) => (VADDPS512 ...)
@ -54,12 +54,24 @@
(AddUint64x2 ...) => (VPADDQ128 ...) (AddUint64x2 ...) => (VPADDQ128 ...)
(AddUint64x4 ...) => (VPADDQ256 ...) (AddUint64x4 ...) => (VPADDQ256 ...)
(AddUint64x8 ...) => (VPADDQ512 ...) (AddUint64x8 ...) => (VPADDQ512 ...)
(AddDotProdInt32x4 ...) => (VPDPWSSD128 ...) (AddDotProdPairsSaturatedInt32x4 ...) => (VPDPWSSDS128 ...)
(AddDotProdInt32x8 ...) => (VPDPWSSD256 ...) (AddDotProdPairsSaturatedInt32x8 ...) => (VPDPWSSDS256 ...)
(AddDotProdInt32x16 ...) => (VPDPWSSD512 ...) (AddDotProdPairsSaturatedInt32x16 ...) => (VPDPWSSDS512 ...)
(AddDotProdMaskedInt32x4 x y z mask) => (VPDPWSSDMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask)) (AddDotProdPairsSaturatedMaskedInt32x4 x y z mask) => (VPDPWSSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
(AddDotProdMaskedInt32x8 x y z mask) => (VPDPWSSDMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask)) (AddDotProdPairsSaturatedMaskedInt32x8 x y z mask) => (VPDPWSSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
(AddDotProdMaskedInt32x16 x y z mask) => (VPDPWSSDMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask)) (AddDotProdPairsSaturatedMaskedInt32x16 x y z mask) => (VPDPWSSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
(AddDotProdQuadrupleInt32x4 ...) => (VPDPBUSD128 ...)
(AddDotProdQuadrupleInt32x8 ...) => (VPDPBUSD256 ...)
(AddDotProdQuadrupleInt32x16 ...) => (VPDPBUSD512 ...)
(AddDotProdQuadrupleMaskedInt32x4 x y z mask) => (VPDPBUSDMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
(AddDotProdQuadrupleMaskedInt32x8 x y z mask) => (VPDPBUSDMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
(AddDotProdQuadrupleMaskedInt32x16 x y z mask) => (VPDPBUSDMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
(AddDotProdQuadrupleSaturatedInt32x4 ...) => (VPDPBUSDS128 ...)
(AddDotProdQuadrupleSaturatedInt32x8 ...) => (VPDPBUSDS256 ...)
(AddDotProdQuadrupleSaturatedInt32x16 ...) => (VPDPBUSDS512 ...)
(AddDotProdQuadrupleSaturatedMaskedInt32x4 x y z mask) => (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
(AddDotProdQuadrupleSaturatedMaskedInt32x8 x y z mask) => (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
(AddDotProdQuadrupleSaturatedMaskedInt32x16 x y z mask) => (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
(AddMaskedFloat32x4 x y mask) => (VADDPSMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask)) (AddMaskedFloat32x4 x y mask) => (VADDPSMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
(AddMaskedFloat32x8 x y mask) => (VADDPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask)) (AddMaskedFloat32x8 x y mask) => (VADDPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
(AddMaskedFloat32x16 x y mask) => (VADDPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask)) (AddMaskedFloat32x16 x y mask) => (VADDPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
@ -110,24 +122,24 @@
(AddSaturatedInt16x8 ...) => (VPADDSW128 ...) (AddSaturatedInt16x8 ...) => (VPADDSW128 ...)
(AddSaturatedInt16x16 ...) => (VPADDSW256 ...) (AddSaturatedInt16x16 ...) => (VPADDSW256 ...)
(AddSaturatedInt16x32 ...) => (VPADDSW512 ...) (AddSaturatedInt16x32 ...) => (VPADDSW512 ...)
(AddSaturatedUint8x16 ...) => (VPADDSB128 ...) (AddSaturatedUint8x16 ...) => (VPADDUSB128 ...)
(AddSaturatedUint8x32 ...) => (VPADDSB256 ...) (AddSaturatedUint8x32 ...) => (VPADDUSB256 ...)
(AddSaturatedUint8x64 ...) => (VPADDSB512 ...) (AddSaturatedUint8x64 ...) => (VPADDUSB512 ...)
(AddSaturatedUint16x8 ...) => (VPADDSW128 ...) (AddSaturatedUint16x8 ...) => (VPADDUSW128 ...)
(AddSaturatedUint16x16 ...) => (VPADDSW256 ...) (AddSaturatedUint16x16 ...) => (VPADDUSW256 ...)
(AddSaturatedUint16x32 ...) => (VPADDSW512 ...) (AddSaturatedUint16x32 ...) => (VPADDUSW512 ...)
(AddSaturatedMaskedInt8x16 x y mask) => (VPADDSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask)) (AddSaturatedMaskedInt8x16 x y mask) => (VPADDSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
(AddSaturatedMaskedInt8x32 x y mask) => (VPADDSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask)) (AddSaturatedMaskedInt8x32 x y mask) => (VPADDSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
(AddSaturatedMaskedInt8x64 x y mask) => (VPADDSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask)) (AddSaturatedMaskedInt8x64 x y mask) => (VPADDSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
(AddSaturatedMaskedInt16x8 x y mask) => (VPADDSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask)) (AddSaturatedMaskedInt16x8 x y mask) => (VPADDSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
(AddSaturatedMaskedInt16x16 x y mask) => (VPADDSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask)) (AddSaturatedMaskedInt16x16 x y mask) => (VPADDSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
(AddSaturatedMaskedInt16x32 x y mask) => (VPADDSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask)) (AddSaturatedMaskedInt16x32 x y mask) => (VPADDSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
(AddSaturatedMaskedUint8x16 x y mask) => (VPADDSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask)) (AddSaturatedMaskedUint8x16 x y mask) => (VPADDUSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
(AddSaturatedMaskedUint8x32 x y mask) => (VPADDSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask)) (AddSaturatedMaskedUint8x32 x y mask) => (VPADDUSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
(AddSaturatedMaskedUint8x64 x y mask) => (VPADDSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask)) (AddSaturatedMaskedUint8x64 x y mask) => (VPADDUSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
(AddSaturatedMaskedUint16x8 x y mask) => (VPADDSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask)) (AddSaturatedMaskedUint16x8 x y mask) => (VPADDUSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
(AddSaturatedMaskedUint16x16 x y mask) => (VPADDSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask)) (AddSaturatedMaskedUint16x16 x y mask) => (VPADDUSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
(AddSaturatedMaskedUint16x32 x y mask) => (VPADDSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask)) (AddSaturatedMaskedUint16x32 x y mask) => (VPADDUSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
(AddSubFloat32x4 ...) => (VADDSUBPS128 ...) (AddSubFloat32x4 ...) => (VADDSUBPS128 ...)
(AddSubFloat32x8 ...) => (VADDSUBPS256 ...) (AddSubFloat32x8 ...) => (VADDSUBPS256 ...)
(AddSubFloat64x2 ...) => (VADDSUBPD128 ...) (AddSubFloat64x2 ...) => (VADDSUBPD128 ...)
@ -204,30 +216,6 @@
(AndNotMaskedUint64x2 x y mask) => (VPANDNQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask)) (AndNotMaskedUint64x2 x y mask) => (VPANDNQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
(AndNotMaskedUint64x4 x y mask) => (VPANDNQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask)) (AndNotMaskedUint64x4 x y mask) => (VPANDNQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(AndNotMaskedUint64x8 x y mask) => (VPANDNQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask)) (AndNotMaskedUint64x8 x y mask) => (VPANDNQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
(ApproximateReciprocalFloat32x4 ...) => (VRCPPS128 ...)
(ApproximateReciprocalFloat32x8 ...) => (VRCPPS256 ...)
(ApproximateReciprocalFloat32x16 ...) => (VRCP14PS512 ...)
(ApproximateReciprocalFloat64x2 ...) => (VRCP14PD128 ...)
(ApproximateReciprocalFloat64x4 ...) => (VRCP14PD256 ...)
(ApproximateReciprocalFloat64x8 ...) => (VRCP14PD512 ...)
(ApproximateReciprocalMaskedFloat32x4 x mask) => (VRCP14PSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
(ApproximateReciprocalMaskedFloat32x8 x mask) => (VRCP14PSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
(ApproximateReciprocalMaskedFloat32x16 x mask) => (VRCP14PSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
(ApproximateReciprocalMaskedFloat64x2 x mask) => (VRCP14PDMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
(ApproximateReciprocalMaskedFloat64x4 x mask) => (VRCP14PDMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
(ApproximateReciprocalMaskedFloat64x8 x mask) => (VRCP14PDMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
(ApproximateReciprocalOfSqrtFloat32x4 ...) => (VRSQRTPS128 ...)
(ApproximateReciprocalOfSqrtFloat32x8 ...) => (VRSQRTPS256 ...)
(ApproximateReciprocalOfSqrtFloat32x16 ...) => (VRSQRT14PS512 ...)
(ApproximateReciprocalOfSqrtFloat64x2 ...) => (VRSQRT14PD128 ...)
(ApproximateReciprocalOfSqrtFloat64x4 ...) => (VRSQRT14PD256 ...)
(ApproximateReciprocalOfSqrtFloat64x8 ...) => (VRSQRT14PD512 ...)
(ApproximateReciprocalOfSqrtMaskedFloat32x4 x mask) => (VRSQRT14PSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
(ApproximateReciprocalOfSqrtMaskedFloat32x8 x mask) => (VRSQRT14PSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
(ApproximateReciprocalOfSqrtMaskedFloat32x16 x mask) => (VRSQRT14PSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
(ApproximateReciprocalOfSqrtMaskedFloat64x2 x mask) => (VRSQRT14PDMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
(ApproximateReciprocalOfSqrtMaskedFloat64x4 x mask) => (VRSQRT14PDMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
(ApproximateReciprocalOfSqrtMaskedFloat64x8 x mask) => (VRSQRT14PDMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
(AverageUint8x16 ...) => (VPAVGB128 ...) (AverageUint8x16 ...) => (VPAVGB128 ...)
(AverageUint8x32 ...) => (VPAVGB256 ...) (AverageUint8x32 ...) => (VPAVGB256 ...)
(AverageUint8x64 ...) => (VPAVGB512 ...) (AverageUint8x64 ...) => (VPAVGB512 ...)
@ -310,6 +298,12 @@
(ConvertToUint32MaskedFloat32x4 x mask) => (VCVTPS2UDQMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask)) (ConvertToUint32MaskedFloat32x4 x mask) => (VCVTPS2UDQMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
(ConvertToUint32MaskedFloat32x8 x mask) => (VCVTPS2UDQMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask)) (ConvertToUint32MaskedFloat32x8 x mask) => (VCVTPS2UDQMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
(ConvertToUint32MaskedFloat32x16 x mask) => (VCVTPS2UDQMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask)) (ConvertToUint32MaskedFloat32x16 x mask) => (VCVTPS2UDQMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
(CopySignInt8x16 ...) => (VPSIGNB128 ...)
(CopySignInt8x32 ...) => (VPSIGNB256 ...)
(CopySignInt16x8 ...) => (VPSIGNW128 ...)
(CopySignInt16x16 ...) => (VPSIGNW256 ...)
(CopySignInt32x4 ...) => (VPSIGND128 ...)
(CopySignInt32x8 ...) => (VPSIGND256 ...)
(DivFloat32x4 ...) => (VDIVPS128 ...) (DivFloat32x4 ...) => (VDIVPS128 ...)
(DivFloat32x8 ...) => (VDIVPS256 ...) (DivFloat32x8 ...) => (VDIVPS256 ...)
(DivFloat32x16 ...) => (VDIVPS512 ...) (DivFloat32x16 ...) => (VDIVPS512 ...)
@ -322,9 +316,18 @@
(DivMaskedFloat64x2 x y mask) => (VDIVPDMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask)) (DivMaskedFloat64x2 x y mask) => (VDIVPDMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
(DivMaskedFloat64x4 x y mask) => (VDIVPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask)) (DivMaskedFloat64x4 x y mask) => (VDIVPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(DivMaskedFloat64x8 x y mask) => (VDIVPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask)) (DivMaskedFloat64x8 x y mask) => (VDIVPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
(DotProdBroadcastFloat32x4 x y) => (VDPPS128 [127] x y) (DotProdPairsInt16x8 ...) => (VPMADDWD128 ...)
(DotProdBroadcastFloat32x8 x y) => (VDPPS256 [127] x y) (DotProdPairsInt16x16 ...) => (VPMADDWD256 ...)
(DotProdBroadcastFloat64x2 x y) => (VDPPD128 [127] x y) (DotProdPairsInt16x32 ...) => (VPMADDWD512 ...)
(DotProdPairsMaskedInt16x8 x y mask) => (VPMADDWDMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
(DotProdPairsMaskedInt16x16 x y mask) => (VPMADDWDMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
(DotProdPairsMaskedInt16x32 x y mask) => (VPMADDWDMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
(DotProdPairsSaturatedUint8x16 ...) => (VPMADDUBSW128 ...)
(DotProdPairsSaturatedUint8x32 ...) => (VPMADDUBSW256 ...)
(DotProdPairsSaturatedUint8x64 ...) => (VPMADDUBSW512 ...)
(DotProdPairsSaturatedMaskedUint8x16 x y mask) => (VPMADDUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
(DotProdPairsSaturatedMaskedUint8x32 x y mask) => (VPMADDUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
(DotProdPairsSaturatedMaskedUint8x64 x y mask) => (VPMADDUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
(EqualFloat32x4 x y) => (VCMPPS128 [0] x y) (EqualFloat32x4 x y) => (VCMPPS128 [0] x y)
(EqualFloat32x8 x y) => (VCMPPS256 [0] x y) (EqualFloat32x8 x y) => (VCMPPS256 [0] x y)
(EqualFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [0] x y)) (EqualFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [0] x y))
@ -443,42 +446,6 @@
(FloorScaledResidueMaskedFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+1] x (VPMOVVec64x2ToM <types.TypeMask> mask)) (FloorScaledResidueMaskedFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+1] x (VPMOVVec64x2ToM <types.TypeMask> mask))
(FloorScaledResidueMaskedFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+1] x (VPMOVVec64x4ToM <types.TypeMask> mask)) (FloorScaledResidueMaskedFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+1] x (VPMOVVec64x4ToM <types.TypeMask> mask))
(FloorScaledResidueMaskedFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+1] x (VPMOVVec64x8ToM <types.TypeMask> mask)) (FloorScaledResidueMaskedFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+1] x (VPMOVVec64x8ToM <types.TypeMask> mask))
(FusedMultiplyAddFloat32x4 ...) => (VFMADD213PS128 ...)
(FusedMultiplyAddFloat32x8 ...) => (VFMADD213PS256 ...)
(FusedMultiplyAddFloat32x16 ...) => (VFMADD213PS512 ...)
(FusedMultiplyAddFloat64x2 ...) => (VFMADD213PD128 ...)
(FusedMultiplyAddFloat64x4 ...) => (VFMADD213PD256 ...)
(FusedMultiplyAddFloat64x8 ...) => (VFMADD213PD512 ...)
(FusedMultiplyAddMaskedFloat32x4 x y z mask) => (VFMADD213PSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
(FusedMultiplyAddMaskedFloat32x8 x y z mask) => (VFMADD213PSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
(FusedMultiplyAddMaskedFloat32x16 x y z mask) => (VFMADD213PSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
(FusedMultiplyAddMaskedFloat64x2 x y z mask) => (VFMADD213PDMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
(FusedMultiplyAddMaskedFloat64x4 x y z mask) => (VFMADD213PDMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
(FusedMultiplyAddMaskedFloat64x8 x y z mask) => (VFMADD213PDMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
(FusedMultiplyAddSubFloat32x4 ...) => (VFMADDSUB213PS128 ...)
(FusedMultiplyAddSubFloat32x8 ...) => (VFMADDSUB213PS256 ...)
(FusedMultiplyAddSubFloat32x16 ...) => (VFMADDSUB213PS512 ...)
(FusedMultiplyAddSubFloat64x2 ...) => (VFMADDSUB213PD128 ...)
(FusedMultiplyAddSubFloat64x4 ...) => (VFMADDSUB213PD256 ...)
(FusedMultiplyAddSubFloat64x8 ...) => (VFMADDSUB213PD512 ...)
(FusedMultiplyAddSubMaskedFloat32x4 x y z mask) => (VFMADDSUB213PSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
(FusedMultiplyAddSubMaskedFloat32x8 x y z mask) => (VFMADDSUB213PSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
(FusedMultiplyAddSubMaskedFloat32x16 x y z mask) => (VFMADDSUB213PSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
(FusedMultiplyAddSubMaskedFloat64x2 x y z mask) => (VFMADDSUB213PDMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
(FusedMultiplyAddSubMaskedFloat64x4 x y z mask) => (VFMADDSUB213PDMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
(FusedMultiplyAddSubMaskedFloat64x8 x y z mask) => (VFMADDSUB213PDMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
(FusedMultiplySubAddFloat32x4 ...) => (VFMSUBADD213PS128 ...)
(FusedMultiplySubAddFloat32x8 ...) => (VFMSUBADD213PS256 ...)
(FusedMultiplySubAddFloat32x16 ...) => (VFMSUBADD213PS512 ...)
(FusedMultiplySubAddFloat64x2 ...) => (VFMSUBADD213PD128 ...)
(FusedMultiplySubAddFloat64x4 ...) => (VFMSUBADD213PD256 ...)
(FusedMultiplySubAddFloat64x8 ...) => (VFMSUBADD213PD512 ...)
(FusedMultiplySubAddMaskedFloat32x4 x y z mask) => (VFMSUBADD213PSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
(FusedMultiplySubAddMaskedFloat32x8 x y z mask) => (VFMSUBADD213PSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
(FusedMultiplySubAddMaskedFloat32x16 x y z mask) => (VFMSUBADD213PSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
(FusedMultiplySubAddMaskedFloat64x2 x y z mask) => (VFMSUBADD213PDMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
(FusedMultiplySubAddMaskedFloat64x4 x y z mask) => (VFMSUBADD213PDMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
(FusedMultiplySubAddMaskedFloat64x8 x y z mask) => (VFMSUBADD213PDMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
(GaloisFieldAffineTransformUint8x16 ...) => (VGF2P8AFFINEQB128 ...) (GaloisFieldAffineTransformUint8x16 ...) => (VGF2P8AFFINEQB128 ...)
(GaloisFieldAffineTransformUint8x32 ...) => (VGF2P8AFFINEQB256 ...) (GaloisFieldAffineTransformUint8x32 ...) => (VGF2P8AFFINEQB256 ...)
(GaloisFieldAffineTransformUint8x64 ...) => (VGF2P8AFFINEQB512 ...) (GaloisFieldAffineTransformUint8x64 ...) => (VGF2P8AFFINEQB512 ...)
@ -932,34 +899,49 @@
(MulInt64x2 ...) => (VPMULLQ128 ...) (MulInt64x2 ...) => (VPMULLQ128 ...)
(MulInt64x4 ...) => (VPMULLQ256 ...) (MulInt64x4 ...) => (VPMULLQ256 ...)
(MulInt64x8 ...) => (VPMULLQ512 ...) (MulInt64x8 ...) => (VPMULLQ512 ...)
(MulUint16x8 ...) => (VPMULLW128 ...)
(MulUint16x16 ...) => (VPMULLW256 ...)
(MulUint16x32 ...) => (VPMULLW512 ...)
(MulUint32x4 ...) => (VPMULLD128 ...)
(MulUint32x8 ...) => (VPMULLD256 ...)
(MulUint32x16 ...) => (VPMULLD512 ...)
(MulUint64x2 ...) => (VPMULLQ128 ...)
(MulUint64x4 ...) => (VPMULLQ256 ...)
(MulUint64x8 ...) => (VPMULLQ512 ...)
(MulAddFloat32x4 ...) => (VFMADD213PS128 ...)
(MulAddFloat32x8 ...) => (VFMADD213PS256 ...)
(MulAddFloat32x16 ...) => (VFMADD213PS512 ...)
(MulAddFloat64x2 ...) => (VFMADD213PD128 ...)
(MulAddFloat64x4 ...) => (VFMADD213PD256 ...)
(MulAddFloat64x8 ...) => (VFMADD213PD512 ...)
(MulAddMaskedFloat32x4 x y z mask) => (VFMADD213PSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
(MulAddMaskedFloat32x8 x y z mask) => (VFMADD213PSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
(MulAddMaskedFloat32x16 x y z mask) => (VFMADD213PSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
(MulAddMaskedFloat64x2 x y z mask) => (VFMADD213PDMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
(MulAddMaskedFloat64x4 x y z mask) => (VFMADD213PDMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
(MulAddMaskedFloat64x8 x y z mask) => (VFMADD213PDMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
(MulAddSubFloat32x4 ...) => (VFMADDSUB213PS128 ...)
(MulAddSubFloat32x8 ...) => (VFMADDSUB213PS256 ...)
(MulAddSubFloat32x16 ...) => (VFMADDSUB213PS512 ...)
(MulAddSubFloat64x2 ...) => (VFMADDSUB213PD128 ...)
(MulAddSubFloat64x4 ...) => (VFMADDSUB213PD256 ...)
(MulAddSubFloat64x8 ...) => (VFMADDSUB213PD512 ...)
(MulAddSubMaskedFloat32x4 x y z mask) => (VFMADDSUB213PSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
(MulAddSubMaskedFloat32x8 x y z mask) => (VFMADDSUB213PSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
(MulAddSubMaskedFloat32x16 x y z mask) => (VFMADDSUB213PSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
(MulAddSubMaskedFloat64x2 x y z mask) => (VFMADDSUB213PDMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
(MulAddSubMaskedFloat64x4 x y z mask) => (VFMADDSUB213PDMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
(MulAddSubMaskedFloat64x8 x y z mask) => (VFMADDSUB213PDMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
(MulEvenWidenInt32x4 ...) => (VPMULDQ128 ...) (MulEvenWidenInt32x4 ...) => (VPMULDQ128 ...)
(MulEvenWidenInt32x8 ...) => (VPMULDQ256 ...) (MulEvenWidenInt32x8 ...) => (VPMULDQ256 ...)
(MulEvenWidenInt64x2 ...) => (VPMULDQ128 ...)
(MulEvenWidenInt64x4 ...) => (VPMULDQ256 ...)
(MulEvenWidenInt64x8 ...) => (VPMULDQ512 ...)
(MulEvenWidenUint32x4 ...) => (VPMULUDQ128 ...) (MulEvenWidenUint32x4 ...) => (VPMULUDQ128 ...)
(MulEvenWidenUint32x8 ...) => (VPMULUDQ256 ...) (MulEvenWidenUint32x8 ...) => (VPMULUDQ256 ...)
(MulEvenWidenUint64x2 ...) => (VPMULUDQ128 ...) (MulHighInt16x8 ...) => (VPMULHUW128 ...)
(MulEvenWidenUint64x4 ...) => (VPMULUDQ256 ...) (MulHighInt16x16 ...) => (VPMULHUW256 ...)
(MulEvenWidenUint64x8 ...) => (VPMULUDQ512 ...)
(MulEvenWidenMaskedInt64x2 x y mask) => (VPMULDQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
(MulEvenWidenMaskedInt64x4 x y mask) => (VPMULDQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(MulEvenWidenMaskedInt64x8 x y mask) => (VPMULDQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
(MulEvenWidenMaskedUint64x2 x y mask) => (VPMULUDQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
(MulEvenWidenMaskedUint64x4 x y mask) => (VPMULUDQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(MulEvenWidenMaskedUint64x8 x y mask) => (VPMULUDQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
(MulHighInt16x8 ...) => (VPMULHW128 ...)
(MulHighInt16x16 ...) => (VPMULHW256 ...)
(MulHighInt16x32 ...) => (VPMULHW512 ...) (MulHighInt16x32 ...) => (VPMULHW512 ...)
(MulHighUint16x8 ...) => (VPMULHUW128 ...) (MulHighMaskedInt16x8 x y mask) => (VPMULHUWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
(MulHighUint16x16 ...) => (VPMULHUW256 ...)
(MulHighUint16x32 ...) => (VPMULHUW512 ...)
(MulHighMaskedInt16x8 x y mask) => (VPMULHWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
(MulHighMaskedInt16x16 x y mask) => (VPMULHWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask)) (MulHighMaskedInt16x16 x y mask) => (VPMULHWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
(MulHighMaskedInt16x32 x y mask) => (VPMULHWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask)) (MulHighMaskedInt16x32 x y mask) => (VPMULHUWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
(MulHighMaskedUint16x8 x y mask) => (VPMULHUWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
(MulHighMaskedUint16x16 x y mask) => (VPMULHUWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
(MulHighMaskedUint16x32 x y mask) => (VPMULHUWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
(MulMaskedFloat32x4 x y mask) => (VMULPSMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask)) (MulMaskedFloat32x4 x y mask) => (VMULPSMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
(MulMaskedFloat32x8 x y mask) => (VMULPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask)) (MulMaskedFloat32x8 x y mask) => (VMULPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
(MulMaskedFloat32x16 x y mask) => (VMULPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask)) (MulMaskedFloat32x16 x y mask) => (VMULPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
@ -975,6 +957,27 @@
(MulMaskedInt64x2 x y mask) => (VPMULLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask)) (MulMaskedInt64x2 x y mask) => (VPMULLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
(MulMaskedInt64x4 x y mask) => (VPMULLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask)) (MulMaskedInt64x4 x y mask) => (VPMULLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(MulMaskedInt64x8 x y mask) => (VPMULLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask)) (MulMaskedInt64x8 x y mask) => (VPMULLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
(MulMaskedUint16x8 x y mask) => (VPMULLWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
(MulMaskedUint16x16 x y mask) => (VPMULLWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
(MulMaskedUint16x32 x y mask) => (VPMULLWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
(MulMaskedUint32x4 x y mask) => (VPMULLDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
(MulMaskedUint32x8 x y mask) => (VPMULLDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
(MulMaskedUint32x16 x y mask) => (VPMULLDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
(MulMaskedUint64x2 x y mask) => (VPMULLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
(MulMaskedUint64x4 x y mask) => (VPMULLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(MulMaskedUint64x8 x y mask) => (VPMULLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
(MulSubAddFloat32x4 ...) => (VFMSUBADD213PS128 ...)
(MulSubAddFloat32x8 ...) => (VFMSUBADD213PS256 ...)
(MulSubAddFloat32x16 ...) => (VFMSUBADD213PS512 ...)
(MulSubAddFloat64x2 ...) => (VFMSUBADD213PD128 ...)
(MulSubAddFloat64x4 ...) => (VFMSUBADD213PD256 ...)
(MulSubAddFloat64x8 ...) => (VFMSUBADD213PD512 ...)
(MulSubAddMaskedFloat32x4 x y z mask) => (VFMSUBADD213PSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
(MulSubAddMaskedFloat32x8 x y z mask) => (VFMSUBADD213PSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
(MulSubAddMaskedFloat32x16 x y z mask) => (VFMSUBADD213PSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
(MulSubAddMaskedFloat64x2 x y z mask) => (VFMSUBADD213PDMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
(MulSubAddMaskedFloat64x4 x y z mask) => (VFMSUBADD213PDMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
(MulSubAddMaskedFloat64x8 x y z mask) => (VFMSUBADD213PDMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
(NotEqualFloat32x4 x y) => (VCMPPS128 [4] x y) (NotEqualFloat32x4 x y) => (VCMPPS128 [4] x y)
(NotEqualFloat32x8 x y) => (VCMPPS256 [4] x y) (NotEqualFloat32x8 x y) => (VCMPPS256 [4] x y)
(NotEqualFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [4] x y)) (NotEqualFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [4] x y))
@ -1035,6 +1038,54 @@
(NotEqualMaskedUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [4] x y (VPMOVVec64x2ToM <types.TypeMask> mask))) (NotEqualMaskedUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [4] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
(NotEqualMaskedUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [4] x y (VPMOVVec64x4ToM <types.TypeMask> mask))) (NotEqualMaskedUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [4] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
(NotEqualMaskedUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [4] x y (VPMOVVec64x8ToM <types.TypeMask> mask))) (NotEqualMaskedUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [4] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
(OnesCountInt8x16 ...) => (VPOPCNTB128 ...)
(OnesCountInt8x32 ...) => (VPOPCNTB256 ...)
(OnesCountInt8x64 ...) => (VPOPCNTB512 ...)
(OnesCountInt16x8 ...) => (VPOPCNTW128 ...)
(OnesCountInt16x16 ...) => (VPOPCNTW256 ...)
(OnesCountInt16x32 ...) => (VPOPCNTW512 ...)
(OnesCountInt32x4 ...) => (VPOPCNTD128 ...)
(OnesCountInt32x8 ...) => (VPOPCNTD256 ...)
(OnesCountInt32x16 ...) => (VPOPCNTD512 ...)
(OnesCountInt64x2 ...) => (VPOPCNTQ128 ...)
(OnesCountInt64x4 ...) => (VPOPCNTQ256 ...)
(OnesCountInt64x8 ...) => (VPOPCNTQ512 ...)
(OnesCountUint8x16 ...) => (VPOPCNTB128 ...)
(OnesCountUint8x32 ...) => (VPOPCNTB256 ...)
(OnesCountUint8x64 ...) => (VPOPCNTB512 ...)
(OnesCountUint16x8 ...) => (VPOPCNTW128 ...)
(OnesCountUint16x16 ...) => (VPOPCNTW256 ...)
(OnesCountUint16x32 ...) => (VPOPCNTW512 ...)
(OnesCountUint32x4 ...) => (VPOPCNTD128 ...)
(OnesCountUint32x8 ...) => (VPOPCNTD256 ...)
(OnesCountUint32x16 ...) => (VPOPCNTD512 ...)
(OnesCountUint64x2 ...) => (VPOPCNTQ128 ...)
(OnesCountUint64x4 ...) => (VPOPCNTQ256 ...)
(OnesCountUint64x8 ...) => (VPOPCNTQ512 ...)
(OnesCountMaskedInt8x16 x mask) => (VPOPCNTBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
(OnesCountMaskedInt8x32 x mask) => (VPOPCNTBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
(OnesCountMaskedInt8x64 x mask) => (VPOPCNTBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
(OnesCountMaskedInt16x8 x mask) => (VPOPCNTWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
(OnesCountMaskedInt16x16 x mask) => (VPOPCNTWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
(OnesCountMaskedInt16x32 x mask) => (VPOPCNTWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
(OnesCountMaskedInt32x4 x mask) => (VPOPCNTDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
(OnesCountMaskedInt32x8 x mask) => (VPOPCNTDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
(OnesCountMaskedInt32x16 x mask) => (VPOPCNTDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
(OnesCountMaskedInt64x2 x mask) => (VPOPCNTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
(OnesCountMaskedInt64x4 x mask) => (VPOPCNTQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
(OnesCountMaskedInt64x8 x mask) => (VPOPCNTQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
(OnesCountMaskedUint8x16 x mask) => (VPOPCNTBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
(OnesCountMaskedUint8x32 x mask) => (VPOPCNTBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
(OnesCountMaskedUint8x64 x mask) => (VPOPCNTBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
(OnesCountMaskedUint16x8 x mask) => (VPOPCNTWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
(OnesCountMaskedUint16x16 x mask) => (VPOPCNTWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
(OnesCountMaskedUint16x32 x mask) => (VPOPCNTWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
(OnesCountMaskedUint32x4 x mask) => (VPOPCNTDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
(OnesCountMaskedUint32x8 x mask) => (VPOPCNTDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
(OnesCountMaskedUint32x16 x mask) => (VPOPCNTDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
(OnesCountMaskedUint64x2 x mask) => (VPOPCNTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
(OnesCountMaskedUint64x4 x mask) => (VPOPCNTQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
(OnesCountMaskedUint64x8 x mask) => (VPOPCNTQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
(OrInt8x16 ...) => (VPOR128 ...) (OrInt8x16 ...) => (VPOR128 ...)
(OrInt8x32 ...) => (VPOR256 ...) (OrInt8x32 ...) => (VPOR256 ...)
(OrInt8x64 ...) => (VPORD512 ...) (OrInt8x64 ...) => (VPORD512 ...)
@ -1071,12 +1122,6 @@
(OrMaskedUint64x2 x y mask) => (VPORQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask)) (OrMaskedUint64x2 x y mask) => (VPORQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
(OrMaskedUint64x4 x y mask) => (VPORQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask)) (OrMaskedUint64x4 x y mask) => (VPORQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(OrMaskedUint64x8 x y mask) => (VPORQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask)) (OrMaskedUint64x8 x y mask) => (VPORQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
(PairDotProdInt16x8 ...) => (VPMADDWD128 ...)
(PairDotProdInt16x16 ...) => (VPMADDWD256 ...)
(PairDotProdInt16x32 ...) => (VPMADDWD512 ...)
(PairDotProdMaskedInt16x8 x y mask) => (VPMADDWDMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
(PairDotProdMaskedInt16x16 x y mask) => (VPMADDWDMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
(PairDotProdMaskedInt16x32 x y mask) => (VPMADDWDMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
(PermuteFloat32x8 ...) => (VPERMPS256 ...) (PermuteFloat32x8 ...) => (VPERMPS256 ...)
(PermuteFloat32x16 ...) => (VPERMPS512 ...) (PermuteFloat32x16 ...) => (VPERMPS512 ...)
(PermuteFloat64x4 ...) => (VPERMPD256 ...) (PermuteFloat64x4 ...) => (VPERMPD256 ...)
@ -1185,54 +1230,30 @@
(PermuteMaskedUint32x16 x y mask) => (VPERMDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask)) (PermuteMaskedUint32x16 x y mask) => (VPERMDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
(PermuteMaskedUint64x4 x y mask) => (VPERMQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask)) (PermuteMaskedUint64x4 x y mask) => (VPERMQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(PermuteMaskedUint64x8 x y mask) => (VPERMQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask)) (PermuteMaskedUint64x8 x y mask) => (VPERMQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
(PopCountInt8x16 ...) => (VPOPCNTB128 ...) (ReciprocalFloat32x4 ...) => (VRCPPS128 ...)
(PopCountInt8x32 ...) => (VPOPCNTB256 ...) (ReciprocalFloat32x8 ...) => (VRCPPS256 ...)
(PopCountInt8x64 ...) => (VPOPCNTB512 ...) (ReciprocalFloat32x16 ...) => (VRCP14PS512 ...)
(PopCountInt16x8 ...) => (VPOPCNTW128 ...) (ReciprocalFloat64x2 ...) => (VRCP14PD128 ...)
(PopCountInt16x16 ...) => (VPOPCNTW256 ...) (ReciprocalFloat64x4 ...) => (VRCP14PD256 ...)
(PopCountInt16x32 ...) => (VPOPCNTW512 ...) (ReciprocalFloat64x8 ...) => (VRCP14PD512 ...)
(PopCountInt32x4 ...) => (VPOPCNTD128 ...) (ReciprocalMaskedFloat32x4 x mask) => (VRCP14PSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
(PopCountInt32x8 ...) => (VPOPCNTD256 ...) (ReciprocalMaskedFloat32x8 x mask) => (VRCP14PSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
(PopCountInt32x16 ...) => (VPOPCNTD512 ...) (ReciprocalMaskedFloat32x16 x mask) => (VRCP14PSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
(PopCountInt64x2 ...) => (VPOPCNTQ128 ...) (ReciprocalMaskedFloat64x2 x mask) => (VRCP14PDMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
(PopCountInt64x4 ...) => (VPOPCNTQ256 ...) (ReciprocalMaskedFloat64x4 x mask) => (VRCP14PDMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
(PopCountInt64x8 ...) => (VPOPCNTQ512 ...) (ReciprocalMaskedFloat64x8 x mask) => (VRCP14PDMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
(PopCountUint8x16 ...) => (VPOPCNTB128 ...) (ReciprocalSqrtFloat32x4 ...) => (VRSQRTPS128 ...)
(PopCountUint8x32 ...) => (VPOPCNTB256 ...) (ReciprocalSqrtFloat32x8 ...) => (VRSQRTPS256 ...)
(PopCountUint8x64 ...) => (VPOPCNTB512 ...) (ReciprocalSqrtFloat32x16 ...) => (VRSQRT14PS512 ...)
(PopCountUint16x8 ...) => (VPOPCNTW128 ...) (ReciprocalSqrtFloat64x2 ...) => (VRSQRT14PD128 ...)
(PopCountUint16x16 ...) => (VPOPCNTW256 ...) (ReciprocalSqrtFloat64x4 ...) => (VRSQRT14PD256 ...)
(PopCountUint16x32 ...) => (VPOPCNTW512 ...) (ReciprocalSqrtFloat64x8 ...) => (VRSQRT14PD512 ...)
(PopCountUint32x4 ...) => (VPOPCNTD128 ...) (ReciprocalSqrtMaskedFloat32x4 x mask) => (VRSQRT14PSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
(PopCountUint32x8 ...) => (VPOPCNTD256 ...) (ReciprocalSqrtMaskedFloat32x8 x mask) => (VRSQRT14PSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
(PopCountUint32x16 ...) => (VPOPCNTD512 ...) (ReciprocalSqrtMaskedFloat32x16 x mask) => (VRSQRT14PSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
(PopCountUint64x2 ...) => (VPOPCNTQ128 ...) (ReciprocalSqrtMaskedFloat64x2 x mask) => (VRSQRT14PDMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
(PopCountUint64x4 ...) => (VPOPCNTQ256 ...) (ReciprocalSqrtMaskedFloat64x4 x mask) => (VRSQRT14PDMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
(PopCountUint64x8 ...) => (VPOPCNTQ512 ...) (ReciprocalSqrtMaskedFloat64x8 x mask) => (VRSQRT14PDMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
(PopCountMaskedInt8x16 x mask) => (VPOPCNTBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
(PopCountMaskedInt8x32 x mask) => (VPOPCNTBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
(PopCountMaskedInt8x64 x mask) => (VPOPCNTBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
(PopCountMaskedInt16x8 x mask) => (VPOPCNTWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
(PopCountMaskedInt16x16 x mask) => (VPOPCNTWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
(PopCountMaskedInt16x32 x mask) => (VPOPCNTWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
(PopCountMaskedInt32x4 x mask) => (VPOPCNTDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
(PopCountMaskedInt32x8 x mask) => (VPOPCNTDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
(PopCountMaskedInt32x16 x mask) => (VPOPCNTDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
(PopCountMaskedInt64x2 x mask) => (VPOPCNTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
(PopCountMaskedInt64x4 x mask) => (VPOPCNTQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
(PopCountMaskedInt64x8 x mask) => (VPOPCNTQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
(PopCountMaskedUint8x16 x mask) => (VPOPCNTBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
(PopCountMaskedUint8x32 x mask) => (VPOPCNTBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
(PopCountMaskedUint8x64 x mask) => (VPOPCNTBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
(PopCountMaskedUint16x8 x mask) => (VPOPCNTWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
(PopCountMaskedUint16x16 x mask) => (VPOPCNTWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
(PopCountMaskedUint16x32 x mask) => (VPOPCNTWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
(PopCountMaskedUint32x4 x mask) => (VPOPCNTDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
(PopCountMaskedUint32x8 x mask) => (VPOPCNTDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
(PopCountMaskedUint32x16 x mask) => (VPOPCNTDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
(PopCountMaskedUint64x2 x mask) => (VPOPCNTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
(PopCountMaskedUint64x4 x mask) => (VPOPCNTQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
(PopCountMaskedUint64x8 x mask) => (VPOPCNTQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
(RotateAllLeftInt32x4 ...) => (VPROLD128 ...) (RotateAllLeftInt32x4 ...) => (VPROLD128 ...)
(RotateAllLeftInt32x8 ...) => (VPROLD256 ...) (RotateAllLeftInt32x8 ...) => (VPROLD256 ...)
(RotateAllLeftInt32x16 ...) => (VPROLD512 ...) (RotateAllLeftInt32x16 ...) => (VPROLD512 ...)
@ -1329,52 +1350,34 @@
(RotateRightMaskedUint64x2 x y mask) => (VPRORVQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask)) (RotateRightMaskedUint64x2 x y mask) => (VPRORVQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
(RotateRightMaskedUint64x4 x y mask) => (VPRORVQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask)) (RotateRightMaskedUint64x4 x y mask) => (VPRORVQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(RotateRightMaskedUint64x8 x y mask) => (VPRORVQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask)) (RotateRightMaskedUint64x8 x y mask) => (VPRORVQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
(RoundFloat32x4 x) => (VROUNDPS128 [0] x) (RoundToEvenFloat32x4 x) => (VROUNDPS128 [0] x)
(RoundFloat32x8 x) => (VROUNDPS256 [0] x) (RoundToEvenFloat32x8 x) => (VROUNDPS256 [0] x)
(RoundFloat64x2 x) => (VROUNDPD128 [0] x) (RoundToEvenFloat64x2 x) => (VROUNDPD128 [0] x)
(RoundFloat64x4 x) => (VROUNDPD256 [0] x) (RoundToEvenFloat64x4 x) => (VROUNDPD256 [0] x)
(RoundScaledFloat32x4 [a] x) => (VRNDSCALEPS128 [a+0] x) (RoundToEvenScaledFloat32x4 [a] x) => (VRNDSCALEPS128 [a+0] x)
(RoundScaledFloat32x8 [a] x) => (VRNDSCALEPS256 [a+0] x) (RoundToEvenScaledFloat32x8 [a] x) => (VRNDSCALEPS256 [a+0] x)
(RoundScaledFloat32x16 [a] x) => (VRNDSCALEPS512 [a+0] x) (RoundToEvenScaledFloat32x16 [a] x) => (VRNDSCALEPS512 [a+0] x)
(RoundScaledFloat64x2 [a] x) => (VRNDSCALEPD128 [a+0] x) (RoundToEvenScaledFloat64x2 [a] x) => (VRNDSCALEPD128 [a+0] x)
(RoundScaledFloat64x4 [a] x) => (VRNDSCALEPD256 [a+0] x) (RoundToEvenScaledFloat64x4 [a] x) => (VRNDSCALEPD256 [a+0] x)
(RoundScaledFloat64x8 [a] x) => (VRNDSCALEPD512 [a+0] x) (RoundToEvenScaledFloat64x8 [a] x) => (VRNDSCALEPD512 [a+0] x)
(RoundScaledMaskedFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+0] x (VPMOVVec32x4ToM <types.TypeMask> mask)) (RoundToEvenScaledMaskedFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+0] x (VPMOVVec32x4ToM <types.TypeMask> mask))
(RoundScaledMaskedFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+0] x (VPMOVVec32x8ToM <types.TypeMask> mask)) (RoundToEvenScaledMaskedFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+0] x (VPMOVVec32x8ToM <types.TypeMask> mask))
(RoundScaledMaskedFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+0] x (VPMOVVec32x16ToM <types.TypeMask> mask)) (RoundToEvenScaledMaskedFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+0] x (VPMOVVec32x16ToM <types.TypeMask> mask))
(RoundScaledMaskedFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+0] x (VPMOVVec64x2ToM <types.TypeMask> mask)) (RoundToEvenScaledMaskedFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+0] x (VPMOVVec64x2ToM <types.TypeMask> mask))
(RoundScaledMaskedFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+0] x (VPMOVVec64x4ToM <types.TypeMask> mask)) (RoundToEvenScaledMaskedFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+0] x (VPMOVVec64x4ToM <types.TypeMask> mask))
(RoundScaledMaskedFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+0] x (VPMOVVec64x8ToM <types.TypeMask> mask)) (RoundToEvenScaledMaskedFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+0] x (VPMOVVec64x8ToM <types.TypeMask> mask))
(RoundScaledResidueFloat32x4 [a] x) => (VREDUCEPS128 [a+0] x) (RoundToEvenScaledResidueFloat32x4 [a] x) => (VREDUCEPS128 [a+0] x)
(RoundScaledResidueFloat32x8 [a] x) => (VREDUCEPS256 [a+0] x) (RoundToEvenScaledResidueFloat32x8 [a] x) => (VREDUCEPS256 [a+0] x)
(RoundScaledResidueFloat32x16 [a] x) => (VREDUCEPS512 [a+0] x) (RoundToEvenScaledResidueFloat32x16 [a] x) => (VREDUCEPS512 [a+0] x)
(RoundScaledResidueFloat64x2 [a] x) => (VREDUCEPD128 [a+0] x) (RoundToEvenScaledResidueFloat64x2 [a] x) => (VREDUCEPD128 [a+0] x)
(RoundScaledResidueFloat64x4 [a] x) => (VREDUCEPD256 [a+0] x) (RoundToEvenScaledResidueFloat64x4 [a] x) => (VREDUCEPD256 [a+0] x)
(RoundScaledResidueFloat64x8 [a] x) => (VREDUCEPD512 [a+0] x) (RoundToEvenScaledResidueFloat64x8 [a] x) => (VREDUCEPD512 [a+0] x)
(RoundScaledResidueMaskedFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+0] x (VPMOVVec32x4ToM <types.TypeMask> mask)) (RoundToEvenScaledResidueMaskedFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+0] x (VPMOVVec32x4ToM <types.TypeMask> mask))
(RoundScaledResidueMaskedFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+0] x (VPMOVVec32x8ToM <types.TypeMask> mask)) (RoundToEvenScaledResidueMaskedFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+0] x (VPMOVVec32x8ToM <types.TypeMask> mask))
(RoundScaledResidueMaskedFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+0] x (VPMOVVec32x16ToM <types.TypeMask> mask)) (RoundToEvenScaledResidueMaskedFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+0] x (VPMOVVec32x16ToM <types.TypeMask> mask))
(RoundScaledResidueMaskedFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+0] x (VPMOVVec64x2ToM <types.TypeMask> mask)) (RoundToEvenScaledResidueMaskedFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+0] x (VPMOVVec64x2ToM <types.TypeMask> mask))
(RoundScaledResidueMaskedFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+0] x (VPMOVVec64x4ToM <types.TypeMask> mask)) (RoundToEvenScaledResidueMaskedFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+0] x (VPMOVVec64x4ToM <types.TypeMask> mask))
(RoundScaledResidueMaskedFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+0] x (VPMOVVec64x8ToM <types.TypeMask> mask)) (RoundToEvenScaledResidueMaskedFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+0] x (VPMOVVec64x8ToM <types.TypeMask> mask))
(SaturatedAddDotProdInt32x4 ...) => (VPDPWSSDS128 ...)
(SaturatedAddDotProdInt32x8 ...) => (VPDPWSSDS256 ...)
(SaturatedAddDotProdInt32x16 ...) => (VPDPWSSDS512 ...)
(SaturatedAddDotProdMaskedInt32x4 x y z mask) => (VPDPWSSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
(SaturatedAddDotProdMaskedInt32x8 x y z mask) => (VPDPWSSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
(SaturatedAddDotProdMaskedInt32x16 x y z mask) => (VPDPWSSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
(SaturatedUnsignedSignedPairDotProdUint8x16 ...) => (VPMADDUBSW128 ...)
(SaturatedUnsignedSignedPairDotProdUint8x32 ...) => (VPMADDUBSW256 ...)
(SaturatedUnsignedSignedPairDotProdUint8x64 ...) => (VPMADDUBSW512 ...)
(SaturatedUnsignedSignedPairDotProdMaskedUint8x16 x y mask) => (VPMADDUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
(SaturatedUnsignedSignedPairDotProdMaskedUint8x32 x y mask) => (VPMADDUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
(SaturatedUnsignedSignedPairDotProdMaskedUint8x64 x y mask) => (VPMADDUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
(SaturatedUnsignedSignedQuadDotProdAccumulateInt32x4 ...) => (VPDPBUSDS128 ...)
(SaturatedUnsignedSignedQuadDotProdAccumulateInt32x8 ...) => (VPDPBUSDS256 ...)
(SaturatedUnsignedSignedQuadDotProdAccumulateInt32x16 ...) => (VPDPBUSDS512 ...)
(SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4 x y z mask) => (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
(SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8 x y z mask) => (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
(SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x16 x y z mask) => (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
(ScaleFloat32x4 ...) => (VSCALEFPS128 ...) (ScaleFloat32x4 ...) => (VSCALEFPS128 ...)
(ScaleFloat32x8 ...) => (VSCALEFPS256 ...) (ScaleFloat32x8 ...) => (VSCALEFPS256 ...)
(ScaleFloat32x16 ...) => (VSCALEFPS512 ...) (ScaleFloat32x16 ...) => (VSCALEFPS512 ...)
@ -1795,12 +1798,6 @@
(ShiftRightMaskedUint64x2 x y mask) => (VPSRLVQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask)) (ShiftRightMaskedUint64x2 x y mask) => (VPSRLVQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
(ShiftRightMaskedUint64x4 x y mask) => (VPSRLVQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask)) (ShiftRightMaskedUint64x4 x y mask) => (VPSRLVQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(ShiftRightMaskedUint64x8 x y mask) => (VPSRLVQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask)) (ShiftRightMaskedUint64x8 x y mask) => (VPSRLVQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
(SignInt8x16 ...) => (VPSIGNB128 ...)
(SignInt8x32 ...) => (VPSIGNB256 ...)
(SignInt16x8 ...) => (VPSIGNW128 ...)
(SignInt16x16 ...) => (VPSIGNW256 ...)
(SignInt32x4 ...) => (VPSIGND128 ...)
(SignInt32x8 ...) => (VPSIGND256 ...)
(SqrtFloat32x4 ...) => (VSQRTPS128 ...) (SqrtFloat32x4 ...) => (VSQRTPS128 ...)
(SqrtFloat32x8 ...) => (VSQRTPS256 ...) (SqrtFloat32x8 ...) => (VSQRTPS256 ...)
(SqrtFloat32x16 ...) => (VSQRTPS512 ...) (SqrtFloat32x16 ...) => (VSQRTPS512 ...)
@ -1893,24 +1890,24 @@
(SubSaturatedInt16x8 ...) => (VPSUBSW128 ...) (SubSaturatedInt16x8 ...) => (VPSUBSW128 ...)
(SubSaturatedInt16x16 ...) => (VPSUBSW256 ...) (SubSaturatedInt16x16 ...) => (VPSUBSW256 ...)
(SubSaturatedInt16x32 ...) => (VPSUBSW512 ...) (SubSaturatedInt16x32 ...) => (VPSUBSW512 ...)
(SubSaturatedUint8x16 ...) => (VPSUBSB128 ...) (SubSaturatedUint8x16 ...) => (VPSUBUSB128 ...)
(SubSaturatedUint8x32 ...) => (VPSUBSB256 ...) (SubSaturatedUint8x32 ...) => (VPSUBUSB256 ...)
(SubSaturatedUint8x64 ...) => (VPSUBSB512 ...) (SubSaturatedUint8x64 ...) => (VPSUBUSB512 ...)
(SubSaturatedUint16x8 ...) => (VPSUBSW128 ...) (SubSaturatedUint16x8 ...) => (VPSUBUSW128 ...)
(SubSaturatedUint16x16 ...) => (VPSUBSW256 ...) (SubSaturatedUint16x16 ...) => (VPSUBUSW256 ...)
(SubSaturatedUint16x32 ...) => (VPSUBSW512 ...) (SubSaturatedUint16x32 ...) => (VPSUBUSW512 ...)
(SubSaturatedMaskedInt8x16 x y mask) => (VPSUBSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask)) (SubSaturatedMaskedInt8x16 x y mask) => (VPSUBSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
(SubSaturatedMaskedInt8x32 x y mask) => (VPSUBSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask)) (SubSaturatedMaskedInt8x32 x y mask) => (VPSUBSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
(SubSaturatedMaskedInt8x64 x y mask) => (VPSUBSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask)) (SubSaturatedMaskedInt8x64 x y mask) => (VPSUBSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
(SubSaturatedMaskedInt16x8 x y mask) => (VPSUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask)) (SubSaturatedMaskedInt16x8 x y mask) => (VPSUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
(SubSaturatedMaskedInt16x16 x y mask) => (VPSUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask)) (SubSaturatedMaskedInt16x16 x y mask) => (VPSUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
(SubSaturatedMaskedInt16x32 x y mask) => (VPSUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask)) (SubSaturatedMaskedInt16x32 x y mask) => (VPSUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
(SubSaturatedMaskedUint8x16 x y mask) => (VPSUBSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask)) (SubSaturatedMaskedUint8x16 x y mask) => (VPSUBUSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
(SubSaturatedMaskedUint8x32 x y mask) => (VPSUBSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask)) (SubSaturatedMaskedUint8x32 x y mask) => (VPSUBUSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
(SubSaturatedMaskedUint8x64 x y mask) => (VPSUBSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask)) (SubSaturatedMaskedUint8x64 x y mask) => (VPSUBUSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
(SubSaturatedMaskedUint16x8 x y mask) => (VPSUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask)) (SubSaturatedMaskedUint16x8 x y mask) => (VPSUBUSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
(SubSaturatedMaskedUint16x16 x y mask) => (VPSUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask)) (SubSaturatedMaskedUint16x16 x y mask) => (VPSUBUSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
(SubSaturatedMaskedUint16x32 x y mask) => (VPSUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask)) (SubSaturatedMaskedUint16x32 x y mask) => (VPSUBUSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
(TruncFloat32x4 x) => (VROUNDPS128 [3] x) (TruncFloat32x4 x) => (VROUNDPS128 [3] x)
(TruncFloat32x8 x) => (VROUNDPS256 [3] x) (TruncFloat32x8 x) => (VROUNDPS256 [3] x)
(TruncFloat64x2 x) => (VROUNDPD128 [3] x) (TruncFloat64x2 x) => (VROUNDPD128 [3] x)
@ -1939,12 +1936,6 @@
(TruncScaledResidueMaskedFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+3] x (VPMOVVec64x2ToM <types.TypeMask> mask)) (TruncScaledResidueMaskedFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+3] x (VPMOVVec64x2ToM <types.TypeMask> mask))
(TruncScaledResidueMaskedFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+3] x (VPMOVVec64x4ToM <types.TypeMask> mask)) (TruncScaledResidueMaskedFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+3] x (VPMOVVec64x4ToM <types.TypeMask> mask))
(TruncScaledResidueMaskedFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+3] x (VPMOVVec64x8ToM <types.TypeMask> mask)) (TruncScaledResidueMaskedFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+3] x (VPMOVVec64x8ToM <types.TypeMask> mask))
(UnsignedSignedQuadDotProdAccumulateInt32x4 ...) => (VPDPBUSD128 ...)
(UnsignedSignedQuadDotProdAccumulateInt32x8 ...) => (VPDPBUSD256 ...)
(UnsignedSignedQuadDotProdAccumulateInt32x16 ...) => (VPDPBUSD512 ...)
(UnsignedSignedQuadDotProdAccumulateMaskedInt32x4 x y z mask) => (VPDPBUSDMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
(UnsignedSignedQuadDotProdAccumulateMaskedInt32x8 x y z mask) => (VPDPBUSDMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
(UnsignedSignedQuadDotProdAccumulateMaskedInt32x16 x y z mask) => (VPDPBUSDMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
(XorInt8x16 ...) => (VPXOR128 ...) (XorInt8x16 ...) => (VPXOR128 ...)
(XorInt8x32 ...) => (VPXOR256 ...) (XorInt8x32 ...) => (VPXOR256 ...)
(XorInt8x64 ...) => (VPXORD512 ...) (XorInt8x64 ...) => (VPXORD512 ...)

View file

@ -195,6 +195,18 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPADDSWMasked128", argLength: 3, reg: w2kw, asm: "VPADDSW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPADDSWMasked128", argLength: 3, reg: w2kw, asm: "VPADDSW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPADDSWMasked256", argLength: 3, reg: w2kw, asm: "VPADDSW", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPADDSWMasked256", argLength: 3, reg: w2kw, asm: "VPADDSW", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPADDSWMasked512", argLength: 3, reg: w2kw, asm: "VPADDSW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPADDSWMasked512", argLength: 3, reg: w2kw, asm: "VPADDSW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPADDUSB128", argLength: 2, reg: v21, asm: "VPADDUSB", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPADDUSB256", argLength: 2, reg: v21, asm: "VPADDUSB", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPADDUSB512", argLength: 2, reg: w21, asm: "VPADDUSB", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPADDUSBMasked128", argLength: 3, reg: w2kw, asm: "VPADDUSB", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPADDUSBMasked256", argLength: 3, reg: w2kw, asm: "VPADDUSB", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPADDUSBMasked512", argLength: 3, reg: w2kw, asm: "VPADDUSB", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPADDUSW128", argLength: 2, reg: v21, asm: "VPADDUSW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPADDUSW256", argLength: 2, reg: v21, asm: "VPADDUSW", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPADDUSW512", argLength: 2, reg: w21, asm: "VPADDUSW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPADDUSWMasked128", argLength: 3, reg: w2kw, asm: "VPADDUSW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPADDUSWMasked256", argLength: 3, reg: w2kw, asm: "VPADDUSW", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPADDUSWMasked512", argLength: 3, reg: w2kw, asm: "VPADDUSW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPADDW128", argLength: 2, reg: v21, asm: "VPADDW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPADDW128", argLength: 2, reg: v21, asm: "VPADDW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPADDW256", argLength: 2, reg: v21, asm: "VPADDW", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPADDW256", argLength: 2, reg: v21, asm: "VPADDW", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPADDW512", argLength: 2, reg: w21, asm: "VPADDW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPADDW512", argLength: 2, reg: w21, asm: "VPADDW", commutative: true, typ: "Vec512", resultInArg0: false},
@ -497,22 +509,12 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPMINUWMasked512", argLength: 3, reg: w2kw, asm: "VPMINUW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMINUWMasked512", argLength: 3, reg: w2kw, asm: "VPMINUW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMULDQ128", argLength: 2, reg: v21, asm: "VPMULDQ", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMULDQ128", argLength: 2, reg: v21, asm: "VPMULDQ", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMULDQ256", argLength: 2, reg: v21, asm: "VPMULDQ", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMULDQ256", argLength: 2, reg: v21, asm: "VPMULDQ", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMULDQ512", argLength: 2, reg: w21, asm: "VPMULDQ", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMULDQMasked128", argLength: 3, reg: w2kw, asm: "VPMULDQ", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMULDQMasked256", argLength: 3, reg: w2kw, asm: "VPMULDQ", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMULDQMasked512", argLength: 3, reg: w2kw, asm: "VPMULDQ", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMULHUW128", argLength: 2, reg: v21, asm: "VPMULHUW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMULHUW128", argLength: 2, reg: v21, asm: "VPMULHUW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMULHUW256", argLength: 2, reg: v21, asm: "VPMULHUW", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMULHUW256", argLength: 2, reg: v21, asm: "VPMULHUW", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMULHUW512", argLength: 2, reg: w21, asm: "VPMULHUW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMULHUWMasked128", argLength: 3, reg: w2kw, asm: "VPMULHUW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMULHUWMasked128", argLength: 3, reg: w2kw, asm: "VPMULHUW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMULHUWMasked256", argLength: 3, reg: w2kw, asm: "VPMULHUW", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMULHUWMasked512", argLength: 3, reg: w2kw, asm: "VPMULHUW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMULHUWMasked512", argLength: 3, reg: w2kw, asm: "VPMULHUW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMULHW128", argLength: 2, reg: v21, asm: "VPMULHW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMULHW256", argLength: 2, reg: v21, asm: "VPMULHW", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMULHW512", argLength: 2, reg: w21, asm: "VPMULHW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMULHW512", argLength: 2, reg: w21, asm: "VPMULHW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMULHWMasked128", argLength: 3, reg: w2kw, asm: "VPMULHW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMULHWMasked256", argLength: 3, reg: w2kw, asm: "VPMULHW", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMULHWMasked256", argLength: 3, reg: w2kw, asm: "VPMULHW", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMULHWMasked512", argLength: 3, reg: w2kw, asm: "VPMULHW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMULLD128", argLength: 2, reg: v21, asm: "VPMULLD", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMULLD128", argLength: 2, reg: v21, asm: "VPMULLD", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMULLD256", argLength: 2, reg: v21, asm: "VPMULLD", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMULLD256", argLength: 2, reg: v21, asm: "VPMULLD", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMULLD512", argLength: 2, reg: w21, asm: "VPMULLD", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMULLD512", argLength: 2, reg: w21, asm: "VPMULLD", commutative: true, typ: "Vec512", resultInArg0: false},
@ -533,10 +535,6 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPMULLWMasked512", argLength: 3, reg: w2kw, asm: "VPMULLW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMULLWMasked512", argLength: 3, reg: w2kw, asm: "VPMULLW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMULUDQ128", argLength: 2, reg: v21, asm: "VPMULUDQ", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMULUDQ128", argLength: 2, reg: v21, asm: "VPMULUDQ", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMULUDQ256", argLength: 2, reg: v21, asm: "VPMULUDQ", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMULUDQ256", argLength: 2, reg: v21, asm: "VPMULUDQ", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMULUDQ512", argLength: 2, reg: w21, asm: "VPMULUDQ", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMULUDQMasked128", argLength: 3, reg: w2kw, asm: "VPMULUDQ", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMULUDQMasked256", argLength: 3, reg: w2kw, asm: "VPMULUDQ", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMULUDQMasked512", argLength: 3, reg: w2kw, asm: "VPMULUDQ", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPOPCNTB128", argLength: 1, reg: w11, asm: "VPOPCNTB", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPOPCNTB128", argLength: 1, reg: w11, asm: "VPOPCNTB", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPOPCNTB256", argLength: 1, reg: w11, asm: "VPOPCNTB", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPOPCNTB256", argLength: 1, reg: w11, asm: "VPOPCNTB", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPOPCNTB512", argLength: 1, reg: w11, asm: "VPOPCNTB", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPOPCNTB512", argLength: 1, reg: w11, asm: "VPOPCNTB", commutative: false, typ: "Vec512", resultInArg0: false},
@ -775,6 +773,18 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPSUBSWMasked128", argLength: 3, reg: w2kw, asm: "VPSUBSW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSUBSWMasked128", argLength: 3, reg: w2kw, asm: "VPSUBSW", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPSUBSWMasked256", argLength: 3, reg: w2kw, asm: "VPSUBSW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSUBSWMasked256", argLength: 3, reg: w2kw, asm: "VPSUBSW", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPSUBSWMasked512", argLength: 3, reg: w2kw, asm: "VPSUBSW", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSUBSWMasked512", argLength: 3, reg: w2kw, asm: "VPSUBSW", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPSUBUSB128", argLength: 2, reg: v21, asm: "VPSUBUSB", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPSUBUSB256", argLength: 2, reg: v21, asm: "VPSUBUSB", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPSUBUSB512", argLength: 2, reg: w21, asm: "VPSUBUSB", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPSUBUSBMasked128", argLength: 3, reg: w2kw, asm: "VPSUBUSB", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPSUBUSBMasked256", argLength: 3, reg: w2kw, asm: "VPSUBUSB", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPSUBUSBMasked512", argLength: 3, reg: w2kw, asm: "VPSUBUSB", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPSUBUSW128", argLength: 2, reg: v21, asm: "VPSUBUSW", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPSUBUSW256", argLength: 2, reg: v21, asm: "VPSUBUSW", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPSUBUSW512", argLength: 2, reg: w21, asm: "VPSUBUSW", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPSUBUSWMasked128", argLength: 3, reg: w2kw, asm: "VPSUBUSW", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPSUBUSWMasked256", argLength: 3, reg: w2kw, asm: "VPSUBUSW", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPSUBUSWMasked512", argLength: 3, reg: w2kw, asm: "VPSUBUSW", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPSUBW128", argLength: 2, reg: v21, asm: "VPSUBW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSUBW128", argLength: 2, reg: v21, asm: "VPSUBW", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPSUBW256", argLength: 2, reg: v21, asm: "VPSUBW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSUBW256", argLength: 2, reg: v21, asm: "VPSUBW", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPSUBW512", argLength: 2, reg: w21, asm: "VPSUBW", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSUBW512", argLength: 2, reg: w21, asm: "VPSUBW", commutative: false, typ: "Vec512", resultInArg0: false},
@ -879,9 +889,6 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VREDUCEPDMasked128", argLength: 2, reg: wkw, asm: "VREDUCEPD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VREDUCEPDMasked128", argLength: 2, reg: wkw, asm: "VREDUCEPD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VREDUCEPDMasked256", argLength: 2, reg: wkw, asm: "VREDUCEPD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VREDUCEPDMasked256", argLength: 2, reg: wkw, asm: "VREDUCEPD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VREDUCEPDMasked512", argLength: 2, reg: wkw, asm: "VREDUCEPD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VREDUCEPDMasked512", argLength: 2, reg: wkw, asm: "VREDUCEPD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VDPPS128", argLength: 2, reg: v21, asm: "VDPPS", aux: "Int8", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VDPPS256", argLength: 2, reg: v21, asm: "VDPPS", aux: "Int8", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VDPPD128", argLength: 2, reg: v21, asm: "VDPPD", aux: "Int8", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VCMPPS128", argLength: 2, reg: v21, asm: "VCMPPS", aux: "Int8", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VCMPPS128", argLength: 2, reg: v21, asm: "VCMPPS", aux: "Int8", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VCMPPS256", argLength: 2, reg: v21, asm: "VCMPPS", aux: "Int8", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VCMPPS256", argLength: 2, reg: v21, asm: "VCMPPS", aux: "Int8", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VCMPPS512", argLength: 2, reg: w2k, asm: "VCMPPS", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VCMPPS512", argLength: 2, reg: w2k, asm: "VCMPPS", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},

View file

@ -3,36 +3,48 @@ package main
func simdGenericOps() []opData { func simdGenericOps() []opData {
return []opData{ return []opData{
{name: "AbsoluteInt8x16", argLength: 1, commutative: false}, {name: "AbsInt8x16", argLength: 1, commutative: false},
{name: "AbsoluteInt8x32", argLength: 1, commutative: false}, {name: "AbsInt8x32", argLength: 1, commutative: false},
{name: "AbsoluteInt8x64", argLength: 1, commutative: false}, {name: "AbsInt8x64", argLength: 1, commutative: false},
{name: "AbsoluteInt16x8", argLength: 1, commutative: false}, {name: "AbsInt16x8", argLength: 1, commutative: false},
{name: "AbsoluteInt16x16", argLength: 1, commutative: false}, {name: "AbsInt16x16", argLength: 1, commutative: false},
{name: "AbsoluteInt16x32", argLength: 1, commutative: false}, {name: "AbsInt16x32", argLength: 1, commutative: false},
{name: "AbsoluteInt32x4", argLength: 1, commutative: false}, {name: "AbsInt32x4", argLength: 1, commutative: false},
{name: "AbsoluteInt32x8", argLength: 1, commutative: false}, {name: "AbsInt32x8", argLength: 1, commutative: false},
{name: "AbsoluteInt32x16", argLength: 1, commutative: false}, {name: "AbsInt32x16", argLength: 1, commutative: false},
{name: "AbsoluteInt64x2", argLength: 1, commutative: false}, {name: "AbsInt64x2", argLength: 1, commutative: false},
{name: "AbsoluteInt64x4", argLength: 1, commutative: false}, {name: "AbsInt64x4", argLength: 1, commutative: false},
{name: "AbsoluteInt64x8", argLength: 1, commutative: false}, {name: "AbsInt64x8", argLength: 1, commutative: false},
{name: "AbsoluteMaskedInt8x16", argLength: 2, commutative: false}, {name: "AbsMaskedInt8x16", argLength: 2, commutative: false},
{name: "AbsoluteMaskedInt8x32", argLength: 2, commutative: false}, {name: "AbsMaskedInt8x32", argLength: 2, commutative: false},
{name: "AbsoluteMaskedInt8x64", argLength: 2, commutative: false}, {name: "AbsMaskedInt8x64", argLength: 2, commutative: false},
{name: "AbsoluteMaskedInt16x8", argLength: 2, commutative: false}, {name: "AbsMaskedInt16x8", argLength: 2, commutative: false},
{name: "AbsoluteMaskedInt16x16", argLength: 2, commutative: false}, {name: "AbsMaskedInt16x16", argLength: 2, commutative: false},
{name: "AbsoluteMaskedInt16x32", argLength: 2, commutative: false}, {name: "AbsMaskedInt16x32", argLength: 2, commutative: false},
{name: "AbsoluteMaskedInt32x4", argLength: 2, commutative: false}, {name: "AbsMaskedInt32x4", argLength: 2, commutative: false},
{name: "AbsoluteMaskedInt32x8", argLength: 2, commutative: false}, {name: "AbsMaskedInt32x8", argLength: 2, commutative: false},
{name: "AbsoluteMaskedInt32x16", argLength: 2, commutative: false}, {name: "AbsMaskedInt32x16", argLength: 2, commutative: false},
{name: "AbsoluteMaskedInt64x2", argLength: 2, commutative: false}, {name: "AbsMaskedInt64x2", argLength: 2, commutative: false},
{name: "AbsoluteMaskedInt64x4", argLength: 2, commutative: false}, {name: "AbsMaskedInt64x4", argLength: 2, commutative: false},
{name: "AbsoluteMaskedInt64x8", argLength: 2, commutative: false}, {name: "AbsMaskedInt64x8", argLength: 2, commutative: false},
{name: "AddDotProdInt32x4", argLength: 3, commutative: false}, {name: "AddDotProdPairsSaturatedInt32x4", argLength: 3, commutative: false},
{name: "AddDotProdInt32x8", argLength: 3, commutative: false}, {name: "AddDotProdPairsSaturatedInt32x8", argLength: 3, commutative: false},
{name: "AddDotProdInt32x16", argLength: 3, commutative: false}, {name: "AddDotProdPairsSaturatedInt32x16", argLength: 3, commutative: false},
{name: "AddDotProdMaskedInt32x4", argLength: 4, commutative: false}, {name: "AddDotProdPairsSaturatedMaskedInt32x4", argLength: 4, commutative: false},
{name: "AddDotProdMaskedInt32x8", argLength: 4, commutative: false}, {name: "AddDotProdPairsSaturatedMaskedInt32x8", argLength: 4, commutative: false},
{name: "AddDotProdMaskedInt32x16", argLength: 4, commutative: false}, {name: "AddDotProdPairsSaturatedMaskedInt32x16", argLength: 4, commutative: false},
{name: "AddDotProdQuadrupleInt32x4", argLength: 3, commutative: false},
{name: "AddDotProdQuadrupleInt32x8", argLength: 3, commutative: false},
{name: "AddDotProdQuadrupleInt32x16", argLength: 3, commutative: false},
{name: "AddDotProdQuadrupleMaskedInt32x4", argLength: 4, commutative: false},
{name: "AddDotProdQuadrupleMaskedInt32x8", argLength: 4, commutative: false},
{name: "AddDotProdQuadrupleMaskedInt32x16", argLength: 4, commutative: false},
{name: "AddDotProdQuadrupleSaturatedInt32x4", argLength: 3, commutative: false},
{name: "AddDotProdQuadrupleSaturatedInt32x8", argLength: 3, commutative: false},
{name: "AddDotProdQuadrupleSaturatedInt32x16", argLength: 3, commutative: false},
{name: "AddDotProdQuadrupleSaturatedMaskedInt32x4", argLength: 4, commutative: false},
{name: "AddDotProdQuadrupleSaturatedMaskedInt32x8", argLength: 4, commutative: false},
{name: "AddDotProdQuadrupleSaturatedMaskedInt32x16", argLength: 4, commutative: false},
{name: "AddFloat32x4", argLength: 2, commutative: true}, {name: "AddFloat32x4", argLength: 2, commutative: true},
{name: "AddFloat32x8", argLength: 2, commutative: true}, {name: "AddFloat32x8", argLength: 2, commutative: true},
{name: "AddFloat32x16", argLength: 2, commutative: true}, {name: "AddFloat32x16", argLength: 2, commutative: true},
@ -207,30 +219,6 @@ func simdGenericOps() []opData {
{name: "AndUint64x2", argLength: 2, commutative: true}, {name: "AndUint64x2", argLength: 2, commutative: true},
{name: "AndUint64x4", argLength: 2, commutative: true}, {name: "AndUint64x4", argLength: 2, commutative: true},
{name: "AndUint64x8", argLength: 2, commutative: true}, {name: "AndUint64x8", argLength: 2, commutative: true},
{name: "ApproximateReciprocalFloat32x4", argLength: 1, commutative: false},
{name: "ApproximateReciprocalFloat32x8", argLength: 1, commutative: false},
{name: "ApproximateReciprocalFloat32x16", argLength: 1, commutative: false},
{name: "ApproximateReciprocalFloat64x2", argLength: 1, commutative: false},
{name: "ApproximateReciprocalFloat64x4", argLength: 1, commutative: false},
{name: "ApproximateReciprocalFloat64x8", argLength: 1, commutative: false},
{name: "ApproximateReciprocalMaskedFloat32x4", argLength: 2, commutative: false},
{name: "ApproximateReciprocalMaskedFloat32x8", argLength: 2, commutative: false},
{name: "ApproximateReciprocalMaskedFloat32x16", argLength: 2, commutative: false},
{name: "ApproximateReciprocalMaskedFloat64x2", argLength: 2, commutative: false},
{name: "ApproximateReciprocalMaskedFloat64x4", argLength: 2, commutative: false},
{name: "ApproximateReciprocalMaskedFloat64x8", argLength: 2, commutative: false},
{name: "ApproximateReciprocalOfSqrtFloat32x4", argLength: 1, commutative: false},
{name: "ApproximateReciprocalOfSqrtFloat32x8", argLength: 1, commutative: false},
{name: "ApproximateReciprocalOfSqrtFloat32x16", argLength: 1, commutative: false},
{name: "ApproximateReciprocalOfSqrtFloat64x2", argLength: 1, commutative: false},
{name: "ApproximateReciprocalOfSqrtFloat64x4", argLength: 1, commutative: false},
{name: "ApproximateReciprocalOfSqrtFloat64x8", argLength: 1, commutative: false},
{name: "ApproximateReciprocalOfSqrtMaskedFloat32x4", argLength: 2, commutative: false},
{name: "ApproximateReciprocalOfSqrtMaskedFloat32x8", argLength: 2, commutative: false},
{name: "ApproximateReciprocalOfSqrtMaskedFloat32x16", argLength: 2, commutative: false},
{name: "ApproximateReciprocalOfSqrtMaskedFloat64x2", argLength: 2, commutative: false},
{name: "ApproximateReciprocalOfSqrtMaskedFloat64x4", argLength: 2, commutative: false},
{name: "ApproximateReciprocalOfSqrtMaskedFloat64x8", argLength: 2, commutative: false},
{name: "AverageMaskedUint8x16", argLength: 3, commutative: true}, {name: "AverageMaskedUint8x16", argLength: 3, commutative: true},
{name: "AverageMaskedUint8x32", argLength: 3, commutative: true}, {name: "AverageMaskedUint8x32", argLength: 3, commutative: true},
{name: "AverageMaskedUint8x64", argLength: 3, commutative: true}, {name: "AverageMaskedUint8x64", argLength: 3, commutative: true},
@ -289,6 +277,12 @@ func simdGenericOps() []opData {
{name: "ConvertToUint32MaskedFloat32x4", argLength: 2, commutative: false}, {name: "ConvertToUint32MaskedFloat32x4", argLength: 2, commutative: false},
{name: "ConvertToUint32MaskedFloat32x8", argLength: 2, commutative: false}, {name: "ConvertToUint32MaskedFloat32x8", argLength: 2, commutative: false},
{name: "ConvertToUint32MaskedFloat32x16", argLength: 2, commutative: false}, {name: "ConvertToUint32MaskedFloat32x16", argLength: 2, commutative: false},
{name: "CopySignInt8x16", argLength: 2, commutative: false},
{name: "CopySignInt8x32", argLength: 2, commutative: false},
{name: "CopySignInt16x8", argLength: 2, commutative: false},
{name: "CopySignInt16x16", argLength: 2, commutative: false},
{name: "CopySignInt32x4", argLength: 2, commutative: false},
{name: "CopySignInt32x8", argLength: 2, commutative: false},
{name: "DivFloat32x4", argLength: 2, commutative: false}, {name: "DivFloat32x4", argLength: 2, commutative: false},
{name: "DivFloat32x8", argLength: 2, commutative: false}, {name: "DivFloat32x8", argLength: 2, commutative: false},
{name: "DivFloat32x16", argLength: 2, commutative: false}, {name: "DivFloat32x16", argLength: 2, commutative: false},
@ -301,9 +295,18 @@ func simdGenericOps() []opData {
{name: "DivMaskedFloat64x2", argLength: 3, commutative: false}, {name: "DivMaskedFloat64x2", argLength: 3, commutative: false},
{name: "DivMaskedFloat64x4", argLength: 3, commutative: false}, {name: "DivMaskedFloat64x4", argLength: 3, commutative: false},
{name: "DivMaskedFloat64x8", argLength: 3, commutative: false}, {name: "DivMaskedFloat64x8", argLength: 3, commutative: false},
{name: "DotProdBroadcastFloat32x4", argLength: 2, commutative: true}, {name: "DotProdPairsInt16x8", argLength: 2, commutative: false},
{name: "DotProdBroadcastFloat32x8", argLength: 2, commutative: true}, {name: "DotProdPairsInt16x16", argLength: 2, commutative: false},
{name: "DotProdBroadcastFloat64x2", argLength: 2, commutative: true}, {name: "DotProdPairsInt16x32", argLength: 2, commutative: false},
{name: "DotProdPairsMaskedInt16x8", argLength: 3, commutative: false},
{name: "DotProdPairsMaskedInt16x16", argLength: 3, commutative: false},
{name: "DotProdPairsMaskedInt16x32", argLength: 3, commutative: false},
{name: "DotProdPairsSaturatedMaskedUint8x16", argLength: 3, commutative: false},
{name: "DotProdPairsSaturatedMaskedUint8x32", argLength: 3, commutative: false},
{name: "DotProdPairsSaturatedMaskedUint8x64", argLength: 3, commutative: false},
{name: "DotProdPairsSaturatedUint8x16", argLength: 2, commutative: false},
{name: "DotProdPairsSaturatedUint8x32", argLength: 2, commutative: false},
{name: "DotProdPairsSaturatedUint8x64", argLength: 2, commutative: false},
{name: "EqualFloat32x4", argLength: 2, commutative: true}, {name: "EqualFloat32x4", argLength: 2, commutative: true},
{name: "EqualFloat32x8", argLength: 2, commutative: true}, {name: "EqualFloat32x8", argLength: 2, commutative: true},
{name: "EqualFloat32x16", argLength: 2, commutative: true}, {name: "EqualFloat32x16", argLength: 2, commutative: true},
@ -398,42 +401,6 @@ func simdGenericOps() []opData {
{name: "FloorFloat32x8", argLength: 1, commutative: false}, {name: "FloorFloat32x8", argLength: 1, commutative: false},
{name: "FloorFloat64x2", argLength: 1, commutative: false}, {name: "FloorFloat64x2", argLength: 1, commutative: false},
{name: "FloorFloat64x4", argLength: 1, commutative: false}, {name: "FloorFloat64x4", argLength: 1, commutative: false},
{name: "FusedMultiplyAddFloat32x4", argLength: 3, commutative: false},
{name: "FusedMultiplyAddFloat32x8", argLength: 3, commutative: false},
{name: "FusedMultiplyAddFloat32x16", argLength: 3, commutative: false},
{name: "FusedMultiplyAddFloat64x2", argLength: 3, commutative: false},
{name: "FusedMultiplyAddFloat64x4", argLength: 3, commutative: false},
{name: "FusedMultiplyAddFloat64x8", argLength: 3, commutative: false},
{name: "FusedMultiplyAddMaskedFloat32x4", argLength: 4, commutative: false},
{name: "FusedMultiplyAddMaskedFloat32x8", argLength: 4, commutative: false},
{name: "FusedMultiplyAddMaskedFloat32x16", argLength: 4, commutative: false},
{name: "FusedMultiplyAddMaskedFloat64x2", argLength: 4, commutative: false},
{name: "FusedMultiplyAddMaskedFloat64x4", argLength: 4, commutative: false},
{name: "FusedMultiplyAddMaskedFloat64x8", argLength: 4, commutative: false},
{name: "FusedMultiplyAddSubFloat32x4", argLength: 3, commutative: false},
{name: "FusedMultiplyAddSubFloat32x8", argLength: 3, commutative: false},
{name: "FusedMultiplyAddSubFloat32x16", argLength: 3, commutative: false},
{name: "FusedMultiplyAddSubFloat64x2", argLength: 3, commutative: false},
{name: "FusedMultiplyAddSubFloat64x4", argLength: 3, commutative: false},
{name: "FusedMultiplyAddSubFloat64x8", argLength: 3, commutative: false},
{name: "FusedMultiplyAddSubMaskedFloat32x4", argLength: 4, commutative: false},
{name: "FusedMultiplyAddSubMaskedFloat32x8", argLength: 4, commutative: false},
{name: "FusedMultiplyAddSubMaskedFloat32x16", argLength: 4, commutative: false},
{name: "FusedMultiplyAddSubMaskedFloat64x2", argLength: 4, commutative: false},
{name: "FusedMultiplyAddSubMaskedFloat64x4", argLength: 4, commutative: false},
{name: "FusedMultiplyAddSubMaskedFloat64x8", argLength: 4, commutative: false},
{name: "FusedMultiplySubAddFloat32x4", argLength: 3, commutative: false},
{name: "FusedMultiplySubAddFloat32x8", argLength: 3, commutative: false},
{name: "FusedMultiplySubAddFloat32x16", argLength: 3, commutative: false},
{name: "FusedMultiplySubAddFloat64x2", argLength: 3, commutative: false},
{name: "FusedMultiplySubAddFloat64x4", argLength: 3, commutative: false},
{name: "FusedMultiplySubAddFloat64x8", argLength: 3, commutative: false},
{name: "FusedMultiplySubAddMaskedFloat32x4", argLength: 4, commutative: false},
{name: "FusedMultiplySubAddMaskedFloat32x8", argLength: 4, commutative: false},
{name: "FusedMultiplySubAddMaskedFloat32x16", argLength: 4, commutative: false},
{name: "FusedMultiplySubAddMaskedFloat64x2", argLength: 4, commutative: false},
{name: "FusedMultiplySubAddMaskedFloat64x4", argLength: 4, commutative: false},
{name: "FusedMultiplySubAddMaskedFloat64x8", argLength: 4, commutative: false},
{name: "GaloisFieldMulMaskedUint8x16", argLength: 3, commutative: false}, {name: "GaloisFieldMulMaskedUint8x16", argLength: 3, commutative: false},
{name: "GaloisFieldMulMaskedUint8x32", argLength: 3, commutative: false}, {name: "GaloisFieldMulMaskedUint8x32", argLength: 3, commutative: false},
{name: "GaloisFieldMulMaskedUint8x64", argLength: 3, commutative: false}, {name: "GaloisFieldMulMaskedUint8x64", argLength: 3, commutative: false},
@ -852,22 +819,34 @@ func simdGenericOps() []opData {
{name: "MinUint64x2", argLength: 2, commutative: true}, {name: "MinUint64x2", argLength: 2, commutative: true},
{name: "MinUint64x4", argLength: 2, commutative: true}, {name: "MinUint64x4", argLength: 2, commutative: true},
{name: "MinUint64x8", argLength: 2, commutative: true}, {name: "MinUint64x8", argLength: 2, commutative: true},
{name: "MulAddFloat32x4", argLength: 3, commutative: false},
{name: "MulAddFloat32x8", argLength: 3, commutative: false},
{name: "MulAddFloat32x16", argLength: 3, commutative: false},
{name: "MulAddFloat64x2", argLength: 3, commutative: false},
{name: "MulAddFloat64x4", argLength: 3, commutative: false},
{name: "MulAddFloat64x8", argLength: 3, commutative: false},
{name: "MulAddMaskedFloat32x4", argLength: 4, commutative: false},
{name: "MulAddMaskedFloat32x8", argLength: 4, commutative: false},
{name: "MulAddMaskedFloat32x16", argLength: 4, commutative: false},
{name: "MulAddMaskedFloat64x2", argLength: 4, commutative: false},
{name: "MulAddMaskedFloat64x4", argLength: 4, commutative: false},
{name: "MulAddMaskedFloat64x8", argLength: 4, commutative: false},
{name: "MulAddSubFloat32x4", argLength: 3, commutative: false},
{name: "MulAddSubFloat32x8", argLength: 3, commutative: false},
{name: "MulAddSubFloat32x16", argLength: 3, commutative: false},
{name: "MulAddSubFloat64x2", argLength: 3, commutative: false},
{name: "MulAddSubFloat64x4", argLength: 3, commutative: false},
{name: "MulAddSubFloat64x8", argLength: 3, commutative: false},
{name: "MulAddSubMaskedFloat32x4", argLength: 4, commutative: false},
{name: "MulAddSubMaskedFloat32x8", argLength: 4, commutative: false},
{name: "MulAddSubMaskedFloat32x16", argLength: 4, commutative: false},
{name: "MulAddSubMaskedFloat64x2", argLength: 4, commutative: false},
{name: "MulAddSubMaskedFloat64x4", argLength: 4, commutative: false},
{name: "MulAddSubMaskedFloat64x8", argLength: 4, commutative: false},
{name: "MulEvenWidenInt32x4", argLength: 2, commutative: true}, {name: "MulEvenWidenInt32x4", argLength: 2, commutative: true},
{name: "MulEvenWidenInt32x8", argLength: 2, commutative: true}, {name: "MulEvenWidenInt32x8", argLength: 2, commutative: true},
{name: "MulEvenWidenInt64x2", argLength: 2, commutative: true},
{name: "MulEvenWidenInt64x4", argLength: 2, commutative: true},
{name: "MulEvenWidenInt64x8", argLength: 2, commutative: true},
{name: "MulEvenWidenMaskedInt64x2", argLength: 3, commutative: true},
{name: "MulEvenWidenMaskedInt64x4", argLength: 3, commutative: true},
{name: "MulEvenWidenMaskedInt64x8", argLength: 3, commutative: true},
{name: "MulEvenWidenMaskedUint64x2", argLength: 3, commutative: true},
{name: "MulEvenWidenMaskedUint64x4", argLength: 3, commutative: true},
{name: "MulEvenWidenMaskedUint64x8", argLength: 3, commutative: true},
{name: "MulEvenWidenUint32x4", argLength: 2, commutative: true}, {name: "MulEvenWidenUint32x4", argLength: 2, commutative: true},
{name: "MulEvenWidenUint32x8", argLength: 2, commutative: true}, {name: "MulEvenWidenUint32x8", argLength: 2, commutative: true},
{name: "MulEvenWidenUint64x2", argLength: 2, commutative: true},
{name: "MulEvenWidenUint64x4", argLength: 2, commutative: true},
{name: "MulEvenWidenUint64x8", argLength: 2, commutative: true},
{name: "MulFloat32x4", argLength: 2, commutative: true}, {name: "MulFloat32x4", argLength: 2, commutative: true},
{name: "MulFloat32x8", argLength: 2, commutative: true}, {name: "MulFloat32x8", argLength: 2, commutative: true},
{name: "MulFloat32x16", argLength: 2, commutative: true}, {name: "MulFloat32x16", argLength: 2, commutative: true},
@ -880,12 +859,6 @@ func simdGenericOps() []opData {
{name: "MulHighMaskedInt16x8", argLength: 3, commutative: true}, {name: "MulHighMaskedInt16x8", argLength: 3, commutative: true},
{name: "MulHighMaskedInt16x16", argLength: 3, commutative: true}, {name: "MulHighMaskedInt16x16", argLength: 3, commutative: true},
{name: "MulHighMaskedInt16x32", argLength: 3, commutative: true}, {name: "MulHighMaskedInt16x32", argLength: 3, commutative: true},
{name: "MulHighMaskedUint16x8", argLength: 3, commutative: true},
{name: "MulHighMaskedUint16x16", argLength: 3, commutative: true},
{name: "MulHighMaskedUint16x32", argLength: 3, commutative: true},
{name: "MulHighUint16x8", argLength: 2, commutative: true},
{name: "MulHighUint16x16", argLength: 2, commutative: true},
{name: "MulHighUint16x32", argLength: 2, commutative: true},
{name: "MulInt16x8", argLength: 2, commutative: true}, {name: "MulInt16x8", argLength: 2, commutative: true},
{name: "MulInt16x16", argLength: 2, commutative: true}, {name: "MulInt16x16", argLength: 2, commutative: true},
{name: "MulInt16x32", argLength: 2, commutative: true}, {name: "MulInt16x32", argLength: 2, commutative: true},
@ -910,6 +883,36 @@ func simdGenericOps() []opData {
{name: "MulMaskedInt64x2", argLength: 3, commutative: true}, {name: "MulMaskedInt64x2", argLength: 3, commutative: true},
{name: "MulMaskedInt64x4", argLength: 3, commutative: true}, {name: "MulMaskedInt64x4", argLength: 3, commutative: true},
{name: "MulMaskedInt64x8", argLength: 3, commutative: true}, {name: "MulMaskedInt64x8", argLength: 3, commutative: true},
{name: "MulMaskedUint16x8", argLength: 3, commutative: true},
{name: "MulMaskedUint16x16", argLength: 3, commutative: true},
{name: "MulMaskedUint16x32", argLength: 3, commutative: true},
{name: "MulMaskedUint32x4", argLength: 3, commutative: true},
{name: "MulMaskedUint32x8", argLength: 3, commutative: true},
{name: "MulMaskedUint32x16", argLength: 3, commutative: true},
{name: "MulMaskedUint64x2", argLength: 3, commutative: true},
{name: "MulMaskedUint64x4", argLength: 3, commutative: true},
{name: "MulMaskedUint64x8", argLength: 3, commutative: true},
{name: "MulSubAddFloat32x4", argLength: 3, commutative: false},
{name: "MulSubAddFloat32x8", argLength: 3, commutative: false},
{name: "MulSubAddFloat32x16", argLength: 3, commutative: false},
{name: "MulSubAddFloat64x2", argLength: 3, commutative: false},
{name: "MulSubAddFloat64x4", argLength: 3, commutative: false},
{name: "MulSubAddFloat64x8", argLength: 3, commutative: false},
{name: "MulSubAddMaskedFloat32x4", argLength: 4, commutative: false},
{name: "MulSubAddMaskedFloat32x8", argLength: 4, commutative: false},
{name: "MulSubAddMaskedFloat32x16", argLength: 4, commutative: false},
{name: "MulSubAddMaskedFloat64x2", argLength: 4, commutative: false},
{name: "MulSubAddMaskedFloat64x4", argLength: 4, commutative: false},
{name: "MulSubAddMaskedFloat64x8", argLength: 4, commutative: false},
{name: "MulUint16x8", argLength: 2, commutative: true},
{name: "MulUint16x16", argLength: 2, commutative: true},
{name: "MulUint16x32", argLength: 2, commutative: true},
{name: "MulUint32x4", argLength: 2, commutative: true},
{name: "MulUint32x8", argLength: 2, commutative: true},
{name: "MulUint32x16", argLength: 2, commutative: true},
{name: "MulUint64x2", argLength: 2, commutative: true},
{name: "MulUint64x4", argLength: 2, commutative: true},
{name: "MulUint64x8", argLength: 2, commutative: true},
{name: "NotEqualFloat32x4", argLength: 2, commutative: true}, {name: "NotEqualFloat32x4", argLength: 2, commutative: true},
{name: "NotEqualFloat32x8", argLength: 2, commutative: true}, {name: "NotEqualFloat32x8", argLength: 2, commutative: true},
{name: "NotEqualFloat32x16", argLength: 2, commutative: true}, {name: "NotEqualFloat32x16", argLength: 2, commutative: true},
@ -970,6 +973,54 @@ func simdGenericOps() []opData {
{name: "NotEqualUint64x2", argLength: 2, commutative: true}, {name: "NotEqualUint64x2", argLength: 2, commutative: true},
{name: "NotEqualUint64x4", argLength: 2, commutative: true}, {name: "NotEqualUint64x4", argLength: 2, commutative: true},
{name: "NotEqualUint64x8", argLength: 2, commutative: true}, {name: "NotEqualUint64x8", argLength: 2, commutative: true},
{name: "OnesCountInt8x16", argLength: 1, commutative: false},
{name: "OnesCountInt8x32", argLength: 1, commutative: false},
{name: "OnesCountInt8x64", argLength: 1, commutative: false},
{name: "OnesCountInt16x8", argLength: 1, commutative: false},
{name: "OnesCountInt16x16", argLength: 1, commutative: false},
{name: "OnesCountInt16x32", argLength: 1, commutative: false},
{name: "OnesCountInt32x4", argLength: 1, commutative: false},
{name: "OnesCountInt32x8", argLength: 1, commutative: false},
{name: "OnesCountInt32x16", argLength: 1, commutative: false},
{name: "OnesCountInt64x2", argLength: 1, commutative: false},
{name: "OnesCountInt64x4", argLength: 1, commutative: false},
{name: "OnesCountInt64x8", argLength: 1, commutative: false},
{name: "OnesCountMaskedInt8x16", argLength: 2, commutative: false},
{name: "OnesCountMaskedInt8x32", argLength: 2, commutative: false},
{name: "OnesCountMaskedInt8x64", argLength: 2, commutative: false},
{name: "OnesCountMaskedInt16x8", argLength: 2, commutative: false},
{name: "OnesCountMaskedInt16x16", argLength: 2, commutative: false},
{name: "OnesCountMaskedInt16x32", argLength: 2, commutative: false},
{name: "OnesCountMaskedInt32x4", argLength: 2, commutative: false},
{name: "OnesCountMaskedInt32x8", argLength: 2, commutative: false},
{name: "OnesCountMaskedInt32x16", argLength: 2, commutative: false},
{name: "OnesCountMaskedInt64x2", argLength: 2, commutative: false},
{name: "OnesCountMaskedInt64x4", argLength: 2, commutative: false},
{name: "OnesCountMaskedInt64x8", argLength: 2, commutative: false},
{name: "OnesCountMaskedUint8x16", argLength: 2, commutative: false},
{name: "OnesCountMaskedUint8x32", argLength: 2, commutative: false},
{name: "OnesCountMaskedUint8x64", argLength: 2, commutative: false},
{name: "OnesCountMaskedUint16x8", argLength: 2, commutative: false},
{name: "OnesCountMaskedUint16x16", argLength: 2, commutative: false},
{name: "OnesCountMaskedUint16x32", argLength: 2, commutative: false},
{name: "OnesCountMaskedUint32x4", argLength: 2, commutative: false},
{name: "OnesCountMaskedUint32x8", argLength: 2, commutative: false},
{name: "OnesCountMaskedUint32x16", argLength: 2, commutative: false},
{name: "OnesCountMaskedUint64x2", argLength: 2, commutative: false},
{name: "OnesCountMaskedUint64x4", argLength: 2, commutative: false},
{name: "OnesCountMaskedUint64x8", argLength: 2, commutative: false},
{name: "OnesCountUint8x16", argLength: 1, commutative: false},
{name: "OnesCountUint8x32", argLength: 1, commutative: false},
{name: "OnesCountUint8x64", argLength: 1, commutative: false},
{name: "OnesCountUint16x8", argLength: 1, commutative: false},
{name: "OnesCountUint16x16", argLength: 1, commutative: false},
{name: "OnesCountUint16x32", argLength: 1, commutative: false},
{name: "OnesCountUint32x4", argLength: 1, commutative: false},
{name: "OnesCountUint32x8", argLength: 1, commutative: false},
{name: "OnesCountUint32x16", argLength: 1, commutative: false},
{name: "OnesCountUint64x2", argLength: 1, commutative: false},
{name: "OnesCountUint64x4", argLength: 1, commutative: false},
{name: "OnesCountUint64x8", argLength: 1, commutative: false},
{name: "OrInt8x16", argLength: 2, commutative: true}, {name: "OrInt8x16", argLength: 2, commutative: true},
{name: "OrInt8x32", argLength: 2, commutative: true}, {name: "OrInt8x32", argLength: 2, commutative: true},
{name: "OrInt8x64", argLength: 2, commutative: true}, {name: "OrInt8x64", argLength: 2, commutative: true},
@ -1006,12 +1057,6 @@ func simdGenericOps() []opData {
{name: "OrUint64x2", argLength: 2, commutative: true}, {name: "OrUint64x2", argLength: 2, commutative: true},
{name: "OrUint64x4", argLength: 2, commutative: true}, {name: "OrUint64x4", argLength: 2, commutative: true},
{name: "OrUint64x8", argLength: 2, commutative: true}, {name: "OrUint64x8", argLength: 2, commutative: true},
{name: "PairDotProdInt16x8", argLength: 2, commutative: false},
{name: "PairDotProdInt16x16", argLength: 2, commutative: false},
{name: "PairDotProdInt16x32", argLength: 2, commutative: false},
{name: "PairDotProdMaskedInt16x8", argLength: 3, commutative: false},
{name: "PairDotProdMaskedInt16x16", argLength: 3, commutative: false},
{name: "PairDotProdMaskedInt16x32", argLength: 3, commutative: false},
{name: "Permute2Float32x4", argLength: 3, commutative: false}, {name: "Permute2Float32x4", argLength: 3, commutative: false},
{name: "Permute2Float32x8", argLength: 3, commutative: false}, {name: "Permute2Float32x8", argLength: 3, commutative: false},
{name: "Permute2Float32x16", argLength: 3, commutative: false}, {name: "Permute2Float32x16", argLength: 3, commutative: false},
@ -1120,54 +1165,30 @@ func simdGenericOps() []opData {
{name: "PermuteUint32x16", argLength: 2, commutative: false}, {name: "PermuteUint32x16", argLength: 2, commutative: false},
{name: "PermuteUint64x4", argLength: 2, commutative: false}, {name: "PermuteUint64x4", argLength: 2, commutative: false},
{name: "PermuteUint64x8", argLength: 2, commutative: false}, {name: "PermuteUint64x8", argLength: 2, commutative: false},
{name: "PopCountInt8x16", argLength: 1, commutative: false}, {name: "ReciprocalFloat32x4", argLength: 1, commutative: false},
{name: "PopCountInt8x32", argLength: 1, commutative: false}, {name: "ReciprocalFloat32x8", argLength: 1, commutative: false},
{name: "PopCountInt8x64", argLength: 1, commutative: false}, {name: "ReciprocalFloat32x16", argLength: 1, commutative: false},
{name: "PopCountInt16x8", argLength: 1, commutative: false}, {name: "ReciprocalFloat64x2", argLength: 1, commutative: false},
{name: "PopCountInt16x16", argLength: 1, commutative: false}, {name: "ReciprocalFloat64x4", argLength: 1, commutative: false},
{name: "PopCountInt16x32", argLength: 1, commutative: false}, {name: "ReciprocalFloat64x8", argLength: 1, commutative: false},
{name: "PopCountInt32x4", argLength: 1, commutative: false}, {name: "ReciprocalMaskedFloat32x4", argLength: 2, commutative: false},
{name: "PopCountInt32x8", argLength: 1, commutative: false}, {name: "ReciprocalMaskedFloat32x8", argLength: 2, commutative: false},
{name: "PopCountInt32x16", argLength: 1, commutative: false}, {name: "ReciprocalMaskedFloat32x16", argLength: 2, commutative: false},
{name: "PopCountInt64x2", argLength: 1, commutative: false}, {name: "ReciprocalMaskedFloat64x2", argLength: 2, commutative: false},
{name: "PopCountInt64x4", argLength: 1, commutative: false}, {name: "ReciprocalMaskedFloat64x4", argLength: 2, commutative: false},
{name: "PopCountInt64x8", argLength: 1, commutative: false}, {name: "ReciprocalMaskedFloat64x8", argLength: 2, commutative: false},
{name: "PopCountMaskedInt8x16", argLength: 2, commutative: false}, {name: "ReciprocalSqrtFloat32x4", argLength: 1, commutative: false},
{name: "PopCountMaskedInt8x32", argLength: 2, commutative: false}, {name: "ReciprocalSqrtFloat32x8", argLength: 1, commutative: false},
{name: "PopCountMaskedInt8x64", argLength: 2, commutative: false}, {name: "ReciprocalSqrtFloat32x16", argLength: 1, commutative: false},
{name: "PopCountMaskedInt16x8", argLength: 2, commutative: false}, {name: "ReciprocalSqrtFloat64x2", argLength: 1, commutative: false},
{name: "PopCountMaskedInt16x16", argLength: 2, commutative: false}, {name: "ReciprocalSqrtFloat64x4", argLength: 1, commutative: false},
{name: "PopCountMaskedInt16x32", argLength: 2, commutative: false}, {name: "ReciprocalSqrtFloat64x8", argLength: 1, commutative: false},
{name: "PopCountMaskedInt32x4", argLength: 2, commutative: false}, {name: "ReciprocalSqrtMaskedFloat32x4", argLength: 2, commutative: false},
{name: "PopCountMaskedInt32x8", argLength: 2, commutative: false}, {name: "ReciprocalSqrtMaskedFloat32x8", argLength: 2, commutative: false},
{name: "PopCountMaskedInt32x16", argLength: 2, commutative: false}, {name: "ReciprocalSqrtMaskedFloat32x16", argLength: 2, commutative: false},
{name: "PopCountMaskedInt64x2", argLength: 2, commutative: false}, {name: "ReciprocalSqrtMaskedFloat64x2", argLength: 2, commutative: false},
{name: "PopCountMaskedInt64x4", argLength: 2, commutative: false}, {name: "ReciprocalSqrtMaskedFloat64x4", argLength: 2, commutative: false},
{name: "PopCountMaskedInt64x8", argLength: 2, commutative: false}, {name: "ReciprocalSqrtMaskedFloat64x8", argLength: 2, commutative: false},
{name: "PopCountMaskedUint8x16", argLength: 2, commutative: false},
{name: "PopCountMaskedUint8x32", argLength: 2, commutative: false},
{name: "PopCountMaskedUint8x64", argLength: 2, commutative: false},
{name: "PopCountMaskedUint16x8", argLength: 2, commutative: false},
{name: "PopCountMaskedUint16x16", argLength: 2, commutative: false},
{name: "PopCountMaskedUint16x32", argLength: 2, commutative: false},
{name: "PopCountMaskedUint32x4", argLength: 2, commutative: false},
{name: "PopCountMaskedUint32x8", argLength: 2, commutative: false},
{name: "PopCountMaskedUint32x16", argLength: 2, commutative: false},
{name: "PopCountMaskedUint64x2", argLength: 2, commutative: false},
{name: "PopCountMaskedUint64x4", argLength: 2, commutative: false},
{name: "PopCountMaskedUint64x8", argLength: 2, commutative: false},
{name: "PopCountUint8x16", argLength: 1, commutative: false},
{name: "PopCountUint8x32", argLength: 1, commutative: false},
{name: "PopCountUint8x64", argLength: 1, commutative: false},
{name: "PopCountUint16x8", argLength: 1, commutative: false},
{name: "PopCountUint16x16", argLength: 1, commutative: false},
{name: "PopCountUint16x32", argLength: 1, commutative: false},
{name: "PopCountUint32x4", argLength: 1, commutative: false},
{name: "PopCountUint32x8", argLength: 1, commutative: false},
{name: "PopCountUint32x16", argLength: 1, commutative: false},
{name: "PopCountUint64x2", argLength: 1, commutative: false},
{name: "PopCountUint64x4", argLength: 1, commutative: false},
{name: "PopCountUint64x8", argLength: 1, commutative: false},
{name: "RotateLeftInt32x4", argLength: 2, commutative: false}, {name: "RotateLeftInt32x4", argLength: 2, commutative: false},
{name: "RotateLeftInt32x8", argLength: 2, commutative: false}, {name: "RotateLeftInt32x8", argLength: 2, commutative: false},
{name: "RotateLeftInt32x16", argLength: 2, commutative: false}, {name: "RotateLeftInt32x16", argLength: 2, commutative: false},
@ -1216,28 +1237,10 @@ func simdGenericOps() []opData {
{name: "RotateRightUint64x2", argLength: 2, commutative: false}, {name: "RotateRightUint64x2", argLength: 2, commutative: false},
{name: "RotateRightUint64x4", argLength: 2, commutative: false}, {name: "RotateRightUint64x4", argLength: 2, commutative: false},
{name: "RotateRightUint64x8", argLength: 2, commutative: false}, {name: "RotateRightUint64x8", argLength: 2, commutative: false},
{name: "RoundFloat32x4", argLength: 1, commutative: false}, {name: "RoundToEvenFloat32x4", argLength: 1, commutative: false},
{name: "RoundFloat32x8", argLength: 1, commutative: false}, {name: "RoundToEvenFloat32x8", argLength: 1, commutative: false},
{name: "RoundFloat64x2", argLength: 1, commutative: false}, {name: "RoundToEvenFloat64x2", argLength: 1, commutative: false},
{name: "RoundFloat64x4", argLength: 1, commutative: false}, {name: "RoundToEvenFloat64x4", argLength: 1, commutative: false},
{name: "SaturatedAddDotProdInt32x4", argLength: 3, commutative: false},
{name: "SaturatedAddDotProdInt32x8", argLength: 3, commutative: false},
{name: "SaturatedAddDotProdInt32x16", argLength: 3, commutative: false},
{name: "SaturatedAddDotProdMaskedInt32x4", argLength: 4, commutative: false},
{name: "SaturatedAddDotProdMaskedInt32x8", argLength: 4, commutative: false},
{name: "SaturatedAddDotProdMaskedInt32x16", argLength: 4, commutative: false},
{name: "SaturatedUnsignedSignedPairDotProdMaskedUint8x16", argLength: 3, commutative: false},
{name: "SaturatedUnsignedSignedPairDotProdMaskedUint8x32", argLength: 3, commutative: false},
{name: "SaturatedUnsignedSignedPairDotProdMaskedUint8x64", argLength: 3, commutative: false},
{name: "SaturatedUnsignedSignedPairDotProdUint8x16", argLength: 2, commutative: false},
{name: "SaturatedUnsignedSignedPairDotProdUint8x32", argLength: 2, commutative: false},
{name: "SaturatedUnsignedSignedPairDotProdUint8x64", argLength: 2, commutative: false},
{name: "SaturatedUnsignedSignedQuadDotProdAccumulateInt32x4", argLength: 3, commutative: false},
{name: "SaturatedUnsignedSignedQuadDotProdAccumulateInt32x8", argLength: 3, commutative: false},
{name: "SaturatedUnsignedSignedQuadDotProdAccumulateInt32x16", argLength: 3, commutative: false},
{name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4", argLength: 4, commutative: false},
{name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8", argLength: 4, commutative: false},
{name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x16", argLength: 4, commutative: false},
{name: "ScaleFloat32x4", argLength: 2, commutative: false}, {name: "ScaleFloat32x4", argLength: 2, commutative: false},
{name: "ScaleFloat32x8", argLength: 2, commutative: false}, {name: "ScaleFloat32x8", argLength: 2, commutative: false},
{name: "ScaleFloat32x16", argLength: 2, commutative: false}, {name: "ScaleFloat32x16", argLength: 2, commutative: false},
@ -1506,12 +1509,6 @@ func simdGenericOps() []opData {
{name: "ShiftRightUint64x2", argLength: 2, commutative: false}, {name: "ShiftRightUint64x2", argLength: 2, commutative: false},
{name: "ShiftRightUint64x4", argLength: 2, commutative: false}, {name: "ShiftRightUint64x4", argLength: 2, commutative: false},
{name: "ShiftRightUint64x8", argLength: 2, commutative: false}, {name: "ShiftRightUint64x8", argLength: 2, commutative: false},
{name: "SignInt8x16", argLength: 2, commutative: false},
{name: "SignInt8x32", argLength: 2, commutative: false},
{name: "SignInt16x8", argLength: 2, commutative: false},
{name: "SignInt16x16", argLength: 2, commutative: false},
{name: "SignInt32x4", argLength: 2, commutative: false},
{name: "SignInt32x8", argLength: 2, commutative: false},
{name: "SqrtFloat32x4", argLength: 1, commutative: false}, {name: "SqrtFloat32x4", argLength: 1, commutative: false},
{name: "SqrtFloat32x8", argLength: 1, commutative: false}, {name: "SqrtFloat32x8", argLength: 1, commutative: false},
{name: "SqrtFloat32x16", argLength: 1, commutative: false}, {name: "SqrtFloat32x16", argLength: 1, commutative: false},
@ -1626,12 +1623,6 @@ func simdGenericOps() []opData {
{name: "TruncFloat32x8", argLength: 1, commutative: false}, {name: "TruncFloat32x8", argLength: 1, commutative: false},
{name: "TruncFloat64x2", argLength: 1, commutative: false}, {name: "TruncFloat64x2", argLength: 1, commutative: false},
{name: "TruncFloat64x4", argLength: 1, commutative: false}, {name: "TruncFloat64x4", argLength: 1, commutative: false},
{name: "UnsignedSignedQuadDotProdAccumulateInt32x4", argLength: 3, commutative: false},
{name: "UnsignedSignedQuadDotProdAccumulateInt32x8", argLength: 3, commutative: false},
{name: "UnsignedSignedQuadDotProdAccumulateInt32x16", argLength: 3, commutative: false},
{name: "UnsignedSignedQuadDotProdAccumulateMaskedInt32x4", argLength: 4, commutative: false},
{name: "UnsignedSignedQuadDotProdAccumulateMaskedInt32x8", argLength: 4, commutative: false},
{name: "UnsignedSignedQuadDotProdAccumulateMaskedInt32x16", argLength: 4, commutative: false},
{name: "XorInt8x16", argLength: 2, commutative: true}, {name: "XorInt8x16", argLength: 2, commutative: true},
{name: "XorInt8x32", argLength: 2, commutative: true}, {name: "XorInt8x32", argLength: 2, commutative: true},
{name: "XorInt8x64", argLength: 2, commutative: true}, {name: "XorInt8x64", argLength: 2, commutative: true},
@ -1790,30 +1781,30 @@ func simdGenericOps() []opData {
{name: "RotateAllRightUint64x2", argLength: 1, commutative: false, aux: "Int8"}, {name: "RotateAllRightUint64x2", argLength: 1, commutative: false, aux: "Int8"},
{name: "RotateAllRightUint64x4", argLength: 1, commutative: false, aux: "Int8"}, {name: "RotateAllRightUint64x4", argLength: 1, commutative: false, aux: "Int8"},
{name: "RotateAllRightUint64x8", argLength: 1, commutative: false, aux: "Int8"}, {name: "RotateAllRightUint64x8", argLength: 1, commutative: false, aux: "Int8"},
{name: "RoundScaledFloat32x4", argLength: 1, commutative: false, aux: "Int8"}, {name: "RoundToEvenScaledFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
{name: "RoundScaledFloat32x8", argLength: 1, commutative: false, aux: "Int8"}, {name: "RoundToEvenScaledFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
{name: "RoundScaledFloat32x16", argLength: 1, commutative: false, aux: "Int8"}, {name: "RoundToEvenScaledFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
{name: "RoundScaledFloat64x2", argLength: 1, commutative: false, aux: "Int8"}, {name: "RoundToEvenScaledFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
{name: "RoundScaledFloat64x4", argLength: 1, commutative: false, aux: "Int8"}, {name: "RoundToEvenScaledFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
{name: "RoundScaledFloat64x8", argLength: 1, commutative: false, aux: "Int8"}, {name: "RoundToEvenScaledFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
{name: "RoundScaledMaskedFloat32x4", argLength: 2, commutative: false, aux: "Int8"}, {name: "RoundToEvenScaledMaskedFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
{name: "RoundScaledMaskedFloat32x8", argLength: 2, commutative: false, aux: "Int8"}, {name: "RoundToEvenScaledMaskedFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
{name: "RoundScaledMaskedFloat32x16", argLength: 2, commutative: false, aux: "Int8"}, {name: "RoundToEvenScaledMaskedFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
{name: "RoundScaledMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"}, {name: "RoundToEvenScaledMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
{name: "RoundScaledMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"}, {name: "RoundToEvenScaledMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
{name: "RoundScaledMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"}, {name: "RoundToEvenScaledMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
{name: "RoundScaledResidueFloat32x4", argLength: 1, commutative: false, aux: "Int8"}, {name: "RoundToEvenScaledResidueFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
{name: "RoundScaledResidueFloat32x8", argLength: 1, commutative: false, aux: "Int8"}, {name: "RoundToEvenScaledResidueFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
{name: "RoundScaledResidueFloat32x16", argLength: 1, commutative: false, aux: "Int8"}, {name: "RoundToEvenScaledResidueFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
{name: "RoundScaledResidueFloat64x2", argLength: 1, commutative: false, aux: "Int8"}, {name: "RoundToEvenScaledResidueFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
{name: "RoundScaledResidueFloat64x4", argLength: 1, commutative: false, aux: "Int8"}, {name: "RoundToEvenScaledResidueFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
{name: "RoundScaledResidueFloat64x8", argLength: 1, commutative: false, aux: "Int8"}, {name: "RoundToEvenScaledResidueFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
{name: "RoundScaledResidueMaskedFloat32x4", argLength: 2, commutative: false, aux: "Int8"}, {name: "RoundToEvenScaledResidueMaskedFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
{name: "RoundScaledResidueMaskedFloat32x8", argLength: 2, commutative: false, aux: "Int8"}, {name: "RoundToEvenScaledResidueMaskedFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
{name: "RoundScaledResidueMaskedFloat32x16", argLength: 2, commutative: false, aux: "Int8"}, {name: "RoundToEvenScaledResidueMaskedFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
{name: "RoundScaledResidueMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"}, {name: "RoundToEvenScaledResidueMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
{name: "RoundScaledResidueMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"}, {name: "RoundToEvenScaledResidueMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
{name: "RoundScaledResidueMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"}, {name: "RoundToEvenScaledResidueMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
{name: "SetElemInt8x16", argLength: 2, commutative: false, aux: "Int8"}, {name: "SetElemInt8x16", argLength: 2, commutative: false, aux: "Int8"},
{name: "SetElemInt16x8", argLength: 2, commutative: false, aux: "Int8"}, {name: "SetElemInt16x8", argLength: 2, commutative: false, aux: "Int8"},
{name: "SetElemInt32x4", argLength: 2, commutative: false, aux: "Int8"}, {name: "SetElemInt32x4", argLength: 2, commutative: false, aux: "Int8"},

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -11,30 +11,30 @@ import (
const simdPackage = "simd" const simdPackage = "simd"
func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies ...sys.ArchFamily)) { func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies ...sys.ArchFamily)) {
addF(simdPackage, "Int8x16.Absolute", opLen1(ssa.OpAbsoluteInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x16.Abs", opLen1(ssa.OpAbsInt8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x32.Absolute", opLen1(ssa.OpAbsoluteInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x32.Abs", opLen1(ssa.OpAbsInt8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int8x64.Absolute", opLen1(ssa.OpAbsoluteInt8x64, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x64.Abs", opLen1(ssa.OpAbsInt8x64, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x8.Absolute", opLen1(ssa.OpAbsoluteInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x8.Abs", opLen1(ssa.OpAbsInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x16.Absolute", opLen1(ssa.OpAbsoluteInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x16.Abs", opLen1(ssa.OpAbsInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x32.Absolute", opLen1(ssa.OpAbsoluteInt16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x32.Abs", opLen1(ssa.OpAbsInt16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int32x4.Absolute", opLen1(ssa.OpAbsoluteInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int32x4.Abs", opLen1(ssa.OpAbsInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x8.Absolute", opLen1(ssa.OpAbsoluteInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int32x8.Abs", opLen1(ssa.OpAbsInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int32x16.Absolute", opLen1(ssa.OpAbsoluteInt32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int32x16.Abs", opLen1(ssa.OpAbsInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int64x2.Absolute", opLen1(ssa.OpAbsoluteInt64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int64x2.Abs", opLen1(ssa.OpAbsInt64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int64x4.Absolute", opLen1(ssa.OpAbsoluteInt64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int64x4.Abs", opLen1(ssa.OpAbsInt64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int64x8.Absolute", opLen1(ssa.OpAbsoluteInt64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int64x8.Abs", opLen1(ssa.OpAbsInt64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.AbsoluteMasked", opLen2(ssa.OpAbsoluteMaskedInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x16.AbsMasked", opLen2(ssa.OpAbsMaskedInt8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x32.AbsoluteMasked", opLen2(ssa.OpAbsoluteMaskedInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x32.AbsMasked", opLen2(ssa.OpAbsMaskedInt8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int8x64.AbsoluteMasked", opLen2(ssa.OpAbsoluteMaskedInt8x64, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x64.AbsMasked", opLen2(ssa.OpAbsMaskedInt8x64, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x8.AbsoluteMasked", opLen2(ssa.OpAbsoluteMaskedInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x8.AbsMasked", opLen2(ssa.OpAbsMaskedInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x16.AbsoluteMasked", opLen2(ssa.OpAbsoluteMaskedInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x16.AbsMasked", opLen2(ssa.OpAbsMaskedInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x32.AbsoluteMasked", opLen2(ssa.OpAbsoluteMaskedInt16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x32.AbsMasked", opLen2(ssa.OpAbsMaskedInt16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int32x4.AbsoluteMasked", opLen2(ssa.OpAbsoluteMaskedInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int32x4.AbsMasked", opLen2(ssa.OpAbsMaskedInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x8.AbsoluteMasked", opLen2(ssa.OpAbsoluteMaskedInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int32x8.AbsMasked", opLen2(ssa.OpAbsMaskedInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int32x16.AbsoluteMasked", opLen2(ssa.OpAbsoluteMaskedInt32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int32x16.AbsMasked", opLen2(ssa.OpAbsMaskedInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int64x2.AbsoluteMasked", opLen2(ssa.OpAbsoluteMaskedInt64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int64x2.AbsMasked", opLen2(ssa.OpAbsMaskedInt64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int64x4.AbsoluteMasked", opLen2(ssa.OpAbsoluteMaskedInt64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int64x4.AbsMasked", opLen2(ssa.OpAbsMaskedInt64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int64x8.AbsoluteMasked", opLen2(ssa.OpAbsoluteMaskedInt64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int64x8.AbsMasked", opLen2(ssa.OpAbsMaskedInt64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.Add", opLen2(ssa.OpAddFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x4.Add", opLen2(ssa.OpAddFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.Add", opLen2(ssa.OpAddFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x8.Add", opLen2(ssa.OpAddFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x16.Add", opLen2(ssa.OpAddFloat32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x16.Add", opLen2(ssa.OpAddFloat32x16, types.TypeVec512), sys.AMD64)
@ -65,12 +65,24 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Uint64x2.Add", opLen2(ssa.OpAddUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x2.Add", opLen2(ssa.OpAddUint64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint64x4.Add", opLen2(ssa.OpAddUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x4.Add", opLen2(ssa.OpAddUint64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint64x8.Add", opLen2(ssa.OpAddUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint64x8.Add", opLen2(ssa.OpAddUint64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int32x4.AddDotProd", opLen3(ssa.OpAddDotProdInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int32x4.AddDotProdPairsSaturated", opLen3(ssa.OpAddDotProdPairsSaturatedInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x8.AddDotProd", opLen3(ssa.OpAddDotProdInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int32x8.AddDotProdPairsSaturated", opLen3(ssa.OpAddDotProdPairsSaturatedInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int32x16.AddDotProd", opLen3(ssa.OpAddDotProdInt32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int32x16.AddDotProdPairsSaturated", opLen3(ssa.OpAddDotProdPairsSaturatedInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int32x4.AddDotProdMasked", opLen4(ssa.OpAddDotProdMaskedInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int32x4.AddDotProdPairsSaturatedMasked", opLen4(ssa.OpAddDotProdPairsSaturatedMaskedInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x8.AddDotProdMasked", opLen4(ssa.OpAddDotProdMaskedInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int32x8.AddDotProdPairsSaturatedMasked", opLen4(ssa.OpAddDotProdPairsSaturatedMaskedInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int32x16.AddDotProdMasked", opLen4(ssa.OpAddDotProdMaskedInt32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int32x16.AddDotProdPairsSaturatedMasked", opLen4(ssa.OpAddDotProdPairsSaturatedMaskedInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.AddDotProdQuadruple", opLen3_31(ssa.OpAddDotProdQuadrupleInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x32.AddDotProdQuadruple", opLen3_31(ssa.OpAddDotProdQuadrupleInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int8x64.AddDotProdQuadruple", opLen3_31(ssa.OpAddDotProdQuadrupleInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.AddDotProdQuadrupleMasked", opLen4_31(ssa.OpAddDotProdQuadrupleMaskedInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x32.AddDotProdQuadrupleMasked", opLen4_31(ssa.OpAddDotProdQuadrupleMaskedInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int8x64.AddDotProdQuadrupleMasked", opLen4_31(ssa.OpAddDotProdQuadrupleMaskedInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.AddDotProdQuadrupleSaturated", opLen3_31(ssa.OpAddDotProdQuadrupleSaturatedInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x32.AddDotProdQuadrupleSaturated", opLen3_31(ssa.OpAddDotProdQuadrupleSaturatedInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int8x64.AddDotProdQuadrupleSaturated", opLen3_31(ssa.OpAddDotProdQuadrupleSaturatedInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.AddDotProdQuadrupleSaturatedMasked", opLen4_31(ssa.OpAddDotProdQuadrupleSaturatedMaskedInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x32.AddDotProdQuadrupleSaturatedMasked", opLen4_31(ssa.OpAddDotProdQuadrupleSaturatedMaskedInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int8x64.AddDotProdQuadrupleSaturatedMasked", opLen4_31(ssa.OpAddDotProdQuadrupleSaturatedMaskedInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.AddMasked", opLen3(ssa.OpAddMaskedFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x4.AddMasked", opLen3(ssa.OpAddMaskedFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.AddMasked", opLen3(ssa.OpAddMaskedFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x8.AddMasked", opLen3(ssa.OpAddMaskedFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x16.AddMasked", opLen3(ssa.OpAddMaskedFloat32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x16.AddMasked", opLen3(ssa.OpAddMaskedFloat32x16, types.TypeVec512), sys.AMD64)
@ -215,30 +227,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Uint64x2.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x2.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedUint64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint64x4.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x4.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedUint64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint64x8.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint64x8.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedUint64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.ApproximateReciprocal", opLen1(ssa.OpApproximateReciprocalFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.ApproximateReciprocal", opLen1(ssa.OpApproximateReciprocalFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x16.ApproximateReciprocal", opLen1(ssa.OpApproximateReciprocalFloat32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float64x2.ApproximateReciprocal", opLen1(ssa.OpApproximateReciprocalFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x4.ApproximateReciprocal", opLen1(ssa.OpApproximateReciprocalFloat64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x8.ApproximateReciprocal", opLen1(ssa.OpApproximateReciprocalFloat64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.ApproximateReciprocalMasked", opLen2(ssa.OpApproximateReciprocalMaskedFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.ApproximateReciprocalMasked", opLen2(ssa.OpApproximateReciprocalMaskedFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x16.ApproximateReciprocalMasked", opLen2(ssa.OpApproximateReciprocalMaskedFloat32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float64x2.ApproximateReciprocalMasked", opLen2(ssa.OpApproximateReciprocalMaskedFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x4.ApproximateReciprocalMasked", opLen2(ssa.OpApproximateReciprocalMaskedFloat64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x8.ApproximateReciprocalMasked", opLen2(ssa.OpApproximateReciprocalMaskedFloat64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.ApproximateReciprocalOfSqrt", opLen1(ssa.OpApproximateReciprocalOfSqrtFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.ApproximateReciprocalOfSqrt", opLen1(ssa.OpApproximateReciprocalOfSqrtFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x16.ApproximateReciprocalOfSqrt", opLen1(ssa.OpApproximateReciprocalOfSqrtFloat32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float64x2.ApproximateReciprocalOfSqrt", opLen1(ssa.OpApproximateReciprocalOfSqrtFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x4.ApproximateReciprocalOfSqrt", opLen1(ssa.OpApproximateReciprocalOfSqrtFloat64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x8.ApproximateReciprocalOfSqrt", opLen1(ssa.OpApproximateReciprocalOfSqrtFloat64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.ApproximateReciprocalOfSqrtMasked", opLen2(ssa.OpApproximateReciprocalOfSqrtMaskedFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.ApproximateReciprocalOfSqrtMasked", opLen2(ssa.OpApproximateReciprocalOfSqrtMaskedFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x16.ApproximateReciprocalOfSqrtMasked", opLen2(ssa.OpApproximateReciprocalOfSqrtMaskedFloat32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float64x2.ApproximateReciprocalOfSqrtMasked", opLen2(ssa.OpApproximateReciprocalOfSqrtMaskedFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x4.ApproximateReciprocalOfSqrtMasked", opLen2(ssa.OpApproximateReciprocalOfSqrtMaskedFloat64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x8.ApproximateReciprocalOfSqrtMasked", opLen2(ssa.OpApproximateReciprocalOfSqrtMaskedFloat64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint8x16.Average", opLen2(ssa.OpAverageUint8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint8x16.Average", opLen2(ssa.OpAverageUint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint8x32.Average", opLen2(ssa.OpAverageUint8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint8x32.Average", opLen2(ssa.OpAverageUint8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint8x64.Average", opLen2(ssa.OpAverageUint8x64, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint8x64.Average", opLen2(ssa.OpAverageUint8x64, types.TypeVec512), sys.AMD64)
@ -321,6 +309,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Float32x4.ConvertToUint32Masked", opLen2(ssa.OpConvertToUint32MaskedFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x4.ConvertToUint32Masked", opLen2(ssa.OpConvertToUint32MaskedFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.ConvertToUint32Masked", opLen2(ssa.OpConvertToUint32MaskedFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x8.ConvertToUint32Masked", opLen2(ssa.OpConvertToUint32MaskedFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x16.ConvertToUint32Masked", opLen2(ssa.OpConvertToUint32MaskedFloat32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x16.ConvertToUint32Masked", opLen2(ssa.OpConvertToUint32MaskedFloat32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.CopySign", opLen2(ssa.OpCopySignInt8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x32.CopySign", opLen2(ssa.OpCopySignInt8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x8.CopySign", opLen2(ssa.OpCopySignInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x16.CopySign", opLen2(ssa.OpCopySignInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int32x4.CopySign", opLen2(ssa.OpCopySignInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x8.CopySign", opLen2(ssa.OpCopySignInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x4.Div", opLen2(ssa.OpDivFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x4.Div", opLen2(ssa.OpDivFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.Div", opLen2(ssa.OpDivFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x8.Div", opLen2(ssa.OpDivFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x16.Div", opLen2(ssa.OpDivFloat32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x16.Div", opLen2(ssa.OpDivFloat32x16, types.TypeVec512), sys.AMD64)
@ -333,9 +327,18 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Float64x2.DivMasked", opLen3(ssa.OpDivMaskedFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x2.DivMasked", opLen3(ssa.OpDivMaskedFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x4.DivMasked", opLen3(ssa.OpDivMaskedFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x4.DivMasked", opLen3(ssa.OpDivMaskedFloat64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x8.DivMasked", opLen3(ssa.OpDivMaskedFloat64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float64x8.DivMasked", opLen3(ssa.OpDivMaskedFloat64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.DotProdBroadcast", opLen2(ssa.OpDotProdBroadcastFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x8.DotProdPairs", opLen2(ssa.OpDotProdPairsInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.DotProdBroadcast", opLen2(ssa.OpDotProdBroadcastFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x16.DotProdPairs", opLen2(ssa.OpDotProdPairsInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x2.DotProdBroadcast", opLen2(ssa.OpDotProdBroadcastFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x32.DotProdPairs", opLen2(ssa.OpDotProdPairsInt16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x8.DotProdPairsMasked", opLen3(ssa.OpDotProdPairsMaskedInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x16.DotProdPairsMasked", opLen3(ssa.OpDotProdPairsMaskedInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x32.DotProdPairsMasked", opLen3(ssa.OpDotProdPairsMaskedInt16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint8x16.DotProdPairsSaturated", opLen2(ssa.OpDotProdPairsSaturatedUint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint8x32.DotProdPairsSaturated", opLen2(ssa.OpDotProdPairsSaturatedUint8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint8x64.DotProdPairsSaturated", opLen2(ssa.OpDotProdPairsSaturatedUint8x64, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint8x16.DotProdPairsSaturatedMasked", opLen3(ssa.OpDotProdPairsSaturatedMaskedUint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint8x32.DotProdPairsSaturatedMasked", opLen3(ssa.OpDotProdPairsSaturatedMaskedUint8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint8x64.DotProdPairsSaturatedMasked", opLen3(ssa.OpDotProdPairsSaturatedMaskedUint8x64, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.Equal", opLen2(ssa.OpEqualInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x16.Equal", opLen2(ssa.OpEqualInt8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x32.Equal", opLen2(ssa.OpEqualInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x32.Equal", opLen2(ssa.OpEqualInt8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int8x64.Equal", opLen2(ssa.OpEqualInt8x64, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x64.Equal", opLen2(ssa.OpEqualInt8x64, types.TypeVec512), sys.AMD64)
@ -454,42 +457,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Float64x2.FloorScaledResidueMasked", opLen2Imm8(ssa.OpFloorScaledResidueMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x2.FloorScaledResidueMasked", opLen2Imm8(ssa.OpFloorScaledResidueMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
addF(simdPackage, "Float64x4.FloorScaledResidueMasked", opLen2Imm8(ssa.OpFloorScaledResidueMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x4.FloorScaledResidueMasked", opLen2Imm8(ssa.OpFloorScaledResidueMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
addF(simdPackage, "Float64x8.FloorScaledResidueMasked", opLen2Imm8(ssa.OpFloorScaledResidueMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x8.FloorScaledResidueMasked", opLen2Imm8(ssa.OpFloorScaledResidueMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
addF(simdPackage, "Float32x4.FusedMultiplyAdd", opLen3(ssa.OpFusedMultiplyAddFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.FusedMultiplyAdd", opLen3(ssa.OpFusedMultiplyAddFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x16.FusedMultiplyAdd", opLen3(ssa.OpFusedMultiplyAddFloat32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float64x2.FusedMultiplyAdd", opLen3(ssa.OpFusedMultiplyAddFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x4.FusedMultiplyAdd", opLen3(ssa.OpFusedMultiplyAddFloat64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x8.FusedMultiplyAdd", opLen3(ssa.OpFusedMultiplyAddFloat64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.FusedMultiplyAddMasked", opLen4(ssa.OpFusedMultiplyAddMaskedFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.FusedMultiplyAddMasked", opLen4(ssa.OpFusedMultiplyAddMaskedFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x16.FusedMultiplyAddMasked", opLen4(ssa.OpFusedMultiplyAddMaskedFloat32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float64x2.FusedMultiplyAddMasked", opLen4(ssa.OpFusedMultiplyAddMaskedFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x4.FusedMultiplyAddMasked", opLen4(ssa.OpFusedMultiplyAddMaskedFloat64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x8.FusedMultiplyAddMasked", opLen4(ssa.OpFusedMultiplyAddMaskedFloat64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.FusedMultiplyAddSub", opLen3(ssa.OpFusedMultiplyAddSubFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.FusedMultiplyAddSub", opLen3(ssa.OpFusedMultiplyAddSubFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x16.FusedMultiplyAddSub", opLen3(ssa.OpFusedMultiplyAddSubFloat32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float64x2.FusedMultiplyAddSub", opLen3(ssa.OpFusedMultiplyAddSubFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x4.FusedMultiplyAddSub", opLen3(ssa.OpFusedMultiplyAddSubFloat64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x8.FusedMultiplyAddSub", opLen3(ssa.OpFusedMultiplyAddSubFloat64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.FusedMultiplyAddSubMasked", opLen4(ssa.OpFusedMultiplyAddSubMaskedFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.FusedMultiplyAddSubMasked", opLen4(ssa.OpFusedMultiplyAddSubMaskedFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x16.FusedMultiplyAddSubMasked", opLen4(ssa.OpFusedMultiplyAddSubMaskedFloat32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float64x2.FusedMultiplyAddSubMasked", opLen4(ssa.OpFusedMultiplyAddSubMaskedFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x4.FusedMultiplyAddSubMasked", opLen4(ssa.OpFusedMultiplyAddSubMaskedFloat64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x8.FusedMultiplyAddSubMasked", opLen4(ssa.OpFusedMultiplyAddSubMaskedFloat64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.FusedMultiplySubAdd", opLen3(ssa.OpFusedMultiplySubAddFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.FusedMultiplySubAdd", opLen3(ssa.OpFusedMultiplySubAddFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x16.FusedMultiplySubAdd", opLen3(ssa.OpFusedMultiplySubAddFloat32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float64x2.FusedMultiplySubAdd", opLen3(ssa.OpFusedMultiplySubAddFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x4.FusedMultiplySubAdd", opLen3(ssa.OpFusedMultiplySubAddFloat64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x8.FusedMultiplySubAdd", opLen3(ssa.OpFusedMultiplySubAddFloat64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.FusedMultiplySubAddMasked", opLen4(ssa.OpFusedMultiplySubAddMaskedFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.FusedMultiplySubAddMasked", opLen4(ssa.OpFusedMultiplySubAddMaskedFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x16.FusedMultiplySubAddMasked", opLen4(ssa.OpFusedMultiplySubAddMaskedFloat32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float64x2.FusedMultiplySubAddMasked", opLen4(ssa.OpFusedMultiplySubAddMaskedFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x4.FusedMultiplySubAddMasked", opLen4(ssa.OpFusedMultiplySubAddMaskedFloat64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x8.FusedMultiplySubAddMasked", opLen4(ssa.OpFusedMultiplySubAddMaskedFloat64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint8x16.GaloisFieldAffineTransform", opLen2Imm8_2I(ssa.OpGaloisFieldAffineTransformUint8x16, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Uint8x16.GaloisFieldAffineTransform", opLen2Imm8_2I(ssa.OpGaloisFieldAffineTransformUint8x16, types.TypeVec128, 0), sys.AMD64)
addF(simdPackage, "Uint8x32.GaloisFieldAffineTransform", opLen2Imm8_2I(ssa.OpGaloisFieldAffineTransformUint8x32, types.TypeVec256, 0), sys.AMD64) addF(simdPackage, "Uint8x32.GaloisFieldAffineTransform", opLen2Imm8_2I(ssa.OpGaloisFieldAffineTransformUint8x32, types.TypeVec256, 0), sys.AMD64)
addF(simdPackage, "Uint8x64.GaloisFieldAffineTransform", opLen2Imm8_2I(ssa.OpGaloisFieldAffineTransformUint8x64, types.TypeVec512, 0), sys.AMD64) addF(simdPackage, "Uint8x64.GaloisFieldAffineTransform", opLen2Imm8_2I(ssa.OpGaloisFieldAffineTransformUint8x64, types.TypeVec512, 0), sys.AMD64)
@ -943,34 +910,49 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Int64x2.Mul", opLen2(ssa.OpMulInt64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int64x2.Mul", opLen2(ssa.OpMulInt64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int64x4.Mul", opLen2(ssa.OpMulInt64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int64x4.Mul", opLen2(ssa.OpMulInt64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int64x8.Mul", opLen2(ssa.OpMulInt64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int64x8.Mul", opLen2(ssa.OpMulInt64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint16x8.Mul", opLen2(ssa.OpMulUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x16.Mul", opLen2(ssa.OpMulUint16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x32.Mul", opLen2(ssa.OpMulUint16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint32x4.Mul", opLen2(ssa.OpMulUint32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint32x8.Mul", opLen2(ssa.OpMulUint32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint32x16.Mul", opLen2(ssa.OpMulUint32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint64x2.Mul", opLen2(ssa.OpMulUint64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint64x4.Mul", opLen2(ssa.OpMulUint64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint64x8.Mul", opLen2(ssa.OpMulUint64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.MulAdd", opLen3(ssa.OpMulAddFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.MulAdd", opLen3(ssa.OpMulAddFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x16.MulAdd", opLen3(ssa.OpMulAddFloat32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float64x2.MulAdd", opLen3(ssa.OpMulAddFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x4.MulAdd", opLen3(ssa.OpMulAddFloat64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x8.MulAdd", opLen3(ssa.OpMulAddFloat64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.MulAddMasked", opLen4(ssa.OpMulAddMaskedFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.MulAddMasked", opLen4(ssa.OpMulAddMaskedFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x16.MulAddMasked", opLen4(ssa.OpMulAddMaskedFloat32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float64x2.MulAddMasked", opLen4(ssa.OpMulAddMaskedFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x4.MulAddMasked", opLen4(ssa.OpMulAddMaskedFloat64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x8.MulAddMasked", opLen4(ssa.OpMulAddMaskedFloat64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.MulAddSub", opLen3(ssa.OpMulAddSubFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.MulAddSub", opLen3(ssa.OpMulAddSubFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x16.MulAddSub", opLen3(ssa.OpMulAddSubFloat32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float64x2.MulAddSub", opLen3(ssa.OpMulAddSubFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x4.MulAddSub", opLen3(ssa.OpMulAddSubFloat64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x8.MulAddSub", opLen3(ssa.OpMulAddSubFloat64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.MulAddSubMasked", opLen4(ssa.OpMulAddSubMaskedFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.MulAddSubMasked", opLen4(ssa.OpMulAddSubMaskedFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x16.MulAddSubMasked", opLen4(ssa.OpMulAddSubMaskedFloat32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float64x2.MulAddSubMasked", opLen4(ssa.OpMulAddSubMaskedFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x4.MulAddSubMasked", opLen4(ssa.OpMulAddSubMaskedFloat64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x8.MulAddSubMasked", opLen4(ssa.OpMulAddSubMaskedFloat64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int32x4.MulEvenWiden", opLen2(ssa.OpMulEvenWidenInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int32x4.MulEvenWiden", opLen2(ssa.OpMulEvenWidenInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x8.MulEvenWiden", opLen2(ssa.OpMulEvenWidenInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int32x8.MulEvenWiden", opLen2(ssa.OpMulEvenWidenInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int64x2.MulEvenWiden", opLen2(ssa.OpMulEvenWidenInt64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int64x4.MulEvenWiden", opLen2(ssa.OpMulEvenWidenInt64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int64x8.MulEvenWiden", opLen2(ssa.OpMulEvenWidenInt64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint32x4.MulEvenWiden", opLen2(ssa.OpMulEvenWidenUint32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint32x4.MulEvenWiden", opLen2(ssa.OpMulEvenWidenUint32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint32x8.MulEvenWiden", opLen2(ssa.OpMulEvenWidenUint32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint32x8.MulEvenWiden", opLen2(ssa.OpMulEvenWidenUint32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint64x2.MulEvenWiden", opLen2(ssa.OpMulEvenWidenUint64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint64x4.MulEvenWiden", opLen2(ssa.OpMulEvenWidenUint64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint64x8.MulEvenWiden", opLen2(ssa.OpMulEvenWidenUint64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int64x2.MulEvenWidenMasked", opLen3(ssa.OpMulEvenWidenMaskedInt64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int64x4.MulEvenWidenMasked", opLen3(ssa.OpMulEvenWidenMaskedInt64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int64x8.MulEvenWidenMasked", opLen3(ssa.OpMulEvenWidenMaskedInt64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint64x2.MulEvenWidenMasked", opLen3(ssa.OpMulEvenWidenMaskedUint64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint64x4.MulEvenWidenMasked", opLen3(ssa.OpMulEvenWidenMaskedUint64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint64x8.MulEvenWidenMasked", opLen3(ssa.OpMulEvenWidenMaskedUint64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x8.MulHigh", opLen2(ssa.OpMulHighInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x8.MulHigh", opLen2(ssa.OpMulHighInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x16.MulHigh", opLen2(ssa.OpMulHighInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x16.MulHigh", opLen2(ssa.OpMulHighInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x32.MulHigh", opLen2(ssa.OpMulHighInt16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x32.MulHigh", opLen2(ssa.OpMulHighInt16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint16x8.MulHigh", opLen2(ssa.OpMulHighUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x16.MulHigh", opLen2(ssa.OpMulHighUint16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x32.MulHigh", opLen2(ssa.OpMulHighUint16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x8.MulHighMasked", opLen3(ssa.OpMulHighMaskedInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x8.MulHighMasked", opLen3(ssa.OpMulHighMaskedInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x16.MulHighMasked", opLen3(ssa.OpMulHighMaskedInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x16.MulHighMasked", opLen3(ssa.OpMulHighMaskedInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x32.MulHighMasked", opLen3(ssa.OpMulHighMaskedInt16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x32.MulHighMasked", opLen3(ssa.OpMulHighMaskedInt16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint16x8.MulHighMasked", opLen3(ssa.OpMulHighMaskedUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x16.MulHighMasked", opLen3(ssa.OpMulHighMaskedUint16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x32.MulHighMasked", opLen3(ssa.OpMulHighMaskedUint16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.MulMasked", opLen3(ssa.OpMulMaskedFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x4.MulMasked", opLen3(ssa.OpMulMaskedFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.MulMasked", opLen3(ssa.OpMulMaskedFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x8.MulMasked", opLen3(ssa.OpMulMaskedFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x16.MulMasked", opLen3(ssa.OpMulMaskedFloat32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x16.MulMasked", opLen3(ssa.OpMulMaskedFloat32x16, types.TypeVec512), sys.AMD64)
@ -986,6 +968,27 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Int64x2.MulMasked", opLen3(ssa.OpMulMaskedInt64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int64x2.MulMasked", opLen3(ssa.OpMulMaskedInt64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int64x4.MulMasked", opLen3(ssa.OpMulMaskedInt64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int64x4.MulMasked", opLen3(ssa.OpMulMaskedInt64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int64x8.MulMasked", opLen3(ssa.OpMulMaskedInt64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int64x8.MulMasked", opLen3(ssa.OpMulMaskedInt64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint16x8.MulMasked", opLen3(ssa.OpMulMaskedUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x16.MulMasked", opLen3(ssa.OpMulMaskedUint16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x32.MulMasked", opLen3(ssa.OpMulMaskedUint16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint32x4.MulMasked", opLen3(ssa.OpMulMaskedUint32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint32x8.MulMasked", opLen3(ssa.OpMulMaskedUint32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint32x16.MulMasked", opLen3(ssa.OpMulMaskedUint32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint64x2.MulMasked", opLen3(ssa.OpMulMaskedUint64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint64x4.MulMasked", opLen3(ssa.OpMulMaskedUint64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint64x8.MulMasked", opLen3(ssa.OpMulMaskedUint64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.MulSubAdd", opLen3(ssa.OpMulSubAddFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.MulSubAdd", opLen3(ssa.OpMulSubAddFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x16.MulSubAdd", opLen3(ssa.OpMulSubAddFloat32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float64x2.MulSubAdd", opLen3(ssa.OpMulSubAddFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x4.MulSubAdd", opLen3(ssa.OpMulSubAddFloat64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x8.MulSubAdd", opLen3(ssa.OpMulSubAddFloat64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.MulSubAddMasked", opLen4(ssa.OpMulSubAddMaskedFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.MulSubAddMasked", opLen4(ssa.OpMulSubAddMaskedFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x16.MulSubAddMasked", opLen4(ssa.OpMulSubAddMaskedFloat32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float64x2.MulSubAddMasked", opLen4(ssa.OpMulSubAddMaskedFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x4.MulSubAddMasked", opLen4(ssa.OpMulSubAddMaskedFloat64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x8.MulSubAddMasked", opLen4(ssa.OpMulSubAddMaskedFloat64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.NotEqual", opLen2(ssa.OpNotEqualFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x4.NotEqual", opLen2(ssa.OpNotEqualFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.NotEqual", opLen2(ssa.OpNotEqualFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x8.NotEqual", opLen2(ssa.OpNotEqualFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x16.NotEqual", opLen2(ssa.OpNotEqualFloat32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x16.NotEqual", opLen2(ssa.OpNotEqualFloat32x16, types.TypeVec512), sys.AMD64)
@ -1046,6 +1049,54 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Uint64x2.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x2.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint64x4.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x4.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint64x8.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint64x8.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.OnesCount", opLen1(ssa.OpOnesCountInt8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x32.OnesCount", opLen1(ssa.OpOnesCountInt8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int8x64.OnesCount", opLen1(ssa.OpOnesCountInt8x64, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x8.OnesCount", opLen1(ssa.OpOnesCountInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x16.OnesCount", opLen1(ssa.OpOnesCountInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x32.OnesCount", opLen1(ssa.OpOnesCountInt16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int32x4.OnesCount", opLen1(ssa.OpOnesCountInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x8.OnesCount", opLen1(ssa.OpOnesCountInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int32x16.OnesCount", opLen1(ssa.OpOnesCountInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int64x2.OnesCount", opLen1(ssa.OpOnesCountInt64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int64x4.OnesCount", opLen1(ssa.OpOnesCountInt64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int64x8.OnesCount", opLen1(ssa.OpOnesCountInt64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint8x16.OnesCount", opLen1(ssa.OpOnesCountUint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint8x32.OnesCount", opLen1(ssa.OpOnesCountUint8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint8x64.OnesCount", opLen1(ssa.OpOnesCountUint8x64, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint16x8.OnesCount", opLen1(ssa.OpOnesCountUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x16.OnesCount", opLen1(ssa.OpOnesCountUint16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x32.OnesCount", opLen1(ssa.OpOnesCountUint16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint32x4.OnesCount", opLen1(ssa.OpOnesCountUint32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint32x8.OnesCount", opLen1(ssa.OpOnesCountUint32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint32x16.OnesCount", opLen1(ssa.OpOnesCountUint32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint64x2.OnesCount", opLen1(ssa.OpOnesCountUint64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint64x4.OnesCount", opLen1(ssa.OpOnesCountUint64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint64x8.OnesCount", opLen1(ssa.OpOnesCountUint64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x32.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int8x64.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt8x64, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x8.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x16.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x32.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int32x4.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x8.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int32x16.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int64x2.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int64x4.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int64x8.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint8x16.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint8x32.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint8x64.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint8x64, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint16x8.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x16.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x32.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint32x4.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint32x8.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint32x16.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint64x2.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint64x4.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint64x8.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.Or", opLen2(ssa.OpOrInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x16.Or", opLen2(ssa.OpOrInt8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x32.Or", opLen2(ssa.OpOrInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x32.Or", opLen2(ssa.OpOrInt8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int8x64.Or", opLen2(ssa.OpOrInt8x64, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x64.Or", opLen2(ssa.OpOrInt8x64, types.TypeVec512), sys.AMD64)
@ -1082,12 +1133,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Uint64x2.OrMasked", opLen3(ssa.OpOrMaskedUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x2.OrMasked", opLen3(ssa.OpOrMaskedUint64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint64x4.OrMasked", opLen3(ssa.OpOrMaskedUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x4.OrMasked", opLen3(ssa.OpOrMaskedUint64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint64x8.OrMasked", opLen3(ssa.OpOrMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint64x8.OrMasked", opLen3(ssa.OpOrMaskedUint64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x8.PairDotProd", opLen2(ssa.OpPairDotProdInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x16.PairDotProd", opLen2(ssa.OpPairDotProdInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x32.PairDotProd", opLen2(ssa.OpPairDotProdInt16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x8.PairDotProdMasked", opLen3(ssa.OpPairDotProdMaskedInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x16.PairDotProdMasked", opLen3(ssa.OpPairDotProdMaskedInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x32.PairDotProdMasked", opLen3(ssa.OpPairDotProdMaskedInt16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.Permute", opLen2_21(ssa.OpPermuteInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x16.Permute", opLen2_21(ssa.OpPermuteInt8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint8x16.Permute", opLen2_21(ssa.OpPermuteUint8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint8x16.Permute", opLen2_21(ssa.OpPermuteUint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x32.Permute", opLen2_21(ssa.OpPermuteInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x32.Permute", opLen2_21(ssa.OpPermuteInt8x32, types.TypeVec256), sys.AMD64)
@ -1196,54 +1241,30 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Float64x8.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedFloat64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float64x8.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedFloat64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int64x8.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int64x8.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint64x8.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint64x8.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.PopCount", opLen1(ssa.OpPopCountInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x4.Reciprocal", opLen1(ssa.OpReciprocalFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x32.PopCount", opLen1(ssa.OpPopCountInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x8.Reciprocal", opLen1(ssa.OpReciprocalFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int8x64.PopCount", opLen1(ssa.OpPopCountInt8x64, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x16.Reciprocal", opLen1(ssa.OpReciprocalFloat32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x8.PopCount", opLen1(ssa.OpPopCountInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x2.Reciprocal", opLen1(ssa.OpReciprocalFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x16.PopCount", opLen1(ssa.OpPopCountInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x4.Reciprocal", opLen1(ssa.OpReciprocalFloat64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x32.PopCount", opLen1(ssa.OpPopCountInt16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float64x8.Reciprocal", opLen1(ssa.OpReciprocalFloat64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int32x4.PopCount", opLen1(ssa.OpPopCountInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x4.ReciprocalMasked", opLen2(ssa.OpReciprocalMaskedFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x8.PopCount", opLen1(ssa.OpPopCountInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x8.ReciprocalMasked", opLen2(ssa.OpReciprocalMaskedFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int32x16.PopCount", opLen1(ssa.OpPopCountInt32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x16.ReciprocalMasked", opLen2(ssa.OpReciprocalMaskedFloat32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int64x2.PopCount", opLen1(ssa.OpPopCountInt64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x2.ReciprocalMasked", opLen2(ssa.OpReciprocalMaskedFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int64x4.PopCount", opLen1(ssa.OpPopCountInt64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x4.ReciprocalMasked", opLen2(ssa.OpReciprocalMaskedFloat64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int64x8.PopCount", opLen1(ssa.OpPopCountInt64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float64x8.ReciprocalMasked", opLen2(ssa.OpReciprocalMaskedFloat64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint8x16.PopCount", opLen1(ssa.OpPopCountUint8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x4.ReciprocalSqrt", opLen1(ssa.OpReciprocalSqrtFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint8x32.PopCount", opLen1(ssa.OpPopCountUint8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x8.ReciprocalSqrt", opLen1(ssa.OpReciprocalSqrtFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint8x64.PopCount", opLen1(ssa.OpPopCountUint8x64, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x16.ReciprocalSqrt", opLen1(ssa.OpReciprocalSqrtFloat32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint16x8.PopCount", opLen1(ssa.OpPopCountUint16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x2.ReciprocalSqrt", opLen1(ssa.OpReciprocalSqrtFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x16.PopCount", opLen1(ssa.OpPopCountUint16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x4.ReciprocalSqrt", opLen1(ssa.OpReciprocalSqrtFloat64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x32.PopCount", opLen1(ssa.OpPopCountUint16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float64x8.ReciprocalSqrt", opLen1(ssa.OpReciprocalSqrtFloat64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint32x4.PopCount", opLen1(ssa.OpPopCountUint32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x4.ReciprocalSqrtMasked", opLen2(ssa.OpReciprocalSqrtMaskedFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint32x8.PopCount", opLen1(ssa.OpPopCountUint32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x8.ReciprocalSqrtMasked", opLen2(ssa.OpReciprocalSqrtMaskedFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint32x16.PopCount", opLen1(ssa.OpPopCountUint32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x16.ReciprocalSqrtMasked", opLen2(ssa.OpReciprocalSqrtMaskedFloat32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint64x2.PopCount", opLen1(ssa.OpPopCountUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x2.ReciprocalSqrtMasked", opLen2(ssa.OpReciprocalSqrtMaskedFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint64x4.PopCount", opLen1(ssa.OpPopCountUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x4.ReciprocalSqrtMasked", opLen2(ssa.OpReciprocalSqrtMaskedFloat64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint64x8.PopCount", opLen1(ssa.OpPopCountUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float64x8.ReciprocalSqrtMasked", opLen2(ssa.OpReciprocalSqrtMaskedFloat64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.PopCountMasked", opLen2(ssa.OpPopCountMaskedInt8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x32.PopCountMasked", opLen2(ssa.OpPopCountMaskedInt8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int8x64.PopCountMasked", opLen2(ssa.OpPopCountMaskedInt8x64, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x8.PopCountMasked", opLen2(ssa.OpPopCountMaskedInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x16.PopCountMasked", opLen2(ssa.OpPopCountMaskedInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x32.PopCountMasked", opLen2(ssa.OpPopCountMaskedInt16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int32x4.PopCountMasked", opLen2(ssa.OpPopCountMaskedInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x8.PopCountMasked", opLen2(ssa.OpPopCountMaskedInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int32x16.PopCountMasked", opLen2(ssa.OpPopCountMaskedInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int64x2.PopCountMasked", opLen2(ssa.OpPopCountMaskedInt64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int64x4.PopCountMasked", opLen2(ssa.OpPopCountMaskedInt64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int64x8.PopCountMasked", opLen2(ssa.OpPopCountMaskedInt64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint8x16.PopCountMasked", opLen2(ssa.OpPopCountMaskedUint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint8x32.PopCountMasked", opLen2(ssa.OpPopCountMaskedUint8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint8x64.PopCountMasked", opLen2(ssa.OpPopCountMaskedUint8x64, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint16x8.PopCountMasked", opLen2(ssa.OpPopCountMaskedUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x16.PopCountMasked", opLen2(ssa.OpPopCountMaskedUint16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x32.PopCountMasked", opLen2(ssa.OpPopCountMaskedUint16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint32x4.PopCountMasked", opLen2(ssa.OpPopCountMaskedUint32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint32x8.PopCountMasked", opLen2(ssa.OpPopCountMaskedUint32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint32x16.PopCountMasked", opLen2(ssa.OpPopCountMaskedUint32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint64x2.PopCountMasked", opLen2(ssa.OpPopCountMaskedUint64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint64x4.PopCountMasked", opLen2(ssa.OpPopCountMaskedUint64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint64x8.PopCountMasked", opLen2(ssa.OpPopCountMaskedUint64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int32x4.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftInt32x4, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Int32x4.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftInt32x4, types.TypeVec128, 0), sys.AMD64)
addF(simdPackage, "Int32x8.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftInt32x8, types.TypeVec256, 0), sys.AMD64) addF(simdPackage, "Int32x8.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftInt32x8, types.TypeVec256, 0), sys.AMD64)
addF(simdPackage, "Int32x16.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftInt32x16, types.TypeVec512, 0), sys.AMD64) addF(simdPackage, "Int32x16.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftInt32x16, types.TypeVec512, 0), sys.AMD64)
@ -1340,52 +1361,34 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Uint64x2.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x2.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedUint64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint64x4.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x4.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedUint64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint64x8.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint64x8.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedUint64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.Round", opLen1(ssa.OpRoundFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x4.RoundToEven", opLen1(ssa.OpRoundToEvenFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.Round", opLen1(ssa.OpRoundFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x8.RoundToEven", opLen1(ssa.OpRoundToEvenFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x2.Round", opLen1(ssa.OpRoundFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x2.RoundToEven", opLen1(ssa.OpRoundToEvenFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x4.Round", opLen1(ssa.OpRoundFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x4.RoundToEven", opLen1(ssa.OpRoundToEvenFloat64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x4.RoundScaled", opLen1Imm8(ssa.OpRoundScaledFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x4.RoundToEvenScaled", opLen1Imm8(ssa.OpRoundToEvenScaledFloat32x4, types.TypeVec128, 4), sys.AMD64)
addF(simdPackage, "Float32x8.RoundScaled", opLen1Imm8(ssa.OpRoundScaledFloat32x8, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float32x8.RoundToEvenScaled", opLen1Imm8(ssa.OpRoundToEvenScaledFloat32x8, types.TypeVec256, 4), sys.AMD64)
addF(simdPackage, "Float32x16.RoundScaled", opLen1Imm8(ssa.OpRoundScaledFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x16.RoundToEvenScaled", opLen1Imm8(ssa.OpRoundToEvenScaledFloat32x16, types.TypeVec512, 4), sys.AMD64)
addF(simdPackage, "Float64x2.RoundScaled", opLen1Imm8(ssa.OpRoundScaledFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x2.RoundToEvenScaled", opLen1Imm8(ssa.OpRoundToEvenScaledFloat64x2, types.TypeVec128, 4), sys.AMD64)
addF(simdPackage, "Float64x4.RoundScaled", opLen1Imm8(ssa.OpRoundScaledFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x4.RoundToEvenScaled", opLen1Imm8(ssa.OpRoundToEvenScaledFloat64x4, types.TypeVec256, 4), sys.AMD64)
addF(simdPackage, "Float64x8.RoundScaled", opLen1Imm8(ssa.OpRoundScaledFloat64x8, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x8.RoundToEvenScaled", opLen1Imm8(ssa.OpRoundToEvenScaledFloat64x8, types.TypeVec512, 4), sys.AMD64)
addF(simdPackage, "Float32x4.RoundScaledMasked", opLen2Imm8(ssa.OpRoundScaledMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x4.RoundToEvenScaledMasked", opLen2Imm8(ssa.OpRoundToEvenScaledMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
addF(simdPackage, "Float32x8.RoundScaledMasked", opLen2Imm8(ssa.OpRoundScaledMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float32x8.RoundToEvenScaledMasked", opLen2Imm8(ssa.OpRoundToEvenScaledMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
addF(simdPackage, "Float32x16.RoundScaledMasked", opLen2Imm8(ssa.OpRoundScaledMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x16.RoundToEvenScaledMasked", opLen2Imm8(ssa.OpRoundToEvenScaledMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
addF(simdPackage, "Float64x2.RoundScaledMasked", opLen2Imm8(ssa.OpRoundScaledMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x2.RoundToEvenScaledMasked", opLen2Imm8(ssa.OpRoundToEvenScaledMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
addF(simdPackage, "Float64x4.RoundScaledMasked", opLen2Imm8(ssa.OpRoundScaledMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x4.RoundToEvenScaledMasked", opLen2Imm8(ssa.OpRoundToEvenScaledMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
addF(simdPackage, "Float64x8.RoundScaledMasked", opLen2Imm8(ssa.OpRoundScaledMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x8.RoundToEvenScaledMasked", opLen2Imm8(ssa.OpRoundToEvenScaledMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
addF(simdPackage, "Float32x4.RoundScaledResidue", opLen1Imm8(ssa.OpRoundScaledResidueFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x4.RoundToEvenScaledResidue", opLen1Imm8(ssa.OpRoundToEvenScaledResidueFloat32x4, types.TypeVec128, 4), sys.AMD64)
addF(simdPackage, "Float32x8.RoundScaledResidue", opLen1Imm8(ssa.OpRoundScaledResidueFloat32x8, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float32x8.RoundToEvenScaledResidue", opLen1Imm8(ssa.OpRoundToEvenScaledResidueFloat32x8, types.TypeVec256, 4), sys.AMD64)
addF(simdPackage, "Float32x16.RoundScaledResidue", opLen1Imm8(ssa.OpRoundScaledResidueFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x16.RoundToEvenScaledResidue", opLen1Imm8(ssa.OpRoundToEvenScaledResidueFloat32x16, types.TypeVec512, 4), sys.AMD64)
addF(simdPackage, "Float64x2.RoundScaledResidue", opLen1Imm8(ssa.OpRoundScaledResidueFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x2.RoundToEvenScaledResidue", opLen1Imm8(ssa.OpRoundToEvenScaledResidueFloat64x2, types.TypeVec128, 4), sys.AMD64)
addF(simdPackage, "Float64x4.RoundScaledResidue", opLen1Imm8(ssa.OpRoundScaledResidueFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x4.RoundToEvenScaledResidue", opLen1Imm8(ssa.OpRoundToEvenScaledResidueFloat64x4, types.TypeVec256, 4), sys.AMD64)
addF(simdPackage, "Float64x8.RoundScaledResidue", opLen1Imm8(ssa.OpRoundScaledResidueFloat64x8, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x8.RoundToEvenScaledResidue", opLen1Imm8(ssa.OpRoundToEvenScaledResidueFloat64x8, types.TypeVec512, 4), sys.AMD64)
addF(simdPackage, "Float32x4.RoundScaledResidueMasked", opLen2Imm8(ssa.OpRoundScaledResidueMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x4.RoundToEvenScaledResidueMasked", opLen2Imm8(ssa.OpRoundToEvenScaledResidueMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
addF(simdPackage, "Float32x8.RoundScaledResidueMasked", opLen2Imm8(ssa.OpRoundScaledResidueMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float32x8.RoundToEvenScaledResidueMasked", opLen2Imm8(ssa.OpRoundToEvenScaledResidueMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
addF(simdPackage, "Float32x16.RoundScaledResidueMasked", opLen2Imm8(ssa.OpRoundScaledResidueMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x16.RoundToEvenScaledResidueMasked", opLen2Imm8(ssa.OpRoundToEvenScaledResidueMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
addF(simdPackage, "Float64x2.RoundScaledResidueMasked", opLen2Imm8(ssa.OpRoundScaledResidueMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x2.RoundToEvenScaledResidueMasked", opLen2Imm8(ssa.OpRoundToEvenScaledResidueMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
addF(simdPackage, "Float64x4.RoundScaledResidueMasked", opLen2Imm8(ssa.OpRoundScaledResidueMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x4.RoundToEvenScaledResidueMasked", opLen2Imm8(ssa.OpRoundToEvenScaledResidueMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
addF(simdPackage, "Float64x8.RoundScaledResidueMasked", opLen2Imm8(ssa.OpRoundScaledResidueMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x8.RoundToEvenScaledResidueMasked", opLen2Imm8(ssa.OpRoundToEvenScaledResidueMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
addF(simdPackage, "Int32x4.SaturatedAddDotProd", opLen3(ssa.OpSaturatedAddDotProdInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x8.SaturatedAddDotProd", opLen3(ssa.OpSaturatedAddDotProdInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int32x16.SaturatedAddDotProd", opLen3(ssa.OpSaturatedAddDotProdInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int32x4.SaturatedAddDotProdMasked", opLen4(ssa.OpSaturatedAddDotProdMaskedInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x8.SaturatedAddDotProdMasked", opLen4(ssa.OpSaturatedAddDotProdMaskedInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int32x16.SaturatedAddDotProdMasked", opLen4(ssa.OpSaturatedAddDotProdMaskedInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint8x16.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint8x32.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint8x64.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint8x64, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint8x16.SaturatedUnsignedSignedPairDotProdMasked", opLen3(ssa.OpSaturatedUnsignedSignedPairDotProdMaskedUint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint8x32.SaturatedUnsignedSignedPairDotProdMasked", opLen3(ssa.OpSaturatedUnsignedSignedPairDotProdMaskedUint8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint8x64.SaturatedUnsignedSignedPairDotProdMasked", opLen3(ssa.OpSaturatedUnsignedSignedPairDotProdMaskedUint8x64, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3_31(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x32.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3_31(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int8x64.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3_31(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.SaturatedUnsignedSignedQuadDotProdAccumulateMasked", opLen4_31(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x32.SaturatedUnsignedSignedQuadDotProdAccumulateMasked", opLen4_31(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int8x64.SaturatedUnsignedSignedQuadDotProdAccumulateMasked", opLen4_31(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.Scale", opLen2(ssa.OpScaleFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x4.Scale", opLen2(ssa.OpScaleFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.Scale", opLen2(ssa.OpScaleFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x8.Scale", opLen2(ssa.OpScaleFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x16.Scale", opLen2(ssa.OpScaleFloat32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x16.Scale", opLen2(ssa.OpScaleFloat32x16, types.TypeVec512), sys.AMD64)
@ -1734,12 +1737,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Uint64x2.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x2.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint64x4.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x4.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint64x8.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint64x8.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.Sign", opLen2(ssa.OpSignInt8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x32.Sign", opLen2(ssa.OpSignInt8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x8.Sign", opLen2(ssa.OpSignInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x16.Sign", opLen2(ssa.OpSignInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int32x4.Sign", opLen2(ssa.OpSignInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x8.Sign", opLen2(ssa.OpSignInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x4.Sqrt", opLen1(ssa.OpSqrtFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x4.Sqrt", opLen1(ssa.OpSqrtFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.Sqrt", opLen1(ssa.OpSqrtFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x8.Sqrt", opLen1(ssa.OpSqrtFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x16.Sqrt", opLen1(ssa.OpSqrtFloat32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x16.Sqrt", opLen1(ssa.OpSqrtFloat32x16, types.TypeVec512), sys.AMD64)
@ -1878,12 +1875,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Float64x2.TruncScaledResidueMasked", opLen2Imm8(ssa.OpTruncScaledResidueMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x2.TruncScaledResidueMasked", opLen2Imm8(ssa.OpTruncScaledResidueMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
addF(simdPackage, "Float64x4.TruncScaledResidueMasked", opLen2Imm8(ssa.OpTruncScaledResidueMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x4.TruncScaledResidueMasked", opLen2Imm8(ssa.OpTruncScaledResidueMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
addF(simdPackage, "Float64x8.TruncScaledResidueMasked", opLen2Imm8(ssa.OpTruncScaledResidueMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x8.TruncScaledResidueMasked", opLen2Imm8(ssa.OpTruncScaledResidueMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
addF(simdPackage, "Int8x16.UnsignedSignedQuadDotProdAccumulate", opLen3_31(ssa.OpUnsignedSignedQuadDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x32.UnsignedSignedQuadDotProdAccumulate", opLen3_31(ssa.OpUnsignedSignedQuadDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int8x64.UnsignedSignedQuadDotProdAccumulate", opLen3_31(ssa.OpUnsignedSignedQuadDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.UnsignedSignedQuadDotProdAccumulateMasked", opLen4_31(ssa.OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x32.UnsignedSignedQuadDotProdAccumulateMasked", opLen4_31(ssa.OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int8x64.UnsignedSignedQuadDotProdAccumulateMasked", opLen4_31(ssa.OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.Xor", opLen2(ssa.OpXorInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x16.Xor", opLen2(ssa.OpXorInt8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x32.Xor", opLen2(ssa.OpXorInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x32.Xor", opLen2(ssa.OpXorInt8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int8x64.Xor", opLen2(ssa.OpXorInt8x64, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x64.Xor", opLen2(ssa.OpXorInt8x64, types.TypeVec512), sys.AMD64)

File diff suppressed because it is too large Load diff

View file

@ -203,25 +203,6 @@ func TestExpand(t *testing.T) {
} }
} }
func TestPairDotProdAccumulate(t *testing.T) {
if !simd.HasAVX512GFNI() {
// TODO: this function is actually VNNI, let's implement and call the right check.
t.Skip("Test requires HasAVX512GFNI, not available on this hardware")
return
}
x := simd.LoadInt16x8Slice([]int16{2, 2, 2, 2, 2, 2, 2, 2})
z := simd.LoadInt32x4Slice([]int32{3, 3, 3, 3})
want := []int32{11, 11, 11, 11}
got := make([]int32, 4)
z = z.AddDotProd(x, x)
z.StoreSlice(got)
for i := range 4 {
if got[i] != want[i] {
t.Errorf("a and b differ at index %d, got=%d, want=%d", i, got[i], want[i])
}
}
}
var testShiftAllVal uint64 = 3 var testShiftAllVal uint64 = 3
func TestShiftAll(t *testing.T) { func TestShiftAll(t *testing.T) {

View file

@ -13,11 +13,11 @@ import (
func TestFMA(t *testing.T) { func TestFMA(t *testing.T) {
if simd.HasAVX512() { if simd.HasAVX512() {
testFloat32x4TernaryFlaky(t, simd.Float32x4.FusedMultiplyAdd, fmaSlice[float32], 0.001) testFloat32x4TernaryFlaky(t, simd.Float32x4.MulAdd, fmaSlice[float32], 0.001)
testFloat32x8TernaryFlaky(t, simd.Float32x8.FusedMultiplyAdd, fmaSlice[float32], 0.001) testFloat32x8TernaryFlaky(t, simd.Float32x8.MulAdd, fmaSlice[float32], 0.001)
testFloat32x16TernaryFlaky(t, simd.Float32x16.FusedMultiplyAdd, fmaSlice[float32], 0.001) testFloat32x16TernaryFlaky(t, simd.Float32x16.MulAdd, fmaSlice[float32], 0.001)
testFloat64x2Ternary(t, simd.Float64x2.FusedMultiplyAdd, fmaSlice[float64]) testFloat64x2Ternary(t, simd.Float64x2.MulAdd, fmaSlice[float64])
testFloat64x4Ternary(t, simd.Float64x4.FusedMultiplyAdd, fmaSlice[float64]) testFloat64x4Ternary(t, simd.Float64x4.MulAdd, fmaSlice[float64])
testFloat64x8Ternary(t, simd.Float64x8.FusedMultiplyAdd, fmaSlice[float64]) testFloat64x8Ternary(t, simd.Float64x8.MulAdd, fmaSlice[float64])
} }
} }

View file

@ -46,10 +46,10 @@ func TestTrunc(t *testing.T) {
} }
func TestRound(t *testing.T) { func TestRound(t *testing.T) {
testFloat32x4Unary(t, simd.Float32x4.Round, roundSlice[float32]) testFloat32x4Unary(t, simd.Float32x4.RoundToEven, roundSlice[float32])
testFloat32x8Unary(t, simd.Float32x8.Round, roundSlice[float32]) testFloat32x8Unary(t, simd.Float32x8.RoundToEven, roundSlice[float32])
testFloat64x2Unary(t, simd.Float64x2.Round, roundSlice[float64]) testFloat64x2Unary(t, simd.Float64x2.RoundToEven, roundSlice[float64])
testFloat64x4Unary(t, simd.Float64x4.Round, roundSlice[float64]) testFloat64x4Unary(t, simd.Float64x4.RoundToEven, roundSlice[float64])
if simd.HasAVX512() { if simd.HasAVX512() {
// testFloat32x16Unary(t, simd.Float32x16.Round, roundSlice[float32]) // missing // testFloat32x16Unary(t, simd.Float32x16.Round, roundSlice[float32]) // missing
// testFloat64x8Unary(t, simd.Float64x8.Round, roundSlice[float64]) // missing // testFloat64x8Unary(t, simd.Float64x8.Round, roundSlice[float64]) // missing
@ -68,19 +68,19 @@ func TestSqrt(t *testing.T) {
} }
func TestAbsolute(t *testing.T) { func TestAbsolute(t *testing.T) {
testInt8x16Unary(t, simd.Int8x16.Absolute, map1[int8](abs)) testInt8x16Unary(t, simd.Int8x16.Abs, map1[int8](abs))
testInt8x32Unary(t, simd.Int8x32.Absolute, map1[int8](abs)) testInt8x32Unary(t, simd.Int8x32.Abs, map1[int8](abs))
testInt16x8Unary(t, simd.Int16x8.Absolute, map1[int16](abs)) testInt16x8Unary(t, simd.Int16x8.Abs, map1[int16](abs))
testInt16x16Unary(t, simd.Int16x16.Absolute, map1[int16](abs)) testInt16x16Unary(t, simd.Int16x16.Abs, map1[int16](abs))
testInt32x4Unary(t, simd.Int32x4.Absolute, map1[int32](abs)) testInt32x4Unary(t, simd.Int32x4.Abs, map1[int32](abs))
testInt32x8Unary(t, simd.Int32x8.Absolute, map1[int32](abs)) testInt32x8Unary(t, simd.Int32x8.Abs, map1[int32](abs))
if simd.HasAVX512() { if simd.HasAVX512() {
testInt8x64Unary(t, simd.Int8x64.Absolute, map1[int8](abs)) testInt8x64Unary(t, simd.Int8x64.Abs, map1[int8](abs))
testInt16x32Unary(t, simd.Int16x32.Absolute, map1[int16](abs)) testInt16x32Unary(t, simd.Int16x32.Abs, map1[int16](abs))
testInt32x16Unary(t, simd.Int32x16.Absolute, map1[int32](abs)) testInt32x16Unary(t, simd.Int32x16.Abs, map1[int32](abs))
testInt64x2Unary(t, simd.Int64x2.Absolute, map1[int64](abs)) testInt64x2Unary(t, simd.Int64x2.Abs, map1[int64](abs))
testInt64x4Unary(t, simd.Int64x4.Absolute, map1[int64](abs)) testInt64x4Unary(t, simd.Int64x4.Abs, map1[int64](abs))
testInt64x8Unary(t, simd.Int64x8.Absolute, map1[int64](abs)) testInt64x8Unary(t, simd.Int64x8.Abs, map1[int64](abs))
} }
} }