diff --git a/src/cmd/compile/internal/amd64/simdssa.go b/src/cmd/compile/internal/amd64/simdssa.go
index 5fc068c895c..484c389cef2 100644
--- a/src/cmd/compile/internal/amd64/simdssa.go
+++ b/src/cmd/compile/internal/amd64/simdssa.go
@@ -12,21 +12,21 @@ import (
 func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
 	var p *obj.Prog
 	switch v.Op {
-	case ssa.OpAMD64VPABSW256,
+	case ssa.OpAMD64VPABSB128,
+		ssa.OpAMD64VPABSB256,
+		ssa.OpAMD64VPABSB512,
 		ssa.OpAMD64VPABSW128,
+		ssa.OpAMD64VPABSW256,
+		ssa.OpAMD64VPABSW512,
 		ssa.OpAMD64VPABSD128,
 		ssa.OpAMD64VPABSD256,
-		ssa.OpAMD64VPABSB128,
-		ssa.OpAMD64VPABSB256,
-		ssa.OpAMD64VPABSW512,
 		ssa.OpAMD64VPABSD512,
 		ssa.OpAMD64VPABSQ128,
 		ssa.OpAMD64VPABSQ256,
 		ssa.OpAMD64VPABSQ512,
-		ssa.OpAMD64VPABSB512,
-		ssa.OpAMD64VRCP14PS512,
 		ssa.OpAMD64VRCP14PS128,
 		ssa.OpAMD64VRCP14PS256,
+		ssa.OpAMD64VRCP14PS512,
 		ssa.OpAMD64VRCP14PD128,
 		ssa.OpAMD64VRCP14PD256,
 		ssa.OpAMD64VRCP14PD512,
@@ -36,400 +36,395 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
 		ssa.OpAMD64VRSQRT14PD128,
 		ssa.OpAMD64VRSQRT14PD256,
 		ssa.OpAMD64VRSQRT14PD512,
-		ssa.OpAMD64VPOPCNTW256,
-		ssa.OpAMD64VPOPCNTW512,
-		ssa.OpAMD64VPOPCNTW128,
-		ssa.OpAMD64VPOPCNTD512,
-		ssa.OpAMD64VPOPCNTD128,
-		ssa.OpAMD64VPOPCNTD256,
-		ssa.OpAMD64VPOPCNTQ128,
-		ssa.OpAMD64VPOPCNTQ256,
-		ssa.OpAMD64VPOPCNTQ512,
 		ssa.OpAMD64VPOPCNTB128,
 		ssa.OpAMD64VPOPCNTB256,
 		ssa.OpAMD64VPOPCNTB512,
+		ssa.OpAMD64VPOPCNTW128,
+		ssa.OpAMD64VPOPCNTW256,
+		ssa.OpAMD64VPOPCNTW512,
+		ssa.OpAMD64VPOPCNTD128,
+		ssa.OpAMD64VPOPCNTD256,
+		ssa.OpAMD64VPOPCNTD512,
+		ssa.OpAMD64VPOPCNTQ128,
+		ssa.OpAMD64VPOPCNTQ256,
+		ssa.OpAMD64VPOPCNTQ512,
 		ssa.OpAMD64VSQRTPS128,
 		ssa.OpAMD64VSQRTPS256,
+		ssa.OpAMD64VSQRTPS512,
 		ssa.OpAMD64VSQRTPD128,
 		ssa.OpAMD64VSQRTPD256,
-		ssa.OpAMD64VSQRTPS512,
 		ssa.OpAMD64VSQRTPD512:
 		p = simdFp11(s, v)
 
 	case ssa.OpAMD64VADDPS128,
 		ssa.OpAMD64VADDPS256,
+		ssa.OpAMD64VADDPS512,
 		ssa.OpAMD64VADDPD128,
 		ssa.OpAMD64VADDPD256,
-		ssa.OpAMD64VPADDW256,
-		ssa.OpAMD64VPADDW128,
-		ssa.OpAMD64VPADDD128,
-		ssa.OpAMD64VPADDD256,
-		ssa.OpAMD64VPADDQ128,
-		ssa.OpAMD64VPADDQ256,
+		ssa.OpAMD64VADDPD512,
 		ssa.OpAMD64VPADDB128,
 		ssa.OpAMD64VPADDB256,
-		ssa.OpAMD64VADDPS512,
-		ssa.OpAMD64VADDPD512,
-		ssa.OpAMD64VPADDW512,
-		ssa.OpAMD64VPADDD512,
-		ssa.OpAMD64VPADDQ512,
 		ssa.OpAMD64VPADDB512,
+		ssa.OpAMD64VPADDW128,
+		ssa.OpAMD64VPADDW256,
+		ssa.OpAMD64VPADDW512,
+		ssa.OpAMD64VPADDD128,
+		ssa.OpAMD64VPADDD256,
+		ssa.OpAMD64VPADDD512,
+		ssa.OpAMD64VPADDQ128,
+		ssa.OpAMD64VPADDQ256,
+		ssa.OpAMD64VPADDQ512,
 		ssa.OpAMD64VADDSUBPS128,
 		ssa.OpAMD64VADDSUBPS256,
 		ssa.OpAMD64VADDSUBPD128,
 		ssa.OpAMD64VADDSUBPD256,
 		ssa.OpAMD64VANDPS128,
 		ssa.OpAMD64VANDPS256,
+		ssa.OpAMD64VANDPS512,
 		ssa.OpAMD64VANDPD128,
 		ssa.OpAMD64VANDPD256,
-		ssa.OpAMD64VPAND256,
-		ssa.OpAMD64VPAND128,
-		ssa.OpAMD64VANDPS512,
 		ssa.OpAMD64VANDPD512,
+		ssa.OpAMD64VPAND128,
+		ssa.OpAMD64VPAND256,
 		ssa.OpAMD64VPANDD512,
 		ssa.OpAMD64VPANDQ512,
 		ssa.OpAMD64VANDNPS128,
 		ssa.OpAMD64VANDNPS256,
+		ssa.OpAMD64VANDNPS512,
 		ssa.OpAMD64VANDNPD128,
 		ssa.OpAMD64VANDNPD256,
-		ssa.OpAMD64VPANDN256,
-		ssa.OpAMD64VPANDN128,
-		ssa.OpAMD64VANDNPS512,
 		ssa.OpAMD64VANDNPD512,
+		ssa.OpAMD64VPANDN128,
+		ssa.OpAMD64VPANDN256,
 		ssa.OpAMD64VPANDND512,
 		ssa.OpAMD64VPANDNQ512,
-		ssa.OpAMD64VPAVGW256,
-		ssa.OpAMD64VPAVGW128,
 		ssa.OpAMD64VPAVGB128,
 		ssa.OpAMD64VPAVGB256,
-		ssa.OpAMD64VPAVGW512,
 		ssa.OpAMD64VPAVGB512,
+		ssa.OpAMD64VPAVGW128,
+		ssa.OpAMD64VPAVGW256,
+		ssa.OpAMD64VPAVGW512,
 		ssa.OpAMD64VDIVPS128,
 		ssa.OpAMD64VDIVPS256,
+		ssa.OpAMD64VDIVPS512,
 		ssa.OpAMD64VDIVPD128,
 		ssa.OpAMD64VDIVPD256,
-		ssa.OpAMD64VDIVPS512,
 		ssa.OpAMD64VDIVPD512,
-		ssa.OpAMD64VPCMPEQW256,
+		ssa.OpAMD64VPCMPEQB128,
+		ssa.OpAMD64VPCMPEQB256,
 		ssa.OpAMD64VPCMPEQW128,
+		ssa.OpAMD64VPCMPEQW256,
 		ssa.OpAMD64VPCMPEQD128,
 		ssa.OpAMD64VPCMPEQD256,
 		ssa.OpAMD64VPCMPEQQ128,
 		ssa.OpAMD64VPCMPEQQ256,
-		ssa.OpAMD64VPCMPEQB128,
-		ssa.OpAMD64VPCMPEQB256,
-		ssa.OpAMD64VPCMPGTW256,
+		ssa.OpAMD64VPCMPGTB128,
+		ssa.OpAMD64VPCMPGTB256,
 		ssa.OpAMD64VPCMPGTW128,
+		ssa.OpAMD64VPCMPGTW256,
 		ssa.OpAMD64VPCMPGTD128,
 		ssa.OpAMD64VPCMPGTD256,
 		ssa.OpAMD64VPCMPGTQ256,
-		ssa.OpAMD64VPCMPGTB128,
-		ssa.OpAMD64VPCMPGTB256,
 		ssa.OpAMD64VMAXPS128,
 		ssa.OpAMD64VMAXPS256,
+		ssa.OpAMD64VMAXPS512,
 		ssa.OpAMD64VMAXPD128,
 		ssa.OpAMD64VMAXPD256,
-		ssa.OpAMD64VPMAXSW256,
-		ssa.OpAMD64VPMAXSW128,
-		ssa.OpAMD64VPMAXSD128,
-		ssa.OpAMD64VPMAXSD256,
+		ssa.OpAMD64VMAXPD512,
 		ssa.OpAMD64VPMAXSB128,
 		ssa.OpAMD64VPMAXSB256,
-		ssa.OpAMD64VPMAXUW256,
-		ssa.OpAMD64VPMAXUW128,
-		ssa.OpAMD64VPMAXUD128,
-		ssa.OpAMD64VPMAXUD256,
-		ssa.OpAMD64VPMAXUB128,
-		ssa.OpAMD64VPMAXUB256,
-		ssa.OpAMD64VMAXPS512,
-		ssa.OpAMD64VMAXPD512,
+		ssa.OpAMD64VPMAXSB512,
+		ssa.OpAMD64VPMAXSW128,
+		ssa.OpAMD64VPMAXSW256,
 		ssa.OpAMD64VPMAXSW512,
+		ssa.OpAMD64VPMAXSD128,
+		ssa.OpAMD64VPMAXSD256,
 		ssa.OpAMD64VPMAXSD512,
 		ssa.OpAMD64VPMAXSQ128,
 		ssa.OpAMD64VPMAXSQ256,
 		ssa.OpAMD64VPMAXSQ512,
-		ssa.OpAMD64VPMAXSB512,
+		ssa.OpAMD64VPMAXUB128,
+		ssa.OpAMD64VPMAXUB256,
+		ssa.OpAMD64VPMAXUB512,
+		ssa.OpAMD64VPMAXUW128,
+		ssa.OpAMD64VPMAXUW256,
 		ssa.OpAMD64VPMAXUW512,
+		ssa.OpAMD64VPMAXUD128,
+		ssa.OpAMD64VPMAXUD256,
 		ssa.OpAMD64VPMAXUD512,
 		ssa.OpAMD64VPMAXUQ128,
 		ssa.OpAMD64VPMAXUQ256,
 		ssa.OpAMD64VPMAXUQ512,
-		ssa.OpAMD64VPMAXUB512,
 		ssa.OpAMD64VMINPS128,
 		ssa.OpAMD64VMINPS256,
+		ssa.OpAMD64VMINPS512,
 		ssa.OpAMD64VMINPD128,
 		ssa.OpAMD64VMINPD256,
-		ssa.OpAMD64VPMINSW256,
-		ssa.OpAMD64VPMINSW128,
-		ssa.OpAMD64VPMINSD128,
-		ssa.OpAMD64VPMINSD256,
+		ssa.OpAMD64VMINPD512,
 		ssa.OpAMD64VPMINSB128,
 		ssa.OpAMD64VPMINSB256,
-		ssa.OpAMD64VPMINUW256,
-		ssa.OpAMD64VPMINUW128,
-		ssa.OpAMD64VPMINUD128,
-		ssa.OpAMD64VPMINUD256,
-		ssa.OpAMD64VPMINUB128,
-		ssa.OpAMD64VPMINUB256,
-		ssa.OpAMD64VMINPS512,
-		ssa.OpAMD64VMINPD512,
+		ssa.OpAMD64VPMINSB512,
+		ssa.OpAMD64VPMINSW128,
+		ssa.OpAMD64VPMINSW256,
 		ssa.OpAMD64VPMINSW512,
+		ssa.OpAMD64VPMINSD128,
+		ssa.OpAMD64VPMINSD256,
 		ssa.OpAMD64VPMINSD512,
 		ssa.OpAMD64VPMINSQ128,
 		ssa.OpAMD64VPMINSQ256,
 		ssa.OpAMD64VPMINSQ512,
-		ssa.OpAMD64VPMINSB512,
+		ssa.OpAMD64VPMINUB128,
+		ssa.OpAMD64VPMINUB256,
+		ssa.OpAMD64VPMINUB512,
+		ssa.OpAMD64VPMINUW128,
+		ssa.OpAMD64VPMINUW256,
 		ssa.OpAMD64VPMINUW512,
+		ssa.OpAMD64VPMINUD128,
+		ssa.OpAMD64VPMINUD256,
 		ssa.OpAMD64VPMINUD512,
 		ssa.OpAMD64VPMINUQ128,
 		ssa.OpAMD64VPMINUQ256,
 		ssa.OpAMD64VPMINUQ512,
-		ssa.OpAMD64VPMINUB512,
 		ssa.OpAMD64VMULPS128,
 		ssa.OpAMD64VMULPS256,
+		ssa.OpAMD64VMULPS512,
 		ssa.OpAMD64VMULPD128,
 		ssa.OpAMD64VMULPD256,
-		ssa.OpAMD64VMULPS512,
 		ssa.OpAMD64VMULPD512,
-		ssa.OpAMD64VSCALEFPS512,
 		ssa.OpAMD64VSCALEFPS128,
 		ssa.OpAMD64VSCALEFPS256,
+		ssa.OpAMD64VSCALEFPS512,
 		ssa.OpAMD64VSCALEFPD128,
 		ssa.OpAMD64VSCALEFPD256,
 		ssa.OpAMD64VSCALEFPD512,
 		ssa.OpAMD64VPMULDQ128,
 		ssa.OpAMD64VPMULDQ256,
+		ssa.OpAMD64VPMULDQ512,
 		ssa.OpAMD64VPMULUDQ128,
 		ssa.OpAMD64VPMULUDQ256,
-		ssa.OpAMD64VPMULDQ512,
 		ssa.OpAMD64VPMULUDQ512,
-		ssa.OpAMD64VPMULHW256,
 		ssa.OpAMD64VPMULHW128,
-		ssa.OpAMD64VPMULHUW256,
-		ssa.OpAMD64VPMULHUW128,
+		ssa.OpAMD64VPMULHW256,
 		ssa.OpAMD64VPMULHW512,
+		ssa.OpAMD64VPMULHUW128,
+		ssa.OpAMD64VPMULHUW256,
 		ssa.OpAMD64VPMULHUW512,
-		ssa.OpAMD64VPMULLW256,
 		ssa.OpAMD64VPMULLW128,
+		ssa.OpAMD64VPMULLW256,
+		ssa.OpAMD64VPMULLW512,
 		ssa.OpAMD64VPMULLD128,
 		ssa.OpAMD64VPMULLD256,
-		ssa.OpAMD64VPMULLW512,
 		ssa.OpAMD64VPMULLD512,
 		ssa.OpAMD64VPMULLQ128,
 		ssa.OpAMD64VPMULLQ256,
 		ssa.OpAMD64VPMULLQ512,
 		ssa.OpAMD64VORPS128,
 		ssa.OpAMD64VORPS256,
+		ssa.OpAMD64VORPS512,
 		ssa.OpAMD64VORPD128,
 		ssa.OpAMD64VORPD256,
-		ssa.OpAMD64VPOR256,
-		ssa.OpAMD64VPOR128,
-		ssa.OpAMD64VORPS512,
 		ssa.OpAMD64VORPD512,
+		ssa.OpAMD64VPOR128,
+		ssa.OpAMD64VPOR256,
 		ssa.OpAMD64VPORD512,
 		ssa.OpAMD64VPORQ512,
-		ssa.OpAMD64VPMADDWD256,
 		ssa.OpAMD64VPMADDWD128,
+		ssa.OpAMD64VPMADDWD256,
 		ssa.OpAMD64VPMADDWD512,
 		ssa.OpAMD64VHADDPS128,
 		ssa.OpAMD64VHADDPS256,
 		ssa.OpAMD64VHADDPD128,
 		ssa.OpAMD64VHADDPD256,
-		ssa.OpAMD64VPHADDW256,
 		ssa.OpAMD64VPHADDW128,
+		ssa.OpAMD64VPHADDW256,
 		ssa.OpAMD64VPHADDD128,
 		ssa.OpAMD64VPHADDD256,
 		ssa.OpAMD64VHSUBPS128,
 		ssa.OpAMD64VHSUBPS256,
 		ssa.OpAMD64VHSUBPD128,
 		ssa.OpAMD64VHSUBPD256,
-		ssa.OpAMD64VPHSUBW256,
 		ssa.OpAMD64VPHSUBW128,
+		ssa.OpAMD64VPHSUBW256,
 		ssa.OpAMD64VPHSUBD128,
 		ssa.OpAMD64VPHSUBD256,
-		ssa.OpAMD64VPADDSW256,
-		ssa.OpAMD64VPADDSW128,
 		ssa.OpAMD64VPADDSB128,
 		ssa.OpAMD64VPADDSB256,
-		ssa.OpAMD64VPADDSW512,
 		ssa.OpAMD64VPADDSB512,
-		ssa.OpAMD64VPHADDSW256,
+		ssa.OpAMD64VPADDSW128,
+		ssa.OpAMD64VPADDSW256,
+		ssa.OpAMD64VPADDSW512,
 		ssa.OpAMD64VPHADDSW128,
-		ssa.OpAMD64VPHSUBSW256,
+		ssa.OpAMD64VPHADDSW256,
 		ssa.OpAMD64VPHSUBSW128,
-		ssa.OpAMD64VPSUBSW256,
-		ssa.OpAMD64VPSUBSW128,
+		ssa.OpAMD64VPHSUBSW256,
 		ssa.OpAMD64VPSUBSB128,
 		ssa.OpAMD64VPSUBSB256,
-		ssa.OpAMD64VPSUBSW512,
 		ssa.OpAMD64VPSUBSB512,
+		ssa.OpAMD64VPSUBSW128,
+		ssa.OpAMD64VPSUBSW256,
+		ssa.OpAMD64VPSUBSW512,
 		ssa.OpAMD64VPMADDUBSW128,
 		ssa.OpAMD64VPMADDUBSW256,
 		ssa.OpAMD64VPMADDUBSW512,
-		ssa.OpAMD64VPSIGNW256,
-		ssa.OpAMD64VPSIGNW128,
-		ssa.OpAMD64VPSIGND128,
-		ssa.OpAMD64VPSIGND256,
 		ssa.OpAMD64VPSIGNB128,
 		ssa.OpAMD64VPSIGNB256,
-		ssa.OpAMD64VPSUBW256,
-		ssa.OpAMD64VPSUBW128,
-		ssa.OpAMD64VPSUBD128,
-		ssa.OpAMD64VPSUBD256,
-		ssa.OpAMD64VPSUBQ128,
-		ssa.OpAMD64VPSUBQ256,
+		ssa.OpAMD64VPSIGNW128,
+		ssa.OpAMD64VPSIGNW256,
+		ssa.OpAMD64VPSIGND128,
+		ssa.OpAMD64VPSIGND256,
+		ssa.OpAMD64VSUBPS128,
+		ssa.OpAMD64VSUBPS256,
+		ssa.OpAMD64VSUBPS512,
+		ssa.OpAMD64VSUBPD128,
+		ssa.OpAMD64VSUBPD256,
+		ssa.OpAMD64VSUBPD512,
 		ssa.OpAMD64VPSUBB128,
 		ssa.OpAMD64VPSUBB256,
-		ssa.OpAMD64VPSUBW512,
-		ssa.OpAMD64VPSUBD512,
-		ssa.OpAMD64VPSUBQ512,
 		ssa.OpAMD64VPSUBB512,
+		ssa.OpAMD64VPSUBW128,
+		ssa.OpAMD64VPSUBW256,
+		ssa.OpAMD64VPSUBW512,
+		ssa.OpAMD64VPSUBD128,
+		ssa.OpAMD64VPSUBD256,
+		ssa.OpAMD64VPSUBD512,
+		ssa.OpAMD64VPSUBQ128,
+		ssa.OpAMD64VPSUBQ256,
+		ssa.OpAMD64VPSUBQ512,
 		ssa.OpAMD64VXORPS128,
 		ssa.OpAMD64VXORPS256,
+		ssa.OpAMD64VXORPS512,
 		ssa.OpAMD64VXORPD128,
 		ssa.OpAMD64VXORPD256,
-		ssa.OpAMD64VPXOR256,
-		ssa.OpAMD64VPXOR128,
-		ssa.OpAMD64VXORPS512,
 		ssa.OpAMD64VXORPD512,
+		ssa.OpAMD64VPXOR128,
+		ssa.OpAMD64VPXOR256,
 		ssa.OpAMD64VPXORD512,
 		ssa.OpAMD64VPXORQ512:
 		p = simdFp21(s, v)
 
-	case ssa.OpAMD64VPCMPEQW512,
-		ssa.OpAMD64VPCMPEQD512,
-		ssa.OpAMD64VPCMPEQQ512,
-		ssa.OpAMD64VPCMPEQB512,
-		ssa.OpAMD64VPCMPGTW512,
-		ssa.OpAMD64VPCMPGTD512,
-		ssa.OpAMD64VPCMPGTQ128,
-		ssa.OpAMD64VPCMPGTQ512,
-		ssa.OpAMD64VPCMPGTB512:
-		p = simdFp2k1(s, v)
-
-	case ssa.OpAMD64VADDPSMasked512,
-		ssa.OpAMD64VADDPSMasked128,
+	case ssa.OpAMD64VADDPSMasked128,
 		ssa.OpAMD64VADDPSMasked256,
+		ssa.OpAMD64VADDPSMasked512,
 		ssa.OpAMD64VADDPDMasked128,
 		ssa.OpAMD64VADDPDMasked256,
 		ssa.OpAMD64VADDPDMasked512,
-		ssa.OpAMD64VPADDWMasked256,
-		ssa.OpAMD64VPADDWMasked512,
-		ssa.OpAMD64VPADDWMasked128,
-		ssa.OpAMD64VPADDDMasked512,
-		ssa.OpAMD64VPADDDMasked128,
-		ssa.OpAMD64VPADDDMasked256,
-		ssa.OpAMD64VPADDQMasked128,
-		ssa.OpAMD64VPADDQMasked256,
-		ssa.OpAMD64VPADDQMasked512,
 		ssa.OpAMD64VPADDBMasked128,
 		ssa.OpAMD64VPADDBMasked256,
 		ssa.OpAMD64VPADDBMasked512,
-		ssa.OpAMD64VANDPSMasked512,
+		ssa.OpAMD64VPADDWMasked128,
+		ssa.OpAMD64VPADDWMasked256,
+		ssa.OpAMD64VPADDWMasked512,
+		ssa.OpAMD64VPADDDMasked128,
+		ssa.OpAMD64VPADDDMasked256,
+		ssa.OpAMD64VPADDDMasked512,
+		ssa.OpAMD64VPADDQMasked128,
+		ssa.OpAMD64VPADDQMasked256,
+		ssa.OpAMD64VPADDQMasked512,
 		ssa.OpAMD64VANDPSMasked128,
 		ssa.OpAMD64VANDPSMasked256,
+		ssa.OpAMD64VANDPSMasked512,
 		ssa.OpAMD64VANDPDMasked128,
 		ssa.OpAMD64VANDPDMasked256,
 		ssa.OpAMD64VANDPDMasked512,
-		ssa.OpAMD64VPANDDMasked512,
 		ssa.OpAMD64VPANDDMasked128,
 		ssa.OpAMD64VPANDDMasked256,
+		ssa.OpAMD64VPANDDMasked512,
 		ssa.OpAMD64VPANDQMasked128,
 		ssa.OpAMD64VPANDQMasked256,
 		ssa.OpAMD64VPANDQMasked512,
-		ssa.OpAMD64VANDNPSMasked512,
 		ssa.OpAMD64VANDNPSMasked128,
 		ssa.OpAMD64VANDNPSMasked256,
+		ssa.OpAMD64VANDNPSMasked512,
 		ssa.OpAMD64VANDNPDMasked128,
 		ssa.OpAMD64VANDNPDMasked256,
 		ssa.OpAMD64VANDNPDMasked512,
-		ssa.OpAMD64VPANDNDMasked512,
 		ssa.OpAMD64VPANDNDMasked128,
 		ssa.OpAMD64VPANDNDMasked256,
+		ssa.OpAMD64VPANDNDMasked512,
 		ssa.OpAMD64VPANDNQMasked128,
 		ssa.OpAMD64VPANDNQMasked256,
 		ssa.OpAMD64VPANDNQMasked512,
-		ssa.OpAMD64VPAVGWMasked256,
-		ssa.OpAMD64VPAVGWMasked512,
-		ssa.OpAMD64VPAVGWMasked128,
 		ssa.OpAMD64VPAVGBMasked128,
 		ssa.OpAMD64VPAVGBMasked256,
 		ssa.OpAMD64VPAVGBMasked512,
-		ssa.OpAMD64VDIVPSMasked512,
+		ssa.OpAMD64VPAVGWMasked128,
+		ssa.OpAMD64VPAVGWMasked256,
+		ssa.OpAMD64VPAVGWMasked512,
 		ssa.OpAMD64VDIVPSMasked128,
 		ssa.OpAMD64VDIVPSMasked256,
+		ssa.OpAMD64VDIVPSMasked512,
 		ssa.OpAMD64VDIVPDMasked128,
 		ssa.OpAMD64VDIVPDMasked256,
 		ssa.OpAMD64VDIVPDMasked512,
-		ssa.OpAMD64VMAXPSMasked512,
 		ssa.OpAMD64VMAXPSMasked128,
 		ssa.OpAMD64VMAXPSMasked256,
+		ssa.OpAMD64VMAXPSMasked512,
 		ssa.OpAMD64VMAXPDMasked128,
 		ssa.OpAMD64VMAXPDMasked256,
 		ssa.OpAMD64VMAXPDMasked512,
-		ssa.OpAMD64VPMAXSWMasked256,
-		ssa.OpAMD64VPMAXSWMasked512,
-		ssa.OpAMD64VPMAXSWMasked128,
-		ssa.OpAMD64VPMAXSDMasked512,
-		ssa.OpAMD64VPMAXSDMasked128,
-		ssa.OpAMD64VPMAXSDMasked256,
-		ssa.OpAMD64VPMAXSQMasked128,
-		ssa.OpAMD64VPMAXSQMasked256,
-		ssa.OpAMD64VPMAXSQMasked512,
 		ssa.OpAMD64VPMAXSBMasked128,
 		ssa.OpAMD64VPMAXSBMasked256,
 		ssa.OpAMD64VPMAXSBMasked512,
-		ssa.OpAMD64VPMAXUWMasked256,
-		ssa.OpAMD64VPMAXUWMasked512,
-		ssa.OpAMD64VPMAXUWMasked128,
-		ssa.OpAMD64VPMAXUDMasked512,
-		ssa.OpAMD64VPMAXUDMasked128,
-		ssa.OpAMD64VPMAXUDMasked256,
-		ssa.OpAMD64VPMAXUQMasked128,
-		ssa.OpAMD64VPMAXUQMasked256,
-		ssa.OpAMD64VPMAXUQMasked512,
+		ssa.OpAMD64VPMAXSWMasked128,
+		ssa.OpAMD64VPMAXSWMasked256,
+		ssa.OpAMD64VPMAXSWMasked512,
+		ssa.OpAMD64VPMAXSDMasked128,
+		ssa.OpAMD64VPMAXSDMasked256,
+		ssa.OpAMD64VPMAXSDMasked512,
+		ssa.OpAMD64VPMAXSQMasked128,
+		ssa.OpAMD64VPMAXSQMasked256,
+		ssa.OpAMD64VPMAXSQMasked512,
 		ssa.OpAMD64VPMAXUBMasked128,
 		ssa.OpAMD64VPMAXUBMasked256,
 		ssa.OpAMD64VPMAXUBMasked512,
-		ssa.OpAMD64VMINPSMasked512,
+		ssa.OpAMD64VPMAXUWMasked128,
+		ssa.OpAMD64VPMAXUWMasked256,
+		ssa.OpAMD64VPMAXUWMasked512,
+		ssa.OpAMD64VPMAXUDMasked128,
+		ssa.OpAMD64VPMAXUDMasked256,
+		ssa.OpAMD64VPMAXUDMasked512,
+		ssa.OpAMD64VPMAXUQMasked128,
+		ssa.OpAMD64VPMAXUQMasked256,
+		ssa.OpAMD64VPMAXUQMasked512,
 		ssa.OpAMD64VMINPSMasked128,
 		ssa.OpAMD64VMINPSMasked256,
+		ssa.OpAMD64VMINPSMasked512,
 		ssa.OpAMD64VMINPDMasked128,
 		ssa.OpAMD64VMINPDMasked256,
 		ssa.OpAMD64VMINPDMasked512,
-		ssa.OpAMD64VPMINSWMasked256,
-		ssa.OpAMD64VPMINSWMasked512,
-		ssa.OpAMD64VPMINSWMasked128,
-		ssa.OpAMD64VPMINSDMasked512,
-		ssa.OpAMD64VPMINSDMasked128,
-		ssa.OpAMD64VPMINSDMasked256,
-		ssa.OpAMD64VPMINSQMasked128,
-		ssa.OpAMD64VPMINSQMasked256,
-		ssa.OpAMD64VPMINSQMasked512,
 		ssa.OpAMD64VPMINSBMasked128,
 		ssa.OpAMD64VPMINSBMasked256,
 		ssa.OpAMD64VPMINSBMasked512,
-		ssa.OpAMD64VPMINUWMasked256,
-		ssa.OpAMD64VPMINUWMasked512,
-		ssa.OpAMD64VPMINUWMasked128,
-		ssa.OpAMD64VPMINUDMasked512,
-		ssa.OpAMD64VPMINUDMasked128,
-		ssa.OpAMD64VPMINUDMasked256,
-		ssa.OpAMD64VPMINUQMasked128,
-		ssa.OpAMD64VPMINUQMasked256,
-		ssa.OpAMD64VPMINUQMasked512,
+		ssa.OpAMD64VPMINSWMasked128,
+		ssa.OpAMD64VPMINSWMasked256,
+		ssa.OpAMD64VPMINSWMasked512,
+		ssa.OpAMD64VPMINSDMasked128,
+		ssa.OpAMD64VPMINSDMasked256,
+		ssa.OpAMD64VPMINSDMasked512,
+		ssa.OpAMD64VPMINSQMasked128,
+		ssa.OpAMD64VPMINSQMasked256,
+		ssa.OpAMD64VPMINSQMasked512,
 		ssa.OpAMD64VPMINUBMasked128,
 		ssa.OpAMD64VPMINUBMasked256,
 		ssa.OpAMD64VPMINUBMasked512,
-		ssa.OpAMD64VMULPSMasked512,
+		ssa.OpAMD64VPMINUWMasked128,
+		ssa.OpAMD64VPMINUWMasked256,
+		ssa.OpAMD64VPMINUWMasked512,
+		ssa.OpAMD64VPMINUDMasked128,
+		ssa.OpAMD64VPMINUDMasked256,
+		ssa.OpAMD64VPMINUDMasked512,
+		ssa.OpAMD64VPMINUQMasked128,
+		ssa.OpAMD64VPMINUQMasked256,
+		ssa.OpAMD64VPMINUQMasked512,
 		ssa.OpAMD64VMULPSMasked128,
 		ssa.OpAMD64VMULPSMasked256,
+		ssa.OpAMD64VMULPSMasked512,
 		ssa.OpAMD64VMULPDMasked128,
 		ssa.OpAMD64VMULPDMasked256,
 		ssa.OpAMD64VMULPDMasked512,
-		ssa.OpAMD64VSCALEFPSMasked512,
 		ssa.OpAMD64VSCALEFPSMasked128,
 		ssa.OpAMD64VSCALEFPSMasked256,
+		ssa.OpAMD64VSCALEFPSMasked512,
 		ssa.OpAMD64VSCALEFPDMasked128,
 		ssa.OpAMD64VSCALEFPDMasked256,
 		ssa.OpAMD64VSCALEFPDMasked512,
@@ -439,142 +434,122 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
 		ssa.OpAMD64VPMULUDQMasked128,
 		ssa.OpAMD64VPMULUDQMasked256,
 		ssa.OpAMD64VPMULUDQMasked512,
+		ssa.OpAMD64VPMULHWMasked128,
 		ssa.OpAMD64VPMULHWMasked256,
 		ssa.OpAMD64VPMULHWMasked512,
-		ssa.OpAMD64VPMULHWMasked128,
+		ssa.OpAMD64VPMULHUWMasked128,
 		ssa.OpAMD64VPMULHUWMasked256,
 		ssa.OpAMD64VPMULHUWMasked512,
-		ssa.OpAMD64VPMULHUWMasked128,
+		ssa.OpAMD64VPMULLWMasked128,
 		ssa.OpAMD64VPMULLWMasked256,
 		ssa.OpAMD64VPMULLWMasked512,
-		ssa.OpAMD64VPMULLWMasked128,
-		ssa.OpAMD64VPMULLDMasked512,
 		ssa.OpAMD64VPMULLDMasked128,
 		ssa.OpAMD64VPMULLDMasked256,
+		ssa.OpAMD64VPMULLDMasked512,
 		ssa.OpAMD64VPMULLQMasked128,
 		ssa.OpAMD64VPMULLQMasked256,
 		ssa.OpAMD64VPMULLQMasked512,
-		ssa.OpAMD64VORPSMasked512,
 		ssa.OpAMD64VORPSMasked128,
 		ssa.OpAMD64VORPSMasked256,
+		ssa.OpAMD64VORPSMasked512,
 		ssa.OpAMD64VORPDMasked128,
 		ssa.OpAMD64VORPDMasked256,
 		ssa.OpAMD64VORPDMasked512,
-		ssa.OpAMD64VPORDMasked512,
 		ssa.OpAMD64VPORDMasked128,
 		ssa.OpAMD64VPORDMasked256,
+		ssa.OpAMD64VPORDMasked512,
 		ssa.OpAMD64VPORQMasked128,
 		ssa.OpAMD64VPORQMasked256,
 		ssa.OpAMD64VPORQMasked512,
+		ssa.OpAMD64VPMADDWDMasked128,
 		ssa.OpAMD64VPMADDWDMasked256,
 		ssa.OpAMD64VPMADDWDMasked512,
-		ssa.OpAMD64VPMADDWDMasked128,
-		ssa.OpAMD64VPADDSWMasked256,
-		ssa.OpAMD64VPADDSWMasked512,
-		ssa.OpAMD64VPADDSWMasked128,
 		ssa.OpAMD64VPADDSBMasked128,
 		ssa.OpAMD64VPADDSBMasked256,
 		ssa.OpAMD64VPADDSBMasked512,
-		ssa.OpAMD64VPSUBSWMasked256,
-		ssa.OpAMD64VPSUBSWMasked512,
-		ssa.OpAMD64VPSUBSWMasked128,
+		ssa.OpAMD64VPADDSWMasked128,
+		ssa.OpAMD64VPADDSWMasked256,
+		ssa.OpAMD64VPADDSWMasked512,
 		ssa.OpAMD64VPSUBSBMasked128,
 		ssa.OpAMD64VPSUBSBMasked256,
 		ssa.OpAMD64VPSUBSBMasked512,
+		ssa.OpAMD64VPSUBSWMasked128,
+		ssa.OpAMD64VPSUBSWMasked256,
+		ssa.OpAMD64VPSUBSWMasked512,
+		ssa.OpAMD64VPMADDUBSWMasked128,
 		ssa.OpAMD64VPMADDUBSWMasked256,
 		ssa.OpAMD64VPMADDUBSWMasked512,
-		ssa.OpAMD64VPMADDUBSWMasked128,
-		ssa.OpAMD64VPSUBWMasked256,
-		ssa.OpAMD64VPSUBWMasked512,
-		ssa.OpAMD64VPSUBWMasked128,
-		ssa.OpAMD64VPSUBDMasked512,
-		ssa.OpAMD64VPSUBDMasked128,
-		ssa.OpAMD64VPSUBDMasked256,
-		ssa.OpAMD64VPSUBQMasked128,
-		ssa.OpAMD64VPSUBQMasked256,
-		ssa.OpAMD64VPSUBQMasked512,
+		ssa.OpAMD64VSUBPSMasked128,
+		ssa.OpAMD64VSUBPSMasked256,
+		ssa.OpAMD64VSUBPSMasked512,
+		ssa.OpAMD64VSUBPDMasked128,
+		ssa.OpAMD64VSUBPDMasked256,
+		ssa.OpAMD64VSUBPDMasked512,
 		ssa.OpAMD64VPSUBBMasked128,
 		ssa.OpAMD64VPSUBBMasked256,
 		ssa.OpAMD64VPSUBBMasked512,
-		ssa.OpAMD64VXORPSMasked512,
+		ssa.OpAMD64VPSUBWMasked128,
+		ssa.OpAMD64VPSUBWMasked256,
+		ssa.OpAMD64VPSUBWMasked512,
+		ssa.OpAMD64VPSUBDMasked128,
+		ssa.OpAMD64VPSUBDMasked256,
+		ssa.OpAMD64VPSUBDMasked512,
+		ssa.OpAMD64VPSUBQMasked128,
+		ssa.OpAMD64VPSUBQMasked256,
+		ssa.OpAMD64VPSUBQMasked512,
 		ssa.OpAMD64VXORPSMasked128,
 		ssa.OpAMD64VXORPSMasked256,
+		ssa.OpAMD64VXORPSMasked512,
 		ssa.OpAMD64VXORPDMasked128,
 		ssa.OpAMD64VXORPDMasked256,
 		ssa.OpAMD64VXORPDMasked512,
-		ssa.OpAMD64VPXORDMasked512,
 		ssa.OpAMD64VPXORDMasked128,
 		ssa.OpAMD64VPXORDMasked256,
+		ssa.OpAMD64VPXORDMasked512,
 		ssa.OpAMD64VPXORQMasked128,
 		ssa.OpAMD64VPXORQMasked256,
 		ssa.OpAMD64VPXORQMasked512:
 		p = simdFp2k1fp1(s, v)
 
-	case ssa.OpAMD64VPCMPEQWMasked256,
-		ssa.OpAMD64VPCMPEQWMasked512,
-		ssa.OpAMD64VPCMPEQWMasked128,
-		ssa.OpAMD64VPCMPEQDMasked512,
-		ssa.OpAMD64VPCMPEQDMasked128,
-		ssa.OpAMD64VPCMPEQDMasked256,
-		ssa.OpAMD64VPCMPEQQMasked128,
-		ssa.OpAMD64VPCMPEQQMasked256,
-		ssa.OpAMD64VPCMPEQQMasked512,
-		ssa.OpAMD64VPCMPEQBMasked128,
-		ssa.OpAMD64VPCMPEQBMasked256,
-		ssa.OpAMD64VPCMPEQBMasked512,
-		ssa.OpAMD64VPCMPGTWMasked256,
-		ssa.OpAMD64VPCMPGTWMasked512,
-		ssa.OpAMD64VPCMPGTWMasked128,
-		ssa.OpAMD64VPCMPGTDMasked512,
-		ssa.OpAMD64VPCMPGTDMasked128,
-		ssa.OpAMD64VPCMPGTDMasked256,
-		ssa.OpAMD64VPCMPGTQMasked128,
-		ssa.OpAMD64VPCMPGTQMasked256,
-		ssa.OpAMD64VPCMPGTQMasked512,
-		ssa.OpAMD64VPCMPGTBMasked128,
-		ssa.OpAMD64VPCMPGTBMasked256,
-		ssa.OpAMD64VPCMPGTBMasked512:
-		p = simdFp2k1k1(s, v)
-
-	case ssa.OpAMD64VPABSWMasked256,
-		ssa.OpAMD64VPABSWMasked512,
+	case ssa.OpAMD64VPABSBMasked128,
+		ssa.OpAMD64VPABSBMasked256,
+		ssa.OpAMD64VPABSBMasked512,
 		ssa.OpAMD64VPABSWMasked128,
-		ssa.OpAMD64VPABSDMasked512,
+		ssa.OpAMD64VPABSWMasked256,
+		ssa.OpAMD64VPABSWMasked512,
 		ssa.OpAMD64VPABSDMasked128,
 		ssa.OpAMD64VPABSDMasked256,
+		ssa.OpAMD64VPABSDMasked512,
 		ssa.OpAMD64VPABSQMasked128,
 		ssa.OpAMD64VPABSQMasked256,
 		ssa.OpAMD64VPABSQMasked512,
-		ssa.OpAMD64VPABSBMasked128,
-		ssa.OpAMD64VPABSBMasked256,
-		ssa.OpAMD64VPABSBMasked512,
-		ssa.OpAMD64VRCP14PSMasked512,
 		ssa.OpAMD64VRCP14PSMasked128,
 		ssa.OpAMD64VRCP14PSMasked256,
+		ssa.OpAMD64VRCP14PSMasked512,
 		ssa.OpAMD64VRCP14PDMasked128,
 		ssa.OpAMD64VRCP14PDMasked256,
 		ssa.OpAMD64VRCP14PDMasked512,
-		ssa.OpAMD64VRSQRT14PSMasked512,
 		ssa.OpAMD64VRSQRT14PSMasked128,
 		ssa.OpAMD64VRSQRT14PSMasked256,
+		ssa.OpAMD64VRSQRT14PSMasked512,
 		ssa.OpAMD64VRSQRT14PDMasked128,
 		ssa.OpAMD64VRSQRT14PDMasked256,
 		ssa.OpAMD64VRSQRT14PDMasked512,
-		ssa.OpAMD64VPOPCNTWMasked256,
-		ssa.OpAMD64VPOPCNTWMasked512,
-		ssa.OpAMD64VPOPCNTWMasked128,
-		ssa.OpAMD64VPOPCNTDMasked512,
-		ssa.OpAMD64VPOPCNTDMasked128,
-		ssa.OpAMD64VPOPCNTDMasked256,
-		ssa.OpAMD64VPOPCNTQMasked128,
-		ssa.OpAMD64VPOPCNTQMasked256,
-		ssa.OpAMD64VPOPCNTQMasked512,
 		ssa.OpAMD64VPOPCNTBMasked128,
 		ssa.OpAMD64VPOPCNTBMasked256,
 		ssa.OpAMD64VPOPCNTBMasked512,
-		ssa.OpAMD64VSQRTPSMasked512,
+		ssa.OpAMD64VPOPCNTWMasked128,
+		ssa.OpAMD64VPOPCNTWMasked256,
+		ssa.OpAMD64VPOPCNTWMasked512,
+		ssa.OpAMD64VPOPCNTDMasked128,
+		ssa.OpAMD64VPOPCNTDMasked256,
+		ssa.OpAMD64VPOPCNTDMasked512,
+		ssa.OpAMD64VPOPCNTQMasked128,
+		ssa.OpAMD64VPOPCNTQMasked256,
+		ssa.OpAMD64VPOPCNTQMasked512,
 		ssa.OpAMD64VSQRTPSMasked128,
 		ssa.OpAMD64VSQRTPSMasked256,
+		ssa.OpAMD64VSQRTPSMasked512,
 		ssa.OpAMD64VSQRTPDMasked128,
 		ssa.OpAMD64VSQRTPDMasked256,
 		ssa.OpAMD64VSQRTPDMasked512:
@@ -584,29 +559,29 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
 		ssa.OpAMD64VROUNDPS256,
 		ssa.OpAMD64VROUNDPD128,
 		ssa.OpAMD64VROUNDPD256,
-		ssa.OpAMD64VRNDSCALEPS512,
 		ssa.OpAMD64VRNDSCALEPS128,
 		ssa.OpAMD64VRNDSCALEPS256,
+		ssa.OpAMD64VRNDSCALEPS512,
 		ssa.OpAMD64VRNDSCALEPD128,
 		ssa.OpAMD64VRNDSCALEPD256,
 		ssa.OpAMD64VRNDSCALEPD512,
-		ssa.OpAMD64VREDUCEPS512,
 		ssa.OpAMD64VREDUCEPS128,
 		ssa.OpAMD64VREDUCEPS256,
+		ssa.OpAMD64VREDUCEPS512,
 		ssa.OpAMD64VREDUCEPD128,
 		ssa.OpAMD64VREDUCEPD256,
 		ssa.OpAMD64VREDUCEPD512:
 		p = simdFp11Imm8(s, v)
 
-	case ssa.OpAMD64VRNDSCALEPSMasked512,
-		ssa.OpAMD64VRNDSCALEPSMasked128,
+	case ssa.OpAMD64VRNDSCALEPSMasked128,
 		ssa.OpAMD64VRNDSCALEPSMasked256,
+		ssa.OpAMD64VRNDSCALEPSMasked512,
 		ssa.OpAMD64VRNDSCALEPDMasked128,
 		ssa.OpAMD64VRNDSCALEPDMasked256,
 		ssa.OpAMD64VRNDSCALEPDMasked512,
-		ssa.OpAMD64VREDUCEPSMasked512,
 		ssa.OpAMD64VREDUCEPSMasked128,
 		ssa.OpAMD64VREDUCEPSMasked256,
+		ssa.OpAMD64VREDUCEPSMasked512,
 		ssa.OpAMD64VREDUCEPDMasked128,
 		ssa.OpAMD64VREDUCEPDMasked256,
 		ssa.OpAMD64VREDUCEPDMasked512:
@@ -621,169 +596,169 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
 	case ssa.OpAMD64VCMPPS512,
 		ssa.OpAMD64VCMPPD512,
-		ssa.OpAMD64VPCMPUW256,
-		ssa.OpAMD64VPCMPUW512,
-		ssa.OpAMD64VPCMPUW128,
-		ssa.OpAMD64VPCMPUD512,
-		ssa.OpAMD64VPCMPUD128,
-		ssa.OpAMD64VPCMPUD256,
-		ssa.OpAMD64VPCMPUQ128,
-		ssa.OpAMD64VPCMPUQ256,
-		ssa.OpAMD64VPCMPUQ512,
+		ssa.OpAMD64VPCMPB512,
+		ssa.OpAMD64VPCMPW512,
+		ssa.OpAMD64VPCMPD512,
+		ssa.OpAMD64VPCMPQ512,
 		ssa.OpAMD64VPCMPUB128,
 		ssa.OpAMD64VPCMPUB256,
 		ssa.OpAMD64VPCMPUB512,
-		ssa.OpAMD64VPCMPW256,
-		ssa.OpAMD64VPCMPW512,
-		ssa.OpAMD64VPCMPW128,
-		ssa.OpAMD64VPCMPD512,
-		ssa.OpAMD64VPCMPD128,
-		ssa.OpAMD64VPCMPD256,
+		ssa.OpAMD64VPCMPUW128,
+		ssa.OpAMD64VPCMPUW256,
+		ssa.OpAMD64VPCMPUW512,
+		ssa.OpAMD64VPCMPUD128,
+		ssa.OpAMD64VPCMPUD256,
+		ssa.OpAMD64VPCMPUD512,
+		ssa.OpAMD64VPCMPUQ128,
+		ssa.OpAMD64VPCMPUQ256,
+		ssa.OpAMD64VPCMPUQ512,
 		ssa.OpAMD64VPCMPQ128,
-		ssa.OpAMD64VPCMPQ256,
-		ssa.OpAMD64VPCMPQ512,
 		ssa.OpAMD64VPCMPB128,
 		ssa.OpAMD64VPCMPB256,
-		ssa.OpAMD64VPCMPB512:
+		ssa.OpAMD64VPCMPW128,
+		ssa.OpAMD64VPCMPW256,
+		ssa.OpAMD64VPCMPD128,
+		ssa.OpAMD64VPCMPD256,
+		ssa.OpAMD64VPCMPQ256:
 		p = simdFp2k1Imm8(s, v)
 
-	case ssa.OpAMD64VCMPPSMasked512,
-		ssa.OpAMD64VCMPPSMasked128,
+	case ssa.OpAMD64VCMPPSMasked128,
 		ssa.OpAMD64VCMPPSMasked256,
+		ssa.OpAMD64VCMPPSMasked512,
 		ssa.OpAMD64VCMPPDMasked128,
 		ssa.OpAMD64VCMPPDMasked256,
 		ssa.OpAMD64VCMPPDMasked512,
-		ssa.OpAMD64VPCMPUWMasked256,
-		ssa.OpAMD64VPCMPUWMasked512,
-		ssa.OpAMD64VPCMPUWMasked128,
-		ssa.OpAMD64VPCMPUDMasked512,
-		ssa.OpAMD64VPCMPUDMasked128,
-		ssa.OpAMD64VPCMPUDMasked256,
-		ssa.OpAMD64VPCMPUQMasked128,
-		ssa.OpAMD64VPCMPUQMasked256,
-		ssa.OpAMD64VPCMPUQMasked512,
-		ssa.OpAMD64VPCMPUBMasked128,
-		ssa.OpAMD64VPCMPUBMasked256,
-		ssa.OpAMD64VPCMPUBMasked512,
+		ssa.OpAMD64VPCMPBMasked128,
+		ssa.OpAMD64VPCMPBMasked256,
+		ssa.OpAMD64VPCMPBMasked512,
+		ssa.OpAMD64VPCMPWMasked128,
 		ssa.OpAMD64VPCMPWMasked256,
 		ssa.OpAMD64VPCMPWMasked512,
-		ssa.OpAMD64VPCMPWMasked128,
-		ssa.OpAMD64VPCMPDMasked512,
 		ssa.OpAMD64VPCMPDMasked128,
 		ssa.OpAMD64VPCMPDMasked256,
+		ssa.OpAMD64VPCMPDMasked512,
 		ssa.OpAMD64VPCMPQMasked128,
 		ssa.OpAMD64VPCMPQMasked256,
 		ssa.OpAMD64VPCMPQMasked512,
-		ssa.OpAMD64VPCMPBMasked128,
-		ssa.OpAMD64VPCMPBMasked256,
-		ssa.OpAMD64VPCMPBMasked512:
+		ssa.OpAMD64VPCMPUBMasked128,
+		ssa.OpAMD64VPCMPUBMasked256,
+		ssa.OpAMD64VPCMPUBMasked512,
+		ssa.OpAMD64VPCMPUWMasked128,
+		ssa.OpAMD64VPCMPUWMasked256,
+		ssa.OpAMD64VPCMPUWMasked512,
+		ssa.OpAMD64VPCMPUDMasked128,
+		ssa.OpAMD64VPCMPUDMasked256,
+		ssa.OpAMD64VPCMPUDMasked512,
+		ssa.OpAMD64VPCMPUQMasked128,
+		ssa.OpAMD64VPCMPUQMasked256,
+		ssa.OpAMD64VPCMPUQMasked512:
 		p = simdFp2k1k1Imm8(s, v)
 
-	case ssa.OpAMD64VFMADD132PS512,
-		ssa.OpAMD64VFMADD132PS128,
+	case ssa.OpAMD64VFMADD132PS128,
 		ssa.OpAMD64VFMADD132PS256,
+		ssa.OpAMD64VFMADD132PS512,
 		ssa.OpAMD64VFMADD132PD128,
 		ssa.OpAMD64VFMADD132PD256,
 		ssa.OpAMD64VFMADD132PD512,
-		ssa.OpAMD64VFMADD213PS512,
 		ssa.OpAMD64VFMADD213PS128,
 		ssa.OpAMD64VFMADD213PS256,
+		ssa.OpAMD64VFMADD213PS512,
 		ssa.OpAMD64VFMADD213PD128,
 		ssa.OpAMD64VFMADD213PD256,
 		ssa.OpAMD64VFMADD213PD512,
-		ssa.OpAMD64VFMADD231PS512,
 		ssa.OpAMD64VFMADD231PS128,
 		ssa.OpAMD64VFMADD231PS256,
+		ssa.OpAMD64VFMADD231PS512,
 		ssa.OpAMD64VFMADD231PD128,
 		ssa.OpAMD64VFMADD231PD256,
 		ssa.OpAMD64VFMADD231PD512,
-		ssa.OpAMD64VFMADDSUB132PS512,
 		ssa.OpAMD64VFMADDSUB132PS128,
 		ssa.OpAMD64VFMADDSUB132PS256,
+		ssa.OpAMD64VFMADDSUB132PS512,
 		ssa.OpAMD64VFMADDSUB132PD128,
 		ssa.OpAMD64VFMADDSUB132PD256,
 		ssa.OpAMD64VFMADDSUB132PD512,
-		ssa.OpAMD64VFMADDSUB213PS512,
 		ssa.OpAMD64VFMADDSUB213PS128,
 		ssa.OpAMD64VFMADDSUB213PS256,
+		ssa.OpAMD64VFMADDSUB213PS512,
 		ssa.OpAMD64VFMADDSUB213PD128,
 		ssa.OpAMD64VFMADDSUB213PD256,
 		ssa.OpAMD64VFMADDSUB213PD512,
-		ssa.OpAMD64VFMADDSUB231PS512,
 		ssa.OpAMD64VFMADDSUB231PS128,
 		ssa.OpAMD64VFMADDSUB231PS256,
+		ssa.OpAMD64VFMADDSUB231PS512,
 		ssa.OpAMD64VFMADDSUB231PD128,
 		ssa.OpAMD64VFMADDSUB231PD256,
 		ssa.OpAMD64VFMADDSUB231PD512,
-		ssa.OpAMD64VFMSUB132PS512,
 		ssa.OpAMD64VFMSUB132PS128,
 		ssa.OpAMD64VFMSUB132PS256,
+		ssa.OpAMD64VFMSUB132PS512,
 		ssa.OpAMD64VFMSUB132PD128,
 		ssa.OpAMD64VFMSUB132PD256,
 		ssa.OpAMD64VFMSUB132PD512,
-		ssa.OpAMD64VFMSUB213PS512,
 		ssa.OpAMD64VFMSUB213PS128,
 		ssa.OpAMD64VFMSUB213PS256,
+		ssa.OpAMD64VFMSUB213PS512,
 		ssa.OpAMD64VFMSUB213PD128,
 		ssa.OpAMD64VFMSUB213PD256,
 		ssa.OpAMD64VFMSUB213PD512,
-		ssa.OpAMD64VFMSUB231PS512,
 		ssa.OpAMD64VFMSUB231PS128,
 		ssa.OpAMD64VFMSUB231PS256,
+		ssa.OpAMD64VFMSUB231PS512,
 		ssa.OpAMD64VFMSUB231PD128,
 		ssa.OpAMD64VFMSUB231PD256,
 		ssa.OpAMD64VFMSUB231PD512,
-		ssa.OpAMD64VFMSUBADD132PS512,
 		ssa.OpAMD64VFMSUBADD132PS128,
 		ssa.OpAMD64VFMSUBADD132PS256,
+		ssa.OpAMD64VFMSUBADD132PS512,
 		ssa.OpAMD64VFMSUBADD132PD128,
 		ssa.OpAMD64VFMSUBADD132PD256,
 		ssa.OpAMD64VFMSUBADD132PD512,
-		ssa.OpAMD64VFMSUBADD213PS512,
 		ssa.OpAMD64VFMSUBADD213PS128,
 		ssa.OpAMD64VFMSUBADD213PS256,
+		ssa.OpAMD64VFMSUBADD213PS512,
 		ssa.OpAMD64VFMSUBADD213PD128,
 		ssa.OpAMD64VFMSUBADD213PD256,
 		ssa.OpAMD64VFMSUBADD213PD512,
-		ssa.OpAMD64VFMSUBADD231PS512,
 		ssa.OpAMD64VFMSUBADD231PS128,
 		ssa.OpAMD64VFMSUBADD231PS256,
+		ssa.OpAMD64VFMSUBADD231PS512,
 		ssa.OpAMD64VFMSUBADD231PD128,
 		ssa.OpAMD64VFMSUBADD231PD256,
 		ssa.OpAMD64VFMSUBADD231PD512,
-		ssa.OpAMD64VFNMADD132PS512,
 		ssa.OpAMD64VFNMADD132PS128,
 		ssa.OpAMD64VFNMADD132PS256,
+		ssa.OpAMD64VFNMADD132PS512,
 		ssa.OpAMD64VFNMADD132PD128,
 		ssa.OpAMD64VFNMADD132PD256,
 		ssa.OpAMD64VFNMADD132PD512,
-		ssa.OpAMD64VFNMADD213PS512,
 		ssa.OpAMD64VFNMADD213PS128,
 		ssa.OpAMD64VFNMADD213PS256,
+		ssa.OpAMD64VFNMADD213PS512,
 		ssa.OpAMD64VFNMADD213PD128,
 		ssa.OpAMD64VFNMADD213PD256,
 		ssa.OpAMD64VFNMADD213PD512,
-		ssa.OpAMD64VFNMADD231PS512,
 		ssa.OpAMD64VFNMADD231PS128,
 		ssa.OpAMD64VFNMADD231PS256,
+		ssa.OpAMD64VFNMADD231PS512,
 		ssa.OpAMD64VFNMADD231PD128,
 		ssa.OpAMD64VFNMADD231PD256,
 		ssa.OpAMD64VFNMADD231PD512,
-		ssa.OpAMD64VFNMSUB132PS512,
 		ssa.OpAMD64VFNMSUB132PS128,
 		ssa.OpAMD64VFNMSUB132PS256,
+		ssa.OpAMD64VFNMSUB132PS512,
 		ssa.OpAMD64VFNMSUB132PD128,
 		ssa.OpAMD64VFNMSUB132PD256,
 		ssa.OpAMD64VFNMSUB132PD512,
-		ssa.OpAMD64VFNMSUB213PS512,
 		ssa.OpAMD64VFNMSUB213PS128,
 		ssa.OpAMD64VFNMSUB213PS256,
+		ssa.OpAMD64VFNMSUB213PS512,
 		ssa.OpAMD64VFNMSUB213PD128,
 		ssa.OpAMD64VFNMSUB213PD256,
 		ssa.OpAMD64VFNMSUB213PD512,
-		ssa.OpAMD64VFNMSUB231PS512,
 		ssa.OpAMD64VFNMSUB231PS128,
 		ssa.OpAMD64VFNMSUB231PS256,
+		ssa.OpAMD64VFNMSUB231PS512,
 		ssa.OpAMD64VFNMSUB231PD128,
 		ssa.OpAMD64VFNMSUB231PD256,
 		ssa.OpAMD64VFNMSUB231PD512,
@@ -801,126 +776,126 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
 		ssa.OpAMD64VPDPBUSD512:
 		p = simdFp31ResultInArg0(s, v)
 
-	case ssa.OpAMD64VFMADD132PSMasked512,
-		ssa.OpAMD64VFMADD132PSMasked128,
+	case ssa.OpAMD64VFMADD132PSMasked128,
 		ssa.OpAMD64VFMADD132PSMasked256,
+		ssa.OpAMD64VFMADD132PSMasked512,
 		ssa.OpAMD64VFMADD132PDMasked128,
 		ssa.OpAMD64VFMADD132PDMasked256,
 		ssa.OpAMD64VFMADD132PDMasked512,
-		ssa.OpAMD64VFMADD213PSMasked512,
 		ssa.OpAMD64VFMADD213PSMasked128,
 		ssa.OpAMD64VFMADD213PSMasked256,
+		ssa.OpAMD64VFMADD213PSMasked512,
 		ssa.OpAMD64VFMADD213PDMasked128,
 		ssa.OpAMD64VFMADD213PDMasked256,
 		ssa.OpAMD64VFMADD213PDMasked512,
-		ssa.OpAMD64VFMADD231PSMasked512,
 		ssa.OpAMD64VFMADD231PSMasked128,
 		ssa.OpAMD64VFMADD231PSMasked256,
+		ssa.OpAMD64VFMADD231PSMasked512,
 		ssa.OpAMD64VFMADD231PDMasked128,
 		ssa.OpAMD64VFMADD231PDMasked256,
 		ssa.OpAMD64VFMADD231PDMasked512,
-		ssa.OpAMD64VFMADDSUB132PSMasked512,
 		ssa.OpAMD64VFMADDSUB132PSMasked128,
 		ssa.OpAMD64VFMADDSUB132PSMasked256,
+		ssa.OpAMD64VFMADDSUB132PSMasked512,
 		ssa.OpAMD64VFMADDSUB132PDMasked128,
 		ssa.OpAMD64VFMADDSUB132PDMasked256,
 		ssa.OpAMD64VFMADDSUB132PDMasked512,
-		ssa.OpAMD64VFMADDSUB213PSMasked512,
 		ssa.OpAMD64VFMADDSUB213PSMasked128,
 		ssa.OpAMD64VFMADDSUB213PSMasked256,
+		ssa.OpAMD64VFMADDSUB213PSMasked512,
 		ssa.OpAMD64VFMADDSUB213PDMasked128,
 		ssa.OpAMD64VFMADDSUB213PDMasked256,
 		ssa.OpAMD64VFMADDSUB213PDMasked512,
-		ssa.OpAMD64VFMADDSUB231PSMasked512,
 		ssa.OpAMD64VFMADDSUB231PSMasked128,
 		ssa.OpAMD64VFMADDSUB231PSMasked256,
+		ssa.OpAMD64VFMADDSUB231PSMasked512,
 		ssa.OpAMD64VFMADDSUB231PDMasked128,
 		ssa.OpAMD64VFMADDSUB231PDMasked256,
 		ssa.OpAMD64VFMADDSUB231PDMasked512,
-		ssa.OpAMD64VFMSUB132PSMasked512,
 		ssa.OpAMD64VFMSUB132PSMasked128,
 		ssa.OpAMD64VFMSUB132PSMasked256,
+		ssa.OpAMD64VFMSUB132PSMasked512,
 		ssa.OpAMD64VFMSUB132PDMasked128,
 		ssa.OpAMD64VFMSUB132PDMasked256,
 		ssa.OpAMD64VFMSUB132PDMasked512,
-		ssa.OpAMD64VFMSUB213PSMasked512,
 		ssa.OpAMD64VFMSUB213PSMasked128,
 		ssa.OpAMD64VFMSUB213PSMasked256,
+		ssa.OpAMD64VFMSUB213PSMasked512,
 		ssa.OpAMD64VFMSUB213PDMasked128,
 		ssa.OpAMD64VFMSUB213PDMasked256,
 		ssa.OpAMD64VFMSUB213PDMasked512,
-		ssa.OpAMD64VFMSUB231PSMasked512,
 		ssa.OpAMD64VFMSUB231PSMasked128,
 		ssa.OpAMD64VFMSUB231PSMasked256,
+		ssa.OpAMD64VFMSUB231PSMasked512,
 		ssa.OpAMD64VFMSUB231PDMasked128,
 		ssa.OpAMD64VFMSUB231PDMasked256,
 		ssa.OpAMD64VFMSUB231PDMasked512,
-		ssa.OpAMD64VFMSUBADD132PSMasked512,
 		ssa.OpAMD64VFMSUBADD132PSMasked128,
 		ssa.OpAMD64VFMSUBADD132PSMasked256,
+		ssa.OpAMD64VFMSUBADD132PSMasked512,
 		ssa.OpAMD64VFMSUBADD132PDMasked128,
 		ssa.OpAMD64VFMSUBADD132PDMasked256,
 		ssa.OpAMD64VFMSUBADD132PDMasked512,
-		ssa.OpAMD64VFMSUBADD213PSMasked512,
 		ssa.OpAMD64VFMSUBADD213PSMasked128,
 		ssa.OpAMD64VFMSUBADD213PSMasked256,
+		ssa.OpAMD64VFMSUBADD213PSMasked512,
 		ssa.OpAMD64VFMSUBADD213PDMasked128,
 		ssa.OpAMD64VFMSUBADD213PDMasked256,
 		ssa.OpAMD64VFMSUBADD213PDMasked512,
-		ssa.OpAMD64VFMSUBADD231PSMasked512,
 		ssa.OpAMD64VFMSUBADD231PSMasked128,
 		ssa.OpAMD64VFMSUBADD231PSMasked256,
+		ssa.OpAMD64VFMSUBADD231PSMasked512,
 		ssa.OpAMD64VFMSUBADD231PDMasked128,
 		ssa.OpAMD64VFMSUBADD231PDMasked256,
 		ssa.OpAMD64VFMSUBADD231PDMasked512,
-		ssa.OpAMD64VFNMADD132PSMasked512,
 		ssa.OpAMD64VFNMADD132PSMasked128,
 		ssa.OpAMD64VFNMADD132PSMasked256,
+		ssa.OpAMD64VFNMADD132PSMasked512,
 		ssa.OpAMD64VFNMADD132PDMasked128,
 		ssa.OpAMD64VFNMADD132PDMasked256,
 		ssa.OpAMD64VFNMADD132PDMasked512,
-		ssa.OpAMD64VFNMADD213PSMasked512,
 		ssa.OpAMD64VFNMADD213PSMasked128,
 		ssa.OpAMD64VFNMADD213PSMasked256,
+		ssa.OpAMD64VFNMADD213PSMasked512,
 		ssa.OpAMD64VFNMADD213PDMasked128,
 		ssa.OpAMD64VFNMADD213PDMasked256,
 		ssa.OpAMD64VFNMADD213PDMasked512,
-		ssa.OpAMD64VFNMADD231PSMasked512,
 		ssa.OpAMD64VFNMADD231PSMasked128,
 		ssa.OpAMD64VFNMADD231PSMasked256,
+		ssa.OpAMD64VFNMADD231PSMasked512,
 		ssa.OpAMD64VFNMADD231PDMasked128,
 		ssa.OpAMD64VFNMADD231PDMasked256,
 		ssa.OpAMD64VFNMADD231PDMasked512,
-		ssa.OpAMD64VFNMSUB132PSMasked512,
 		ssa.OpAMD64VFNMSUB132PSMasked128,
 		ssa.OpAMD64VFNMSUB132PSMasked256,
+		ssa.OpAMD64VFNMSUB132PSMasked512,
 		ssa.OpAMD64VFNMSUB132PDMasked128,
 		ssa.OpAMD64VFNMSUB132PDMasked256,
 		ssa.OpAMD64VFNMSUB132PDMasked512,
-		ssa.OpAMD64VFNMSUB213PSMasked512,
 		ssa.OpAMD64VFNMSUB213PSMasked128,
 		ssa.OpAMD64VFNMSUB213PSMasked256,
+		ssa.OpAMD64VFNMSUB213PSMasked512,
 		ssa.OpAMD64VFNMSUB213PDMasked128,
 		ssa.OpAMD64VFNMSUB213PDMasked256,
 		ssa.OpAMD64VFNMSUB213PDMasked512,
-		ssa.OpAMD64VFNMSUB231PSMasked512,
 		ssa.OpAMD64VFNMSUB231PSMasked128,
 		ssa.OpAMD64VFNMSUB231PSMasked256,
+		ssa.OpAMD64VFNMSUB231PSMasked512,
 		ssa.OpAMD64VFNMSUB231PDMasked128,
 		ssa.OpAMD64VFNMSUB231PDMasked256,
 		ssa.OpAMD64VFNMSUB231PDMasked512,
-		ssa.OpAMD64VPDPWSSDMasked512,
 		ssa.OpAMD64VPDPWSSDMasked128,
 		ssa.OpAMD64VPDPWSSDMasked256,
-		ssa.OpAMD64VPDPWSSDSMasked512,
+		ssa.OpAMD64VPDPWSSDMasked512,
 		ssa.OpAMD64VPDPWSSDSMasked128,
 		ssa.OpAMD64VPDPWSSDSMasked256,
-		ssa.OpAMD64VPDPBUSDSMasked512,
+		ssa.OpAMD64VPDPWSSDSMasked512,
 		ssa.OpAMD64VPDPBUSDSMasked128,
 		ssa.OpAMD64VPDPBUSDSMasked256,
-		ssa.OpAMD64VPDPBUSDMasked512,
+		ssa.OpAMD64VPDPBUSDSMasked512,
 		ssa.OpAMD64VPDPBUSDMasked128,
-		ssa.OpAMD64VPDPBUSDMasked256:
+		ssa.OpAMD64VPDPBUSDMasked256,
+		ssa.OpAMD64VPDPBUSDMasked512:
 		p = simdFp3k1fp1ResultInArg0(s, v)
 
 	default:
@@ -930,273 +905,273 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
 	// Masked operation are always compiled with zeroing.
 	switch v.Op {
-	case ssa.OpAMD64VPABSWMasked256,
-		ssa.OpAMD64VPABSWMasked512,
+	case ssa.OpAMD64VPABSBMasked128,
+		ssa.OpAMD64VPABSBMasked256,
+		ssa.OpAMD64VPABSBMasked512,
 		ssa.OpAMD64VPABSWMasked128,
-		ssa.OpAMD64VPABSDMasked512,
+		ssa.OpAMD64VPABSWMasked256,
+		ssa.OpAMD64VPABSWMasked512,
 		ssa.OpAMD64VPABSDMasked128,
 		ssa.OpAMD64VPABSDMasked256,
+		ssa.OpAMD64VPABSDMasked512,
 		ssa.OpAMD64VPABSQMasked128,
 		ssa.OpAMD64VPABSQMasked256,
 		ssa.OpAMD64VPABSQMasked512,
-		ssa.OpAMD64VPABSBMasked128,
-		ssa.OpAMD64VPABSBMasked256,
-		ssa.OpAMD64VPABSBMasked512,
-		ssa.OpAMD64VADDPSMasked512,
 		ssa.OpAMD64VADDPSMasked128,
 		ssa.OpAMD64VADDPSMasked256,
+		ssa.OpAMD64VADDPSMasked512,
 		ssa.OpAMD64VADDPDMasked128,
 		ssa.OpAMD64VADDPDMasked256,
 		ssa.OpAMD64VADDPDMasked512,
-		ssa.OpAMD64VPADDWMasked256,
-		ssa.OpAMD64VPADDWMasked512,
-		ssa.OpAMD64VPADDWMasked128,
-		ssa.OpAMD64VPADDDMasked512,
-		ssa.OpAMD64VPADDDMasked128,
-		ssa.OpAMD64VPADDDMasked256,
-		ssa.OpAMD64VPADDQMasked128,
-		ssa.OpAMD64VPADDQMasked256,
-		ssa.OpAMD64VPADDQMasked512,
 		ssa.OpAMD64VPADDBMasked128,
 		ssa.OpAMD64VPADDBMasked256,
 		ssa.OpAMD64VPADDBMasked512,
-		ssa.OpAMD64VANDPSMasked512,
+		ssa.OpAMD64VPADDWMasked128,
+		ssa.OpAMD64VPADDWMasked256,
+		ssa.OpAMD64VPADDWMasked512,
+		ssa.OpAMD64VPADDDMasked128,
+		ssa.OpAMD64VPADDDMasked256,
+		ssa.OpAMD64VPADDDMasked512,
+		ssa.OpAMD64VPADDQMasked128,
+		ssa.OpAMD64VPADDQMasked256,
+		ssa.OpAMD64VPADDQMasked512,
 		ssa.OpAMD64VANDPSMasked128,
 		ssa.OpAMD64VANDPSMasked256,
+		ssa.OpAMD64VANDPSMasked512,
 		ssa.OpAMD64VANDPDMasked128,
 		ssa.OpAMD64VANDPDMasked256,
 		ssa.OpAMD64VANDPDMasked512,
-		ssa.OpAMD64VPANDDMasked512,
 		ssa.OpAMD64VPANDDMasked128,
 		ssa.OpAMD64VPANDDMasked256,
+		ssa.OpAMD64VPANDDMasked512,
 		ssa.OpAMD64VPANDQMasked128,
 		ssa.OpAMD64VPANDQMasked256,
 		ssa.OpAMD64VPANDQMasked512,
-		ssa.OpAMD64VANDNPSMasked512,
 		ssa.OpAMD64VANDNPSMasked128,
 		ssa.OpAMD64VANDNPSMasked256,
+		ssa.OpAMD64VANDNPSMasked512,
 		ssa.OpAMD64VANDNPDMasked128,
 		ssa.OpAMD64VANDNPDMasked256,
 		ssa.OpAMD64VANDNPDMasked512,
-		ssa.OpAMD64VPANDNDMasked512,
 		ssa.OpAMD64VPANDNDMasked128,
 		ssa.OpAMD64VPANDNDMasked256,
+		ssa.OpAMD64VPANDNDMasked512,
 		ssa.OpAMD64VPANDNQMasked128,
 		ssa.OpAMD64VPANDNQMasked256,
 		ssa.OpAMD64VPANDNQMasked512,
-		ssa.OpAMD64VRCP14PSMasked512,
 		ssa.OpAMD64VRCP14PSMasked128,
 		ssa.OpAMD64VRCP14PSMasked256,
+		ssa.OpAMD64VRCP14PSMasked512,
 		ssa.OpAMD64VRCP14PDMasked128,
 		ssa.OpAMD64VRCP14PDMasked256,
 		ssa.OpAMD64VRCP14PDMasked512,
-		ssa.OpAMD64VRSQRT14PSMasked512,
 		ssa.OpAMD64VRSQRT14PSMasked128,
 		ssa.OpAMD64VRSQRT14PSMasked256,
+		ssa.OpAMD64VRSQRT14PSMasked512,
 		ssa.OpAMD64VRSQRT14PDMasked128,
 		ssa.OpAMD64VRSQRT14PDMasked256,
 		ssa.OpAMD64VRSQRT14PDMasked512,
-		ssa.OpAMD64VPAVGWMasked256,
-		ssa.OpAMD64VPAVGWMasked512,
-		ssa.OpAMD64VPAVGWMasked128,
 		ssa.OpAMD64VPAVGBMasked128,
 		ssa.OpAMD64VPAVGBMasked256,
 		ssa.OpAMD64VPAVGBMasked512,
-		ssa.OpAMD64VRNDSCALEPSMasked512,
+		ssa.OpAMD64VPAVGWMasked128,
+		ssa.OpAMD64VPAVGWMasked256,
+		ssa.OpAMD64VPAVGWMasked512,
 		ssa.OpAMD64VRNDSCALEPSMasked128,
 		ssa.OpAMD64VRNDSCALEPSMasked256,
+		ssa.OpAMD64VRNDSCALEPSMasked512,
 		ssa.OpAMD64VRNDSCALEPDMasked128,
 		ssa.OpAMD64VRNDSCALEPDMasked256,
 		ssa.OpAMD64VRNDSCALEPDMasked512,
-		ssa.OpAMD64VREDUCEPSMasked512,
 		ssa.OpAMD64VREDUCEPSMasked128,
 		ssa.OpAMD64VREDUCEPSMasked256,
+		ssa.OpAMD64VREDUCEPSMasked512,
 		ssa.OpAMD64VREDUCEPDMasked128,
 		ssa.OpAMD64VREDUCEPDMasked256,
 		ssa.OpAMD64VREDUCEPDMasked512,
-		ssa.OpAMD64VDIVPSMasked512,
 		ssa.OpAMD64VDIVPSMasked128,
 		ssa.OpAMD64VDIVPSMasked256,
+		ssa.OpAMD64VDIVPSMasked512,
 		ssa.OpAMD64VDIVPDMasked128,
 		ssa.OpAMD64VDIVPDMasked256,
 		ssa.OpAMD64VDIVPDMasked512,
-		ssa.OpAMD64VFMADD132PSMasked512,
 		ssa.OpAMD64VFMADD132PSMasked128,
 		ssa.OpAMD64VFMADD132PSMasked256,
+		ssa.OpAMD64VFMADD132PSMasked512,
 		ssa.OpAMD64VFMADD132PDMasked128,
 		ssa.OpAMD64VFMADD132PDMasked256,
 		ssa.OpAMD64VFMADD132PDMasked512,
-		ssa.OpAMD64VFMADD213PSMasked512,
 		ssa.OpAMD64VFMADD213PSMasked128,
 		ssa.OpAMD64VFMADD213PSMasked256,
+		ssa.OpAMD64VFMADD213PSMasked512,
 		ssa.OpAMD64VFMADD213PDMasked128,
 		ssa.OpAMD64VFMADD213PDMasked256,
 		ssa.OpAMD64VFMADD213PDMasked512,
-		ssa.OpAMD64VFMADD231PSMasked512,
 		ssa.OpAMD64VFMADD231PSMasked128,
 		ssa.OpAMD64VFMADD231PSMasked256,
+		ssa.OpAMD64VFMADD231PSMasked512,
 		ssa.OpAMD64VFMADD231PDMasked128,
 		ssa.OpAMD64VFMADD231PDMasked256,
 		ssa.OpAMD64VFMADD231PDMasked512,
-		ssa.OpAMD64VFMADDSUB132PSMasked512,
 		ssa.OpAMD64VFMADDSUB132PSMasked128,
 		ssa.OpAMD64VFMADDSUB132PSMasked256,
+		ssa.OpAMD64VFMADDSUB132PSMasked512,
 		ssa.OpAMD64VFMADDSUB132PDMasked128,
 		ssa.OpAMD64VFMADDSUB132PDMasked256,
 		ssa.OpAMD64VFMADDSUB132PDMasked512,
-		ssa.OpAMD64VFMADDSUB213PSMasked512,
 		ssa.OpAMD64VFMADDSUB213PSMasked128,
 		ssa.OpAMD64VFMADDSUB213PSMasked256,
+		ssa.OpAMD64VFMADDSUB213PSMasked512,
 		ssa.OpAMD64VFMADDSUB213PDMasked128,
 		ssa.OpAMD64VFMADDSUB213PDMasked256,
 		ssa.OpAMD64VFMADDSUB213PDMasked512,
-		ssa.OpAMD64VFMADDSUB231PSMasked512,
 		ssa.OpAMD64VFMADDSUB231PSMasked128,
 		ssa.OpAMD64VFMADDSUB231PSMasked256,
+		ssa.OpAMD64VFMADDSUB231PSMasked512,
 		ssa.OpAMD64VFMADDSUB231PDMasked128,
 		ssa.OpAMD64VFMADDSUB231PDMasked256,
 		ssa.OpAMD64VFMADDSUB231PDMasked512,
-		ssa.OpAMD64VFMSUB132PSMasked512,
 		ssa.OpAMD64VFMSUB132PSMasked128,
 		ssa.OpAMD64VFMSUB132PSMasked256,
+		ssa.OpAMD64VFMSUB132PSMasked512,
 		ssa.OpAMD64VFMSUB132PDMasked128,
 		ssa.OpAMD64VFMSUB132PDMasked256,
 		ssa.OpAMD64VFMSUB132PDMasked512,
-		ssa.OpAMD64VFMSUB213PSMasked512,
 		ssa.OpAMD64VFMSUB213PSMasked128,
 		ssa.OpAMD64VFMSUB213PSMasked256,
+		ssa.OpAMD64VFMSUB213PSMasked512,
 		ssa.OpAMD64VFMSUB213PDMasked128,
 		ssa.OpAMD64VFMSUB213PDMasked256,
 		ssa.OpAMD64VFMSUB213PDMasked512,
-		ssa.OpAMD64VFMSUB231PSMasked512,
 		ssa.OpAMD64VFMSUB231PSMasked128,
 		ssa.OpAMD64VFMSUB231PSMasked256,
+		ssa.OpAMD64VFMSUB231PSMasked512,
 		ssa.OpAMD64VFMSUB231PDMasked128,
 		ssa.OpAMD64VFMSUB231PDMasked256,
 		ssa.OpAMD64VFMSUB231PDMasked512,
-		ssa.OpAMD64VFMSUBADD132PSMasked512,
 		ssa.OpAMD64VFMSUBADD132PSMasked128,
 		ssa.OpAMD64VFMSUBADD132PSMasked256,
+		ssa.OpAMD64VFMSUBADD132PSMasked512,
 		ssa.OpAMD64VFMSUBADD132PDMasked128,
 		ssa.OpAMD64VFMSUBADD132PDMasked256,
 		ssa.OpAMD64VFMSUBADD132PDMasked512,
-		ssa.OpAMD64VFMSUBADD213PSMasked512,
 		ssa.OpAMD64VFMSUBADD213PSMasked128,
 		ssa.OpAMD64VFMSUBADD213PSMasked256,
+		ssa.OpAMD64VFMSUBADD213PSMasked512,
 		ssa.OpAMD64VFMSUBADD213PDMasked128,
 		ssa.OpAMD64VFMSUBADD213PDMasked256,
 		ssa.OpAMD64VFMSUBADD213PDMasked512,
-		ssa.OpAMD64VFMSUBADD231PSMasked512,
 		ssa.OpAMD64VFMSUBADD231PSMasked128,
 		ssa.OpAMD64VFMSUBADD231PSMasked256,
+		ssa.OpAMD64VFMSUBADD231PSMasked512,
 		ssa.OpAMD64VFMSUBADD231PDMasked128,
 		ssa.OpAMD64VFMSUBADD231PDMasked256,
 		ssa.OpAMD64VFMSUBADD231PDMasked512,
-		ssa.OpAMD64VFNMADD132PSMasked512,
 		ssa.OpAMD64VFNMADD132PSMasked128,
 		ssa.OpAMD64VFNMADD132PSMasked256,
+		ssa.OpAMD64VFNMADD132PSMasked512,
 		ssa.OpAMD64VFNMADD132PDMasked128,
 		ssa.OpAMD64VFNMADD132PDMasked256,
 		ssa.OpAMD64VFNMADD132PDMasked512,
-		ssa.OpAMD64VFNMADD213PSMasked512,
 		ssa.OpAMD64VFNMADD213PSMasked128,
 		ssa.OpAMD64VFNMADD213PSMasked256,
+		ssa.OpAMD64VFNMADD213PSMasked512,
 		ssa.OpAMD64VFNMADD213PDMasked128,
 		ssa.OpAMD64VFNMADD213PDMasked256,
 		ssa.OpAMD64VFNMADD213PDMasked512,
-		ssa.OpAMD64VFNMADD231PSMasked512,
 		ssa.OpAMD64VFNMADD231PSMasked128,
 		ssa.OpAMD64VFNMADD231PSMasked256,
+		ssa.OpAMD64VFNMADD231PSMasked512,
 		ssa.OpAMD64VFNMADD231PDMasked128,
 		ssa.OpAMD64VFNMADD231PDMasked256,
 		ssa.OpAMD64VFNMADD231PDMasked512,
-		ssa.OpAMD64VFNMSUB132PSMasked512,
 		ssa.OpAMD64VFNMSUB132PSMasked128,
 		ssa.OpAMD64VFNMSUB132PSMasked256,
+		ssa.OpAMD64VFNMSUB132PSMasked512,
 		ssa.OpAMD64VFNMSUB132PDMasked128,
 		ssa.OpAMD64VFNMSUB132PDMasked256,
 		ssa.OpAMD64VFNMSUB132PDMasked512,
-		ssa.OpAMD64VFNMSUB213PSMasked512,
 		ssa.OpAMD64VFNMSUB213PSMasked128,
 		ssa.OpAMD64VFNMSUB213PSMasked256,
+		ssa.OpAMD64VFNMSUB213PSMasked512,
 		ssa.OpAMD64VFNMSUB213PDMasked128,
 		ssa.OpAMD64VFNMSUB213PDMasked256,
 		ssa.OpAMD64VFNMSUB213PDMasked512,
-		ssa.OpAMD64VFNMSUB231PSMasked512,
 		ssa.OpAMD64VFNMSUB231PSMasked128,
 		ssa.OpAMD64VFNMSUB231PSMasked256,
+		ssa.OpAMD64VFNMSUB231PSMasked512,
 		ssa.OpAMD64VFNMSUB231PDMasked128,
 		ssa.OpAMD64VFNMSUB231PDMasked256,
 		ssa.OpAMD64VFNMSUB231PDMasked512,
-		ssa.OpAMD64VMAXPSMasked512,
 		ssa.OpAMD64VMAXPSMasked128,
 		ssa.OpAMD64VMAXPSMasked256,
+		ssa.OpAMD64VMAXPSMasked512,
 		ssa.OpAMD64VMAXPDMasked128,
 		ssa.OpAMD64VMAXPDMasked256,
 		ssa.OpAMD64VMAXPDMasked512,
-		ssa.OpAMD64VPMAXSWMasked256,
-		ssa.OpAMD64VPMAXSWMasked512,
-		ssa.OpAMD64VPMAXSWMasked128,
-		ssa.OpAMD64VPMAXSDMasked512,
-		ssa.OpAMD64VPMAXSDMasked128,
-		ssa.OpAMD64VPMAXSDMasked256,
-		ssa.OpAMD64VPMAXSQMasked128,
-		ssa.OpAMD64VPMAXSQMasked256,
-		ssa.OpAMD64VPMAXSQMasked512,
 		ssa.OpAMD64VPMAXSBMasked128,
 		ssa.OpAMD64VPMAXSBMasked256,
 		ssa.OpAMD64VPMAXSBMasked512,
-		ssa.OpAMD64VPMAXUWMasked256,
-		ssa.OpAMD64VPMAXUWMasked512,
-		ssa.OpAMD64VPMAXUWMasked128,
-		ssa.OpAMD64VPMAXUDMasked512,
-		ssa.OpAMD64VPMAXUDMasked128,
-		ssa.OpAMD64VPMAXUDMasked256,
-		ssa.OpAMD64VPMAXUQMasked128,
-		ssa.OpAMD64VPMAXUQMasked256,
-		ssa.OpAMD64VPMAXUQMasked512,
+		ssa.OpAMD64VPMAXSWMasked128,
+		ssa.OpAMD64VPMAXSWMasked256,
+		ssa.OpAMD64VPMAXSWMasked512,
+		ssa.OpAMD64VPMAXSDMasked128,
+		ssa.OpAMD64VPMAXSDMasked256,
+		ssa.OpAMD64VPMAXSDMasked512,
+		ssa.OpAMD64VPMAXSQMasked128,
+		ssa.OpAMD64VPMAXSQMasked256,
+		ssa.OpAMD64VPMAXSQMasked512,
 		ssa.OpAMD64VPMAXUBMasked128,
 		ssa.OpAMD64VPMAXUBMasked256,
 		ssa.OpAMD64VPMAXUBMasked512,
-		ssa.OpAMD64VMINPSMasked512,
+		ssa.OpAMD64VPMAXUWMasked128,
+		ssa.OpAMD64VPMAXUWMasked256,
+		ssa.OpAMD64VPMAXUWMasked512,
+		ssa.OpAMD64VPMAXUDMasked128,
+		ssa.OpAMD64VPMAXUDMasked256,
+		ssa.OpAMD64VPMAXUDMasked512,
+		ssa.OpAMD64VPMAXUQMasked128,
+		ssa.OpAMD64VPMAXUQMasked256,
+		ssa.OpAMD64VPMAXUQMasked512,
 		ssa.OpAMD64VMINPSMasked128,
 		ssa.OpAMD64VMINPSMasked256,
+		ssa.OpAMD64VMINPSMasked512,
 		ssa.OpAMD64VMINPDMasked128,
 		ssa.OpAMD64VMINPDMasked256,
 		ssa.OpAMD64VMINPDMasked512,
-		ssa.OpAMD64VPMINSWMasked256,
-		ssa.OpAMD64VPMINSWMasked512,
-		ssa.OpAMD64VPMINSWMasked128,
-		ssa.OpAMD64VPMINSDMasked512,
-		ssa.OpAMD64VPMINSDMasked128,
-		ssa.OpAMD64VPMINSDMasked256,
-		ssa.OpAMD64VPMINSQMasked128,
-		ssa.OpAMD64VPMINSQMasked256,
-		ssa.OpAMD64VPMINSQMasked512,
 		ssa.OpAMD64VPMINSBMasked128,
 		ssa.OpAMD64VPMINSBMasked256,
 		ssa.OpAMD64VPMINSBMasked512,
-		ssa.OpAMD64VPMINUWMasked256,
-		ssa.OpAMD64VPMINUWMasked512,
-		ssa.OpAMD64VPMINUWMasked128,
-		ssa.OpAMD64VPMINUDMasked512,
-		ssa.OpAMD64VPMINUDMasked128,
-		ssa.OpAMD64VPMINUDMasked256,
-		ssa.OpAMD64VPMINUQMasked128,
-		ssa.OpAMD64VPMINUQMasked256,
-		ssa.OpAMD64VPMINUQMasked512,
+		ssa.OpAMD64VPMINSWMasked128,
+		ssa.OpAMD64VPMINSWMasked256,
+		ssa.OpAMD64VPMINSWMasked512,
+		ssa.OpAMD64VPMINSDMasked128,
+		ssa.OpAMD64VPMINSDMasked256,
+		ssa.OpAMD64VPMINSDMasked512,
+		ssa.OpAMD64VPMINSQMasked128,
+		ssa.OpAMD64VPMINSQMasked256,
+		ssa.OpAMD64VPMINSQMasked512,
 		ssa.OpAMD64VPMINUBMasked128,
 		ssa.OpAMD64VPMINUBMasked256,
 		ssa.OpAMD64VPMINUBMasked512,
-		ssa.OpAMD64VMULPSMasked512,
+		ssa.OpAMD64VPMINUWMasked128,
+		ssa.OpAMD64VPMINUWMasked256,
+		ssa.OpAMD64VPMINUWMasked512,
+		ssa.OpAMD64VPMINUDMasked128,
+		ssa.OpAMD64VPMINUDMasked256,
+		ssa.OpAMD64VPMINUDMasked512,
+		ssa.OpAMD64VPMINUQMasked128,
+		ssa.OpAMD64VPMINUQMasked256,
+		ssa.OpAMD64VPMINUQMasked512,
 		ssa.OpAMD64VMULPSMasked128,
 		ssa.OpAMD64VMULPSMasked256,
+		ssa.OpAMD64VMULPSMasked512,
 		ssa.OpAMD64VMULPDMasked128,
 		ssa.OpAMD64VMULPDMasked256,
 		ssa.OpAMD64VMULPDMasked512,
-		ssa.OpAMD64VSCALEFPSMasked512,
 		ssa.OpAMD64VSCALEFPSMasked128,
 		ssa.OpAMD64VSCALEFPSMasked256,
+		ssa.OpAMD64VSCALEFPSMasked512,
 		ssa.OpAMD64VSCALEFPDMasked128,
 		ssa.OpAMD64VSCALEFPDMasked256,
 		ssa.OpAMD64VSCALEFPDMasked512,
@@ -1206,102 +1181,108 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
 		ssa.OpAMD64VPMULUDQMasked128,
 		ssa.OpAMD64VPMULUDQMasked256,
 		ssa.OpAMD64VPMULUDQMasked512,
+		ssa.OpAMD64VPMULHWMasked128,
 		ssa.OpAMD64VPMULHWMasked256,
 		ssa.OpAMD64VPMULHWMasked512,
-		ssa.OpAMD64VPMULHWMasked128,
+		ssa.OpAMD64VPMULHUWMasked128,
 		ssa.OpAMD64VPMULHUWMasked256,
 		ssa.OpAMD64VPMULHUWMasked512,
-		ssa.OpAMD64VPMULHUWMasked128,
+		ssa.OpAMD64VPMULLWMasked128,
 		ssa.OpAMD64VPMULLWMasked256,
 		ssa.OpAMD64VPMULLWMasked512,
-		ssa.OpAMD64VPMULLWMasked128,
-		ssa.OpAMD64VPMULLDMasked512,
 		ssa.OpAMD64VPMULLDMasked128,
 		ssa.OpAMD64VPMULLDMasked256,
+		ssa.OpAMD64VPMULLDMasked512,
 		ssa.OpAMD64VPMULLQMasked128,
 		ssa.OpAMD64VPMULLQMasked256,
 		ssa.OpAMD64VPMULLQMasked512,
-		ssa.OpAMD64VORPSMasked512,
 		ssa.OpAMD64VORPSMasked128,
 		ssa.OpAMD64VORPSMasked256,
+		ssa.OpAMD64VORPSMasked512,
 		ssa.OpAMD64VORPDMasked128,
 		ssa.OpAMD64VORPDMasked256,
 		ssa.OpAMD64VORPDMasked512,
-		ssa.OpAMD64VPORDMasked512,
 		ssa.OpAMD64VPORDMasked128,
 		ssa.OpAMD64VPORDMasked256,
+		ssa.OpAMD64VPORDMasked512,
 		ssa.OpAMD64VPORQMasked128,
 		ssa.OpAMD64VPORQMasked256,
 		ssa.OpAMD64VPORQMasked512,
+		ssa.OpAMD64VPMADDWDMasked128,
 		ssa.OpAMD64VPMADDWDMasked256,
 		ssa.OpAMD64VPMADDWDMasked512,
-		ssa.OpAMD64VPMADDWDMasked128,
-		ssa.OpAMD64VPDPWSSDMasked512,
 		ssa.OpAMD64VPDPWSSDMasked128,
 		ssa.OpAMD64VPDPWSSDMasked256,
-		ssa.OpAMD64VPOPCNTWMasked256,
-		ssa.OpAMD64VPOPCNTWMasked512,
-		ssa.OpAMD64VPOPCNTWMasked128,
-		ssa.OpAMD64VPOPCNTDMasked512,
-		ssa.OpAMD64VPOPCNTDMasked128,
-		ssa.OpAMD64VPOPCNTDMasked256,
-		ssa.OpAMD64VPOPCNTQMasked128,
-		ssa.OpAMD64VPOPCNTQMasked256,
-		ssa.OpAMD64VPOPCNTQMasked512,
+		ssa.OpAMD64VPDPWSSDMasked512,
 		ssa.OpAMD64VPOPCNTBMasked128,
 		ssa.OpAMD64VPOPCNTBMasked256,
 		ssa.OpAMD64VPOPCNTBMasked512,
-		ssa.OpAMD64VPADDSWMasked256,
-		ssa.OpAMD64VPADDSWMasked512,
-		ssa.OpAMD64VPADDSWMasked128,
+		ssa.OpAMD64VPOPCNTWMasked128,
+		ssa.OpAMD64VPOPCNTWMasked256,
+		ssa.OpAMD64VPOPCNTWMasked512,
+		ssa.OpAMD64VPOPCNTDMasked128,
+		ssa.OpAMD64VPOPCNTDMasked256,
+		ssa.OpAMD64VPOPCNTDMasked512,
+		ssa.OpAMD64VPOPCNTQMasked128,
+		ssa.OpAMD64VPOPCNTQMasked256,
+		ssa.OpAMD64VPOPCNTQMasked512,
 		ssa.OpAMD64VPADDSBMasked128,
 		ssa.OpAMD64VPADDSBMasked256,
 		ssa.OpAMD64VPADDSBMasked512,
-		ssa.OpAMD64VPDPWSSDSMasked512,
+		ssa.OpAMD64VPADDSWMasked128,
+		ssa.OpAMD64VPADDSWMasked256,
+		ssa.OpAMD64VPADDSWMasked512,
 		ssa.OpAMD64VPDPWSSDSMasked128,
 		ssa.OpAMD64VPDPWSSDSMasked256,
-		ssa.OpAMD64VPSUBSWMasked256,
-		ssa.OpAMD64VPSUBSWMasked512,
-		ssa.OpAMD64VPSUBSWMasked128,
+		ssa.OpAMD64VPDPWSSDSMasked512,
 		ssa.OpAMD64VPSUBSBMasked128,
 		ssa.OpAMD64VPSUBSBMasked256,
 		ssa.OpAMD64VPSUBSBMasked512,
+		ssa.OpAMD64VPSUBSWMasked128,
+		ssa.OpAMD64VPSUBSWMasked256,
+		ssa.OpAMD64VPSUBSWMasked512,
+		ssa.OpAMD64VPMADDUBSWMasked128,
 		ssa.OpAMD64VPMADDUBSWMasked256,
 		ssa.OpAMD64VPMADDUBSWMasked512,
-		ssa.OpAMD64VPMADDUBSWMasked128,
-		ssa.OpAMD64VPDPBUSDSMasked512,
 		ssa.OpAMD64VPDPBUSDSMasked128,
 		ssa.OpAMD64VPDPBUSDSMasked256,
-		ssa.OpAMD64VSQRTPSMasked512,
+		ssa.OpAMD64VPDPBUSDSMasked512,
 		ssa.OpAMD64VSQRTPSMasked128,
 		ssa.OpAMD64VSQRTPSMasked256,
+		ssa.OpAMD64VSQRTPSMasked512,
 		ssa.OpAMD64VSQRTPDMasked128,
 		ssa.OpAMD64VSQRTPDMasked256,
 		ssa.OpAMD64VSQRTPDMasked512,
-		ssa.OpAMD64VPSUBWMasked256,
-		ssa.OpAMD64VPSUBWMasked512,
-		ssa.OpAMD64VPSUBWMasked128,
-		ssa.OpAMD64VPSUBDMasked512,
-		ssa.OpAMD64VPSUBDMasked128,
-		ssa.OpAMD64VPSUBDMasked256,
-		ssa.OpAMD64VPSUBQMasked128,
-		ssa.OpAMD64VPSUBQMasked256,
-		ssa.OpAMD64VPSUBQMasked512,
+		ssa.OpAMD64VSUBPSMasked128,
+		ssa.OpAMD64VSUBPSMasked256,
+		ssa.OpAMD64VSUBPSMasked512,
+		ssa.OpAMD64VSUBPDMasked128,
+		ssa.OpAMD64VSUBPDMasked256,
+		ssa.OpAMD64VSUBPDMasked512,
 		ssa.OpAMD64VPSUBBMasked128,
 		ssa.OpAMD64VPSUBBMasked256,
 		ssa.OpAMD64VPSUBBMasked512,
-		ssa.OpAMD64VPDPBUSDMasked512,
+		ssa.OpAMD64VPSUBWMasked128,
+		ssa.OpAMD64VPSUBWMasked256,
+		ssa.OpAMD64VPSUBWMasked512,
+		ssa.OpAMD64VPSUBDMasked128,
+		ssa.OpAMD64VPSUBDMasked256,
+		ssa.OpAMD64VPSUBDMasked512,
+		ssa.OpAMD64VPSUBQMasked128,
+		ssa.OpAMD64VPSUBQMasked256,
+		ssa.OpAMD64VPSUBQMasked512,
 		ssa.OpAMD64VPDPBUSDMasked128,
 		ssa.OpAMD64VPDPBUSDMasked256,
-		ssa.OpAMD64VXORPSMasked512,
+		ssa.OpAMD64VPDPBUSDMasked512,
 		ssa.OpAMD64VXORPSMasked128,
 		ssa.OpAMD64VXORPSMasked256,
+		ssa.OpAMD64VXORPSMasked512,
 		ssa.OpAMD64VXORPDMasked128,
 		ssa.OpAMD64VXORPDMasked256,
 		ssa.OpAMD64VXORPDMasked512,
-		ssa.OpAMD64VPXORDMasked512,
 		ssa.OpAMD64VPXORDMasked128,
 		ssa.OpAMD64VPXORDMasked256,
+		ssa.OpAMD64VPXORDMasked512,
 		ssa.OpAMD64VPXORQMasked128,
 		ssa.OpAMD64VPXORQMasked256,
 		ssa.OpAMD64VPXORQMasked512:
diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
index add066a3b6d..d6d8246980a 100644
--- a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
@@ -194,17 +194,17 @@
 (EqualFloat64x4 x y) => (VCMPPD256 [0] x y)
 (EqualFloat64x8 x y) => (VPMOVMToVec64x8 (VCMPPD512 [0] x y))
 (EqualInt16x16 ...) => (VPCMPEQW256 ...)
-(EqualInt16x32 x y) => (VPMOVMToVec16x32 (VPCMPEQW512 x y))
+(EqualInt16x32 x y) => (VPMOVMToVec16x32 (VPCMPW512 [0] x y))
 (EqualInt16x8 ...) => (VPCMPEQW128 ...)
-(EqualInt32x16 x y) => (VPMOVMToVec32x16 (VPCMPEQD512 x y))
+(EqualInt32x16 x y) => (VPMOVMToVec32x16 (VPCMPD512 [0] x y))
 (EqualInt32x4 ...) => (VPCMPEQD128 ...)
 (EqualInt32x8 ...) => (VPCMPEQD256 ...)
 (EqualInt64x2 ...) => (VPCMPEQQ128 ...)
 (EqualInt64x4 ...) => (VPCMPEQQ256 ...)
-(EqualInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPEQQ512 x y))
+(EqualInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPQ512 [0] x y))
 (EqualInt8x16 ...) => (VPCMPEQB128 ...)
 (EqualInt8x32 ...) => (VPCMPEQB256 ...)
-(EqualInt8x64 x y) => (VPMOVMToVec8x64 (VPCMPEQB512 x y))
+(EqualInt8x64 x y) => (VPMOVMToVec8x64 (VPCMPB512 [0] x y))
 (EqualUint16x16 x y) => (VPMOVMToVec16x16 (VPCMPUW256 [0] x y))
 (EqualUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [0] x y))
 (EqualUint16x8 x y) => (VPMOVMToVec16x8 (VPCMPUW128 [0] x y))
@@ -348,17 +348,17 @@
 (GreaterFloat64x4 x y) => (VCMPPD256 [6] x y)
 (GreaterFloat64x8 x y) => (VPMOVMToVec64x8 (VCMPPD512 [6] x y))
 (GreaterInt16x16 ...) => (VPCMPGTW256 ...)
-(GreaterInt16x32 x y) => (VPMOVMToVec16x32 (VPCMPGTW512 x y))
+(GreaterInt16x32 x y) => (VPMOVMToVec16x32 (VPCMPW512 [6] x y))
 (GreaterInt16x8 ...) => (VPCMPGTW128 ...)
-(GreaterInt32x16 x y) => (VPMOVMToVec32x16 (VPCMPGTD512 x y))
+(GreaterInt32x16 x y) => (VPMOVMToVec32x16 (VPCMPD512 [6] x y))
 (GreaterInt32x4 ...) => (VPCMPGTD128 ...)
 (GreaterInt32x8 ...) => (VPCMPGTD256 ...)
-(GreaterInt64x2 x y) => (VPMOVMToVec64x2 (VPCMPGTQ128 x y))
+(GreaterInt64x2 x y) => (VPMOVMToVec64x2 (VPCMPQ128 [6] x y))
 (GreaterInt64x4 ...) => (VPCMPGTQ256 ...)
-(GreaterInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPGTQ512 x y))
+(GreaterInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPQ512 [6] x y))
 (GreaterInt8x16 ...) => (VPCMPGTB128 ...)
 (GreaterInt8x32 ...) => (VPCMPGTB256 ...)
-(GreaterInt8x64 x y) => (VPMOVMToVec8x64 (VPCMPGTB512 x y))
+(GreaterInt8x64 x y) => (VPMOVMToVec8x64 (VPCMPB512 [6] x y))
 (GreaterUint16x16 x y) => (VPMOVMToVec16x16 (VPCMPUW256 [6] x y))
 (GreaterUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [6] x y))
 (GreaterUint16x8 x y) => (VPMOVMToVec16x8 (VPCMPUW128 [6] x y))
@@ -635,18 +635,18 @@
 (MaskedEqualFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [0] x y (VPMOVVec64x2ToM mask)))
 (MaskedEqualFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [0] x y (VPMOVVec64x4ToM mask)))
 (MaskedEqualFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [0] x y (VPMOVVec64x8ToM mask)))
-(MaskedEqualInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPEQWMasked256 x y (VPMOVVec16x16ToM mask)))
-(MaskedEqualInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPEQWMasked512 x y (VPMOVVec16x32ToM mask)))
-(MaskedEqualInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPEQWMasked128 x y (VPMOVVec16x8ToM mask)))
-(MaskedEqualInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPEQDMasked512 x y (VPMOVVec32x16ToM mask)))
-(MaskedEqualInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPEQDMasked128 x y (VPMOVVec32x4ToM mask)))
-(MaskedEqualInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPEQDMasked256 x y (VPMOVVec32x8ToM mask)))
-(MaskedEqualInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPEQQMasked128 x y (VPMOVVec64x2ToM mask)))
-(MaskedEqualInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPEQQMasked256 x y (VPMOVVec64x4ToM mask)))
-(MaskedEqualInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPEQQMasked512 x y (VPMOVVec64x8ToM mask)))
-(MaskedEqualInt8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPEQBMasked128 x y (VPMOVVec8x16ToM mask)))
-(MaskedEqualInt8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPEQBMasked256 x y (VPMOVVec8x32ToM mask)))
-(MaskedEqualInt8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPEQBMasked512 x y (VPMOVVec8x64ToM mask)))
+(MaskedEqualInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPWMasked256 [0] x y (VPMOVVec16x16ToM mask)))
+(MaskedEqualInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [0] x y (VPMOVVec16x32ToM mask)))
+(MaskedEqualInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [0] x y (VPMOVVec16x8ToM mask)))
+(MaskedEqualInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [0] x y (VPMOVVec32x16ToM mask)))
+(MaskedEqualInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [0] x y (VPMOVVec32x4ToM mask)))
+(MaskedEqualInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [0] x y (VPMOVVec32x8ToM mask)))
+(MaskedEqualInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [0] x y (VPMOVVec64x2ToM mask)))
+(MaskedEqualInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [0] x y (VPMOVVec64x4ToM mask)))
+(MaskedEqualInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [0] x y (VPMOVVec64x8ToM mask)))
+(MaskedEqualInt8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPBMasked128 [0] x y (VPMOVVec8x16ToM mask)))
+(MaskedEqualInt8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPBMasked256 [0] x y (VPMOVVec8x32ToM mask)))
+(MaskedEqualInt8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPBMasked512 [0] x y (VPMOVVec8x64ToM mask)))
 (MaskedEqualUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [0] x y (VPMOVVec16x16ToM mask)))
 (MaskedEqualUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [0] x y (VPMOVVec16x32ToM mask)))
 (MaskedEqualUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [0] x y (VPMOVVec16x8ToM mask)))
@@ -785,18 +785,18 @@
 (MaskedGreaterFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [6] x y (VPMOVVec64x2ToM mask)))
 (MaskedGreaterFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [6] x y (VPMOVVec64x4ToM mask)))
 (MaskedGreaterFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [6] x y (VPMOVVec64x8ToM mask)))
-(MaskedGreaterInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPGTWMasked256 x y (VPMOVVec16x16ToM mask)))
-(MaskedGreaterInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPGTWMasked512 x y (VPMOVVec16x32ToM mask)))
-(MaskedGreaterInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPGTWMasked128 x y (VPMOVVec16x8ToM mask)))
-(MaskedGreaterInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPGTDMasked512 x y (VPMOVVec32x16ToM mask)))
-(MaskedGreaterInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPGTDMasked128 x y (VPMOVVec32x4ToM mask)))
-(MaskedGreaterInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPGTDMasked256 x y (VPMOVVec32x8ToM mask)))
-(MaskedGreaterInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPGTQMasked128 x y (VPMOVVec64x2ToM mask)))
-(MaskedGreaterInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPGTQMasked256 x y (VPMOVVec64x4ToM mask)))
-(MaskedGreaterInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPGTQMasked512 x y (VPMOVVec64x8ToM mask)))
-(MaskedGreaterInt8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPGTBMasked128 x y (VPMOVVec8x16ToM mask)))
-(MaskedGreaterInt8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPGTBMasked256 x y (VPMOVVec8x32ToM mask)))
-(MaskedGreaterInt8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPGTBMasked512 x y (VPMOVVec8x64ToM mask)))
+(MaskedGreaterInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPWMasked256 [6] x y (VPMOVVec16x16ToM mask)))
+(MaskedGreaterInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [6] x y (VPMOVVec16x32ToM mask)))
+(MaskedGreaterInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [6] x y (VPMOVVec16x8ToM mask)))
+(MaskedGreaterInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [6] x y (VPMOVVec32x16ToM mask)))
+(MaskedGreaterInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [6] x y (VPMOVVec32x4ToM mask)))
+(MaskedGreaterInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [6] x y (VPMOVVec32x8ToM mask)))
+(MaskedGreaterInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [6] x y (VPMOVVec64x2ToM mask)))
+(MaskedGreaterInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [6] x y (VPMOVVec64x4ToM mask)))
+(MaskedGreaterInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [6] x y (VPMOVVec64x8ToM mask)))
+(MaskedGreaterInt8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPBMasked128 [6] x y (VPMOVVec8x16ToM mask)))
+(MaskedGreaterInt8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPBMasked256 [6] x y (VPMOVVec8x32ToM mask)))
+(MaskedGreaterInt8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPBMasked512 [6] x y (VPMOVVec8x64ToM mask)))
 (MaskedGreaterUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [6] x y (VPMOVVec16x16ToM mask)))
 (MaskedGreaterUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [6] x y (VPMOVVec16x32ToM mask)))
 (MaskedGreaterUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [6] x y (VPMOVVec16x8ToM mask)))
@@ -1130,12 +1130,12 @@
(MaskedSqrtFloat64x2 x mask) => (VSQRTPDMasked128 x (VPMOVVec64x2ToM mask)) (MaskedSqrtFloat64x4 x mask) => (VSQRTPDMasked256 x (VPMOVVec64x4ToM mask)) (MaskedSqrtFloat64x8 x mask) => (VSQRTPDMasked512 x (VPMOVVec64x8ToM mask)) -(MaskedSubFloat32x16 x y mask) => (VADDPSMasked512 x y (VPMOVVec32x16ToM mask)) -(MaskedSubFloat32x4 x y mask) => (VADDPSMasked128 x y (VPMOVVec32x4ToM mask)) -(MaskedSubFloat32x8 x y mask) => (VADDPSMasked256 x y (VPMOVVec32x8ToM mask)) -(MaskedSubFloat64x2 x y mask) => (VADDPDMasked128 x y (VPMOVVec64x2ToM mask)) -(MaskedSubFloat64x4 x y mask) => (VADDPDMasked256 x y (VPMOVVec64x4ToM mask)) -(MaskedSubFloat64x8 x y mask) => (VADDPDMasked512 x y (VPMOVVec64x8ToM mask)) +(MaskedSubFloat32x16 x y mask) => (VSUBPSMasked512 x y (VPMOVVec32x16ToM mask)) +(MaskedSubFloat32x4 x y mask) => (VSUBPSMasked128 x y (VPMOVVec32x4ToM mask)) +(MaskedSubFloat32x8 x y mask) => (VSUBPSMasked256 x y (VPMOVVec32x8ToM mask)) +(MaskedSubFloat64x2 x y mask) => (VSUBPDMasked128 x y (VPMOVVec64x2ToM mask)) +(MaskedSubFloat64x4 x y mask) => (VSUBPDMasked256 x y (VPMOVVec64x4ToM mask)) +(MaskedSubFloat64x8 x y mask) => (VSUBPDMasked512 x y (VPMOVVec64x8ToM mask)) (MaskedSubInt16x16 x y mask) => (VPSUBWMasked256 x y (VPMOVVec16x16ToM mask)) (MaskedSubInt16x32 x y mask) => (VPSUBWMasked512 x y (VPMOVVec16x32ToM mask)) (MaskedSubInt16x8 x y mask) => (VPSUBWMasked128 x y (VPMOVVec16x8ToM mask)) @@ -1473,12 +1473,12 @@ (SqrtFloat64x2 ...) => (VSQRTPD128 ...) (SqrtFloat64x4 ...) => (VSQRTPD256 ...) (SqrtFloat64x8 ...) => (VSQRTPD512 ...) -(SubFloat32x16 ...) => (VADDPS512 ...) -(SubFloat32x4 ...) => (VADDPS128 ...) -(SubFloat32x8 ...) => (VADDPS256 ...) -(SubFloat64x2 ...) => (VADDPD128 ...) -(SubFloat64x4 ...) => (VADDPD256 ...) -(SubFloat64x8 ...) => (VADDPD512 ...) +(SubFloat32x16 ...) => (VSUBPS512 ...) +(SubFloat32x4 ...) => (VSUBPS128 ...) +(SubFloat32x8 ...) => (VSUBPS256 ...) +(SubFloat64x2 ...) => (VSUBPD128 ...) +(SubFloat64x4 ...) => (VSUBPD256 ...) +(SubFloat64x8 ...) => (VSUBPD512 ...) (SubInt16x16 ...) => (VPSUBW256 ...) (SubInt16x32 ...) => (VPSUBW512 ...) (SubInt16x8 ...) => (VPSUBW128 ...) 
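A note on the aux constants in the rules above: the masked Equal and Greater rules now lower to the unified AVX-512 VPCMP{B,W,D,Q} (and VPCMPU* for unsigned) ops, whose imm8 argument selects the predicate. In the Intel SDM encoding, 0 is "equal" and 6 is "not less-or-equal", i.e. greater, which is why the rules carry [0] and [6]. A minimal Go sketch of the per-lane semantics follows; the constant and function names here are illustrative only and not part of the compiler source:

package main

import "fmt"

// Imm8 predicates for the AVX-512 VPCMP family, per the Intel SDM.
// The rewrite rules above select these via the [0]/[6] aux values.
const (
	vpcmpEQ  = 0 // equal:            MaskedEqual*   => VPCMP*Masked [0]
	vpcmpLT  = 1 // less-than
	vpcmpLE  = 2 // less-or-equal
	vpcmpNE  = 4 // not-equal
	vpcmpNLT = 5 // not-less-than (greater-or-equal)
	vpcmpNLE = 6 // not-less-or-equal: MaskedGreater* => VPCMP*Masked [6]
)

// emulate applies predicate p to one signed element pair, mirroring what
// VPCMPW does per lane when it builds a mask register.
func emulate(p int, x, y int16) bool {
	switch p {
	case vpcmpEQ:
		return x == y
	case vpcmpLT:
		return x < y
	case vpcmpLE:
		return x <= y
	case vpcmpNE:
		return x != y
	case vpcmpNLT:
		return x >= y
	case vpcmpNLE:
		return x > y
	}
	return false
}

func main() {
	fmt.Println(emulate(vpcmpNLE, 3, 2)) // true
	fmt.Println(emulate(vpcmpNLE, 2, 3)) // false
}

Because predicate 6 reproduces signed greater-than exactly, the dedicated VPCMPGT*Masked ops become redundant, which is what the simdAMD64ops.go and opGen.go changes below remove.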
diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go
index 651a4365c7c..17d250421f3 100644
--- a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go
+++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go
@@ -57,6 +57,7 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
 	{name: "VSCALEFPSMasked512", argLength: 3, reg: fp2k1fp1, asm: "VSCALEFPS", commutative: false, typ: "Vec512", resultInArg0: false},
 	{name: "VORPSMasked512", argLength: 3, reg: fp2k1fp1, asm: "VORPS", commutative: true, typ: "Vec512", resultInArg0: false},
 	{name: "VSQRTPSMasked512", argLength: 2, reg: fp1k1fp1, asm: "VSQRTPS", commutative: false, typ: "Vec512", resultInArg0: false},
+	{name: "VSUBPSMasked512", argLength: 3, reg: fp2k1fp1, asm: "VSUBPS", commutative: false, typ: "Vec512", resultInArg0: false},
 	{name: "VXORPSMasked512", argLength: 3, reg: fp2k1fp1, asm: "VXORPS", commutative: true, typ: "Vec512", resultInArg0: false},
 	{name: "VMAXPS512", argLength: 2, reg: fp21, asm: "VMAXPS", commutative: true, typ: "Vec512", resultInArg0: false},
 	{name: "VMINPS512", argLength: 2, reg: fp21, asm: "VMINPS", commutative: true, typ: "Vec512", resultInArg0: false},
@@ -64,6 +65,7 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
 	{name: "VSCALEFPS512", argLength: 2, reg: fp21, asm: "VSCALEFPS", commutative: false, typ: "Vec512", resultInArg0: false},
 	{name: "VORPS512", argLength: 2, reg: fp21, asm: "VORPS", commutative: true, typ: "Vec512", resultInArg0: false},
 	{name: "VSQRTPS512", argLength: 1, reg: fp11, asm: "VSQRTPS", commutative: false, typ: "Vec512", resultInArg0: false},
+	{name: "VSUBPS512", argLength: 2, reg: fp21, asm: "VSUBPS", commutative: false, typ: "Vec512", resultInArg0: false},
 	{name: "VXORPS512", argLength: 2, reg: fp21, asm: "VXORPS", commutative: true, typ: "Vec512", resultInArg0: false},
 	{name: "VADDPS128", argLength: 2, reg: fp21, asm: "VADDPS", commutative: true, typ: "Vec128", resultInArg0: false},
 	{name: "VADDSUBPS128", argLength: 2, reg: fp21, asm: "VADDSUBPS", commutative: false, typ: "Vec128", resultInArg0: false},
@@ -120,6 +122,7 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
 	{name: "VSCALEFPSMasked128", argLength: 3, reg: fp2k1fp1, asm: "VSCALEFPS", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VORPSMasked128", argLength: 3, reg: fp2k1fp1, asm: "VORPS", commutative: true, typ: "Vec128", resultInArg0: false},
 	{name: "VSQRTPSMasked128", argLength: 2, reg: fp1k1fp1, asm: "VSQRTPS", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VSUBPSMasked128", argLength: 3, reg: fp2k1fp1, asm: "VSUBPS", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VXORPSMasked128", argLength: 3, reg: fp2k1fp1, asm: "VXORPS", commutative: true, typ: "Vec128", resultInArg0: false},
 	{name: "VMAXPS128", argLength: 2, reg: fp21, asm: "VMAXPS", commutative: true, typ: "Vec128", resultInArg0: false},
 	{name: "VMINPS128", argLength: 2, reg: fp21, asm: "VMINPS", commutative: true, typ: "Vec128", resultInArg0: false},
@@ -129,6 +132,7 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
 	{name: "VHADDPS128", argLength: 2, reg: fp21, asm: "VHADDPS", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VHSUBPS128", argLength: 2, reg: fp21, asm: "VHSUBPS", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VSQRTPS128", argLength: 1, reg: fp11, asm: "VSQRTPS", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VSUBPS128", argLength: 2, reg: fp21, asm: "VSUBPS", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VXORPS128", argLength: 2, reg: fp21, asm: "VXORPS", commutative: true, typ: "Vec128", resultInArg0: false},
 	{name: "VADDPS256", argLength: 2, reg: fp21, asm: "VADDPS", commutative: true, typ: "Vec256", resultInArg0: false},
 	{name: "VADDSUBPS256", argLength: 2, reg: fp21, asm: "VADDSUBPS", commutative: false, typ: "Vec256", resultInArg0: false},
@@ -185,6 +189,7 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
 	{name: "VSCALEFPSMasked256", argLength: 3, reg: fp2k1fp1, asm: "VSCALEFPS", commutative: false, typ: "Vec256", resultInArg0: false},
 	{name: "VORPSMasked256", argLength: 3, reg: fp2k1fp1, asm: "VORPS", commutative: true, typ: "Vec256", resultInArg0: false},
 	{name: "VSQRTPSMasked256", argLength: 2, reg: fp1k1fp1, asm: "VSQRTPS", commutative: false, typ: "Vec256", resultInArg0: false},
+	{name: "VSUBPSMasked256", argLength: 3, reg: fp2k1fp1, asm: "VSUBPS", commutative: false, typ: "Vec256", resultInArg0: false},
 	{name: "VXORPSMasked256", argLength: 3, reg: fp2k1fp1, asm: "VXORPS", commutative: true, typ: "Vec256", resultInArg0: false},
 	{name: "VMAXPS256", argLength: 2, reg: fp21, asm: "VMAXPS", commutative: true, typ: "Vec256", resultInArg0: false},
 	{name: "VMINPS256", argLength: 2, reg: fp21, asm: "VMINPS", commutative: true, typ: "Vec256", resultInArg0: false},
@@ -194,6 +199,7 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
 	{name: "VHADDPS256", argLength: 2, reg: fp21, asm: "VHADDPS", commutative: false, typ: "Vec256", resultInArg0: false},
 	{name: "VHSUBPS256", argLength: 2, reg: fp21, asm: "VHSUBPS", commutative: false, typ: "Vec256", resultInArg0: false},
 	{name: "VSQRTPS256", argLength: 1, reg: fp11, asm: "VSQRTPS", commutative: false, typ: "Vec256", resultInArg0: false},
+	{name: "VSUBPS256", argLength: 2, reg: fp21, asm: "VSUBPS", commutative: false, typ: "Vec256", resultInArg0: false},
 	{name: "VXORPS256", argLength: 2, reg: fp21, asm: "VXORPS", commutative: true, typ: "Vec256", resultInArg0: false},
 	{name: "VADDPD128", argLength: 2, reg: fp21, asm: "VADDPD", commutative: true, typ: "Vec128", resultInArg0: false},
 	{name: "VADDSUBPD128", argLength: 2, reg: fp21, asm: "VADDSUBPD", commutative: false, typ: "Vec128", resultInArg0: false},
@@ -250,6 +256,7 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
 	{name: "VSCALEFPDMasked128", argLength: 3, reg: fp2k1fp1, asm: "VSCALEFPD", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VORPDMasked128", argLength: 3, reg: fp2k1fp1, asm: "VORPD", commutative: true, typ: "Vec128", resultInArg0: false},
 	{name: "VSQRTPDMasked128", argLength: 2, reg: fp1k1fp1, asm: "VSQRTPD", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VSUBPDMasked128", argLength: 3, reg: fp2k1fp1, asm: "VSUBPD", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VXORPDMasked128", argLength: 3, reg: fp2k1fp1, asm: "VXORPD", commutative: true, typ: "Vec128", resultInArg0: false},
 	{name: "VMAXPD128", argLength: 2, reg: fp21, asm: "VMAXPD", commutative: true, typ: "Vec128", resultInArg0: false},
 	{name: "VMINPD128", argLength: 2, reg: fp21, asm: "VMINPD", commutative: true, typ: "Vec128", resultInArg0: false},
@@ -259,6 +266,7 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
 	{name: "VHADDPD128", argLength: 2, reg: fp21, asm: "VHADDPD", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VHSUBPD128", argLength: 2, reg: fp21, asm: "VHSUBPD", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VSQRTPD128", argLength: 1, reg: fp11, asm: "VSQRTPD", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VSUBPD128", argLength: 2, reg: fp21, asm: "VSUBPD", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VXORPD128", argLength: 2, reg: fp21, asm: "VXORPD", commutative: true, typ: "Vec128", resultInArg0: false},
 	{name: "VADDPD256", argLength: 2, reg: fp21, asm: "VADDPD", commutative: true, typ: "Vec256", resultInArg0: false},
 	{name: "VADDSUBPD256", argLength: 2, reg: fp21, asm: "VADDSUBPD", commutative: false, typ: "Vec256", resultInArg0: false},
@@ -315,6 +323,7 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
 	{name: "VSCALEFPDMasked256", argLength: 3, reg: fp2k1fp1, asm: "VSCALEFPD", commutative: false, typ: "Vec256", resultInArg0: false},
 	{name: "VORPDMasked256", argLength: 3, reg: fp2k1fp1, asm: "VORPD", commutative: true, typ: "Vec256", resultInArg0: false},
 	{name: "VSQRTPDMasked256", argLength: 2, reg: fp1k1fp1, asm: "VSQRTPD", commutative: false, typ: "Vec256", resultInArg0: false},
+	{name: "VSUBPDMasked256", argLength: 3, reg: fp2k1fp1, asm: "VSUBPD", commutative: false, typ: "Vec256", resultInArg0: false},
 	{name: "VXORPDMasked256", argLength: 3, reg: fp2k1fp1, asm: "VXORPD", commutative: true, typ: "Vec256", resultInArg0: false},
 	{name: "VMAXPD256", argLength: 2, reg: fp21, asm: "VMAXPD", commutative: true, typ: "Vec256", resultInArg0: false},
 	{name: "VMINPD256", argLength: 2, reg: fp21, asm: "VMINPD", commutative: true, typ: "Vec256", resultInArg0: false},
@@ -324,6 +333,7 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
 	{name: "VHADDPD256", argLength: 2, reg: fp21, asm: "VHADDPD", commutative: false, typ: "Vec256", resultInArg0: false},
 	{name: "VHSUBPD256", argLength: 2, reg: fp21, asm: "VHSUBPD", commutative: false, typ: "Vec256", resultInArg0: false},
 	{name: "VSQRTPD256", argLength: 1, reg: fp11, asm: "VSQRTPD", commutative: false, typ: "Vec256", resultInArg0: false},
+	{name: "VSUBPD256", argLength: 2, reg: fp21, asm: "VSUBPD", commutative: false, typ: "Vec256", resultInArg0: false},
 	{name: "VXORPD256", argLength: 2, reg: fp21, asm: "VXORPD", commutative: true, typ: "Vec256", resultInArg0: false},
 	{name: "VADDPD512", argLength: 2, reg: fp21, asm: "VADDPD", commutative: true, typ: "Vec512", resultInArg0: false},
 	{name: "VANDPD512", argLength: 2, reg: fp21, asm: "VANDPD", commutative: true, typ: "Vec512", resultInArg0: false},
@@ -379,6 +389,7 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
 	{name: "VSCALEFPDMasked512", argLength: 3, reg: fp2k1fp1, asm: "VSCALEFPD", commutative: false, typ: "Vec512", resultInArg0: false},
 	{name: "VORPDMasked512", argLength: 3, reg: fp2k1fp1, asm: "VORPD", commutative: true, typ: "Vec512", resultInArg0: false},
 	{name: "VSQRTPDMasked512", argLength: 2, reg: fp1k1fp1, asm: "VSQRTPD", commutative: false, typ: "Vec512", resultInArg0: false},
+	{name: "VSUBPDMasked512", argLength: 3, reg: fp2k1fp1, asm: "VSUBPD", commutative: false, typ: "Vec512", resultInArg0: false},
 	{name: "VXORPDMasked512", argLength: 3, reg: fp2k1fp1, asm: "VXORPD", commutative: true, typ: "Vec512", resultInArg0: false},
 	{name: "VMAXPD512", argLength: 2, reg: fp21, asm: "VMAXPD", commutative: true, typ: "Vec512", resultInArg0: false},
 	{name: "VMINPD512", argLength: 2, reg: fp21, asm: "VMINPD", commutative: true, typ: "Vec512", resultInArg0: false},
@@ -386,17 +397,14 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
 	{name: "VSCALEFPD512", argLength: 2, reg: fp21, asm: "VSCALEFPD", commutative: false, typ: "Vec512", resultInArg0: false},
 	{name: "VORPD512", argLength: 2, reg: fp21, asm: "VORPD", commutative: true, typ: "Vec512", resultInArg0: false},
 	{name: "VSQRTPD512", argLength: 1, reg: fp11, asm: "VSQRTPD", commutative: false, typ: "Vec512", resultInArg0: false},
+	{name: "VSUBPD512", argLength: 2, reg: fp21, asm: "VSUBPD", commutative: false, typ: "Vec512", resultInArg0: false},
 	{name: "VXORPD512", argLength: 2, reg: fp21, asm: "VXORPD", commutative: true, typ: "Vec512", resultInArg0: false},
 	{name: "VPABSW256", argLength: 1, reg: fp11, asm: "VPABSW", commutative: false, typ: "Vec256", resultInArg0: false},
 	{name: "VPADDW256", argLength: 2, reg: fp21, asm: "VPADDW", commutative: true, typ: "Vec256", resultInArg0: false},
-	{name: "VPAND256", argLength: 2, reg: fp21, asm: "VPAND", commutative: true, typ: "Vec256", resultInArg0: false},
-	{name: "VPANDN256", argLength: 2, reg: fp21, asm: "VPANDN", commutative: false, typ: "Vec256", resultInArg0: false},
 	{name: "VPCMPEQW256", argLength: 2, reg: fp21, asm: "VPCMPEQW", commutative: true, typ: "Vec256", resultInArg0: false},
 	{name: "VPCMPGTW256", argLength: 2, reg: fp21, asm: "VPCMPGTW", commutative: false, typ: "Vec256", resultInArg0: false},
 	{name: "VPABSWMasked256", argLength: 2, reg: fp1k1fp1, asm: "VPABSW", commutative: false, typ: "Vec256", resultInArg0: false},
 	{name: "VPADDWMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPADDW", commutative: true, typ: "Vec256", resultInArg0: false},
-	{name: "VPCMPEQWMasked256", argLength: 3, reg: fp2k1k1, asm: "VPCMPEQW", commutative: true, typ: "Mask", resultInArg0: false},
-	{name: "VPCMPGTWMasked256", argLength: 3, reg: fp2k1k1, asm: "VPCMPGTW", commutative: false, typ: "Mask", resultInArg0: false},
 	{name: "VPMAXSWMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMAXSW", commutative: true, typ: "Vec256", resultInArg0: false},
 	{name: "VPMINSWMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMINSW", commutative: true, typ: "Vec256", resultInArg0: false},
 	{name: "VPMULHWMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMULHW", commutative: true, typ: "Vec256", resultInArg0: false},
@@ -410,7 +418,6 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
 	{name: "VPMINSW256", argLength: 2, reg: fp21, asm: "VPMINSW", commutative: true, typ: "Vec256", resultInArg0: false},
 	{name: "VPMULHW256", argLength: 2, reg: fp21, asm: "VPMULHW", commutative: true, typ: "Vec256", resultInArg0: false},
 	{name: "VPMULLW256", argLength: 2, reg: fp21, asm: "VPMULLW", commutative: true, typ: "Vec256", resultInArg0: false},
-	{name: "VPOR256", argLength: 2, reg: fp21, asm: "VPOR", commutative: true, typ: "Vec256", resultInArg0: false},
 	{name: "VPMADDWD256", argLength: 2, reg: fp21, asm: "VPMADDWD", commutative: false, typ: "Vec256", resultInArg0: false},
 	{name: "VPHADDW256", argLength: 2, reg: fp21, asm: "VPHADDW", commutative: false, typ: "Vec256", resultInArg0: false},
 	{name: "VPHSUBW256", argLength: 2, reg: fp21, asm: "VPHSUBW", commutative: false, typ: "Vec256", resultInArg0: false},
@@ -421,15 +428,10 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
 	{name: "VPSUBSW256", argLength: 2, reg: fp21, asm: "VPSUBSW", commutative: false, typ: "Vec256", resultInArg0: false},
 	{name: "VPSIGNW256", argLength: 2, reg: fp21, asm: "VPSIGNW", commutative: false, typ: "Vec256", resultInArg0: false},
 	{name: "VPSUBW256", argLength: 2, reg: fp21, asm: "VPSUBW", commutative: false, typ: "Vec256", resultInArg0: false},
-	{name: "VPXOR256", argLength: 2, reg: fp21, asm: "VPXOR", commutative: true, typ: "Vec256", resultInArg0: false},
 	{name: "VPABSW512", argLength: 1, reg: fp11, asm: "VPABSW", commutative: false, typ: "Vec512", resultInArg0: false},
 	{name: "VPADDW512", argLength: 2, reg: fp21, asm: "VPADDW", commutative: true, typ: "Vec512", resultInArg0: false},
-	{name: "VPCMPEQW512", argLength: 2, reg: fp2k1, asm: "VPCMPEQW", commutative: true, typ: "Mask", resultInArg0: false},
-	{name: "VPCMPGTW512", argLength: 2, reg: fp2k1, asm: "VPCMPGTW", commutative: false, typ: "Mask", resultInArg0: false},
 	{name: "VPABSWMasked512", argLength: 2, reg: fp1k1fp1, asm: "VPABSW", commutative: false, typ: "Vec512", resultInArg0: false},
 	{name: "VPADDWMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPADDW", commutative: true, typ: "Vec512", resultInArg0: false},
-	{name: "VPCMPEQWMasked512", argLength: 3, reg: fp2k1k1, asm: "VPCMPEQW", commutative: true, typ: "Mask", resultInArg0: false},
-	{name: "VPCMPGTWMasked512", argLength: 3, reg: fp2k1k1, asm: "VPCMPGTW", commutative: false, typ: "Mask", resultInArg0: false},
 	{name: "VPMAXSWMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMAXSW", commutative: true, typ: "Vec512", resultInArg0: false},
 	{name: "VPMINSWMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMINSW", commutative: true, typ: "Vec512", resultInArg0: false},
 	{name: "VPMULHWMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMULHW", commutative: true, typ: "Vec512", resultInArg0: false},
@@ -450,14 +452,10 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
 	{name: "VPSUBW512", argLength: 2, reg: fp21, asm: "VPSUBW", commutative: false, typ: "Vec512", resultInArg0: false},
 	{name: "VPABSW128", argLength: 1, reg: fp11, asm: "VPABSW", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VPADDW128", argLength: 2, reg: fp21, asm: "VPADDW", commutative: true, typ: "Vec128", resultInArg0: false},
-	{name: "VPAND128", argLength: 2, reg: fp21, asm: "VPAND", commutative: true, typ: "Vec128", resultInArg0: false},
-	{name: "VPANDN128", argLength: 2, reg: fp21, asm: "VPANDN", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VPCMPEQW128", argLength: 2, reg: fp21, asm: "VPCMPEQW", commutative: true, typ: "Vec128", resultInArg0: false},
 	{name: "VPCMPGTW128", argLength: 2, reg: fp21, asm: "VPCMPGTW", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VPABSWMasked128", argLength: 2, reg: fp1k1fp1, asm: "VPABSW", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VPADDWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPADDW", commutative: true, typ: "Vec128", resultInArg0: false},
-	{name: "VPCMPEQWMasked128", argLength: 3, reg: fp2k1k1, asm: "VPCMPEQW", commutative: true, typ: "Mask", resultInArg0: false},
-	{name: "VPCMPGTWMasked128", argLength: 3, reg: fp2k1k1, asm: "VPCMPGTW", commutative: false, typ: "Mask", resultInArg0: false},
 	{name: "VPMAXSWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMAXSW", commutative: true, typ: "Vec128", resultInArg0: false},
 	{name: "VPMINSWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMINSW", commutative: true, typ: "Vec128", resultInArg0: false},
 	{name: "VPMULHWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMULHW", commutative: true, typ: "Vec128", resultInArg0: false},
@@ -471,7 +469,6 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
 	{name: "VPMINSW128", argLength: 2, reg: fp21, asm: "VPMINSW", commutative: true, typ: "Vec128", resultInArg0: false},
 	{name: "VPMULHW128", argLength: 2, reg: fp21, asm: "VPMULHW", commutative: true, typ: "Vec128", resultInArg0: false},
 	{name: "VPMULLW128", argLength: 2, reg: fp21, asm: "VPMULLW", commutative: true, typ: "Vec128", resultInArg0: false},
-	{name: "VPOR128", argLength: 2, reg: fp21, asm: "VPOR", commutative: true, typ: "Vec128", resultInArg0: false},
 	{name: "VPMADDWD128", argLength: 2, reg: fp21, asm: "VPMADDWD", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VPHADDW128", argLength: 2, reg: fp21, asm: "VPHADDW", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VPHSUBW128", argLength: 2, reg: fp21, asm: "VPHSUBW", commutative: false, typ: "Vec128", resultInArg0: false},
@@ -482,19 +479,14 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
 	{name: "VPSUBSW128", argLength: 2, reg: fp21, asm: "VPSUBSW", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VPSIGNW128", argLength: 2, reg: fp21, asm: "VPSIGNW", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VPSUBW128", argLength: 2, reg: fp21, asm: "VPSUBW", commutative: false, typ: "Vec128", resultInArg0: false},
-	{name: "VPXOR128", argLength: 2, reg: fp21, asm: "VPXOR", commutative: true, typ: "Vec128", resultInArg0: false},
 	{name: "VPABSD512", argLength: 1, reg: fp11, asm: "VPABSD", commutative: false, typ: "Vec512", resultInArg0: false},
 	{name: "VPADDD512", argLength: 2, reg: fp21, asm: "VPADDD", commutative: true, typ: "Vec512", resultInArg0: false},
 	{name: "VPANDD512", argLength: 2, reg: fp21, asm: "VPANDD", commutative: true, typ: "Vec512", resultInArg0: false},
 	{name: "VPANDND512", argLength: 2, reg: fp21, asm: "VPANDND", commutative: false, typ: "Vec512", resultInArg0: false},
-	{name: "VPCMPEQD512", argLength: 2, reg: fp2k1, asm: "VPCMPEQD", commutative: true, typ: "Mask", resultInArg0: false},
-	{name: "VPCMPGTD512", argLength: 2, reg: fp2k1, asm: "VPCMPGTD", commutative: false, typ: "Mask", resultInArg0: false},
 	{name: "VPABSDMasked512", argLength: 2, reg: fp1k1fp1, asm: "VPABSD", commutative: false, typ: "Vec512", resultInArg0: false},
 	{name: "VPADDDMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPADDD", commutative: true, typ: "Vec512", resultInArg0: false},
 	{name: "VPANDDMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPANDD", commutative: true, typ: "Vec512", resultInArg0: false},
 	{name: "VPANDNDMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPANDND", commutative: false, typ: "Vec512", resultInArg0: false},
-	{name: "VPCMPEQDMasked512", argLength: 3, reg: fp2k1k1, asm: "VPCMPEQD", commutative: true, typ: "Mask", resultInArg0: false},
-	{name: "VPCMPGTDMasked512", argLength: 3, reg: fp2k1k1, asm: "VPCMPGTD", commutative: false, typ: "Mask", resultInArg0: false},
 	{name: "VPMAXSDMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMAXSD", commutative: true, typ: "Vec512", resultInArg0: false},
 	{name: "VPMINSDMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMINSD", commutative: true, typ: "Vec512", resultInArg0: false},
 	{name: "VPMULLDMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMULLD", commutative: true, typ: "Vec512", resultInArg0: false},
@@ -525,8 +517,6 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
 	{name: "VPADDDMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPADDD", commutative: true, typ: "Vec128", resultInArg0: false},
 	{name: "VPANDDMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPANDD", commutative: true, typ: "Vec128", resultInArg0: false},
 	{name: "VPANDNDMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPANDND", commutative: false, typ: "Vec128", resultInArg0: false},
-	{name: "VPCMPEQDMasked128", argLength: 3, reg: fp2k1k1, asm: "VPCMPEQD", commutative: true, typ: "Mask", resultInArg0: false},
-	{name: "VPCMPGTDMasked128", argLength: 3, reg: fp2k1k1, asm: "VPCMPGTD", commutative: false, typ: "Mask", resultInArg0: false},
 	{name: "VPMAXSDMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMAXSD", commutative: true, typ: "Vec128", resultInArg0: false},
 	{name: "VPMINSDMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMINSD", commutative: true, typ: "Vec128", resultInArg0: false},
 	{name: "VPMULLDMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMULLD", commutative: true, typ: "Vec128", resultInArg0: false},
@@ -559,8 +549,6 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
 	{name: "VPADDDMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPADDD", commutative: true, typ: "Vec256", resultInArg0: false},
 	{name: "VPANDDMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPANDD", commutative: true, typ: "Vec256", resultInArg0: false},
 	{name: "VPANDNDMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPANDND", commutative: false, typ: "Vec256", resultInArg0: false},
-	{name: "VPCMPEQDMasked256", argLength: 3, reg: fp2k1k1, asm: "VPCMPEQD", commutative: true, typ: "Mask", resultInArg0: false},
-	{name: "VPCMPGTDMasked256", argLength: 3, reg: fp2k1k1, asm: "VPCMPGTD", commutative: false, typ: "Mask", resultInArg0: false},
 	{name: "VPMAXSDMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMAXSD", commutative: true, typ: "Vec256", resultInArg0: false},
 	{name: "VPMINSDMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMINSD", commutative: true, typ: "Vec256", resultInArg0: false},
 	{name: "VPMULLDMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMULLD", commutative: true, typ: "Vec256", resultInArg0: false},
@@ -588,13 +576,10 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
 	{name: "VPABSQ128", argLength: 1, reg: fp11, asm: "VPABSQ", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VPADDQ128", argLength: 2, reg: fp21, asm: "VPADDQ", commutative: true, typ: "Vec128", resultInArg0: false},
 	{name: "VPCMPEQQ128", argLength: 2, reg: fp21, asm: "VPCMPEQQ", commutative: true, typ: "Vec128", resultInArg0: false},
-	{name: "VPCMPGTQ128", argLength: 2, reg: fp2k1, asm: "VPCMPGTQ", commutative: false, typ: "Mask", resultInArg0: false},
 	{name: "VPABSQMasked128", argLength: 2, reg: fp1k1fp1, asm: "VPABSQ", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VPADDQMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPADDQ", commutative: true, typ: "Vec128", resultInArg0: false},
 	{name: "VPANDQMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPANDQ", commutative: true, typ: "Vec128", resultInArg0: false},
 	{name: "VPANDNQMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPANDNQ", commutative: false, typ: "Vec128", resultInArg0: false},
-	{name: "VPCMPEQQMasked128", argLength: 3, reg: fp2k1k1, asm: "VPCMPEQQ", commutative: true, typ: "Mask", resultInArg0: false},
-	{name: "VPCMPGTQMasked128", argLength: 3, reg: fp2k1k1, asm: "VPCMPGTQ", commutative: false, typ: "Mask", resultInArg0: false},
 	{name: "VPMAXSQMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMAXSQ", commutative: true, typ: "Vec128", resultInArg0: false},
 	{name: "VPMINSQMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMINSQ", commutative: true, typ: "Vec128", resultInArg0: false},
 	{name: "VPMULDQMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMULDQ", commutative: true, typ: "Vec128", resultInArg0: false},
@@ -616,8 +601,6 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
 	{name: "VPADDQMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPADDQ", commutative: true, typ: "Vec256", resultInArg0: false},
 	{name: "VPANDQMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPANDQ", commutative: true, typ: "Vec256", resultInArg0: false},
 	{name: "VPANDNQMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPANDNQ", commutative: false, typ: "Vec256", resultInArg0: false},
-	{name: "VPCMPEQQMasked256", argLength: 3, reg: fp2k1k1, asm: "VPCMPEQQ", commutative: true, typ: "Mask", resultInArg0: false},
-	{name: "VPCMPGTQMasked256", argLength: 3, reg: fp2k1k1, asm: "VPCMPGTQ", commutative: false, typ: "Mask", resultInArg0: false},
 	{name: "VPMAXSQMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMAXSQ", commutative: true, typ: "Vec256", resultInArg0: false},
 	{name: "VPMINSQMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMINSQ", commutative: true, typ: "Vec256", resultInArg0: false},
 	{name: "VPMULDQMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMULDQ", commutative: true, typ: "Vec256", resultInArg0: false},
@@ -635,14 +618,10 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
 	{name: "VPADDQ512", argLength: 2, reg: fp21, asm: "VPADDQ", commutative: true, typ: "Vec512", resultInArg0: false},
 	{name: "VPANDQ512", argLength: 2, reg: fp21, asm: "VPANDQ", commutative: true, typ: "Vec512", resultInArg0: false},
 	{name: "VPANDNQ512", argLength: 2, reg: fp21, asm: "VPANDNQ", commutative: false, typ: "Vec512", resultInArg0: false},
-	{name: "VPCMPEQQ512", argLength: 2, reg: fp2k1, asm: "VPCMPEQQ", commutative: true, typ: "Mask", resultInArg0: false},
-	{name: "VPCMPGTQ512", argLength: 2, reg: fp2k1, asm: "VPCMPGTQ", commutative: false, typ: "Mask", resultInArg0: false},
 	{name: "VPABSQMasked512", argLength: 2, reg: fp1k1fp1, asm: "VPABSQ", commutative: false, typ: "Vec512", resultInArg0: false},
 	{name: "VPADDQMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPADDQ", commutative: true, typ: "Vec512", resultInArg0: false},
 	{name: "VPANDQMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPANDQ", commutative: true, typ: "Vec512", resultInArg0: false},
 	{name: "VPANDNQMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPANDNQ", commutative: false, typ: "Vec512", resultInArg0: false},
-	{name: "VPCMPEQQMasked512", argLength: 3, reg: fp2k1k1, asm: "VPCMPEQQ", commutative: true, typ: "Mask", resultInArg0: false},
-	{name: "VPCMPGTQMasked512", argLength: 3, reg: fp2k1k1, asm: "VPCMPGTQ", commutative: false, typ: "Mask", resultInArg0: false},
 	{name: "VPMAXSQMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMAXSQ", commutative: true, typ: "Vec512", resultInArg0: false},
 	{name: "VPMINSQMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMINSQ", commutative: true, typ: "Vec512", resultInArg0: false},
 	{name: "VPMULDQMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMULDQ", commutative: true, typ: "Vec512", resultInArg0: false},
@@ -661,12 +640,12 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
 	{name: "VPXORQ512", argLength: 2, reg: fp21, asm: "VPXORQ", commutative: true, typ: "Vec512", resultInArg0: false},
 	{name: "VPABSB128", argLength: 1, reg: fp11, asm: "VPABSB", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VPADDB128", argLength: 2, reg: fp21, asm: "VPADDB", commutative: true, typ: "Vec128", resultInArg0: false},
+	{name: "VPAND128", argLength: 2, reg: fp21, asm: "VPAND", commutative: true, typ: "Vec128", resultInArg0: false},
+	{name: "VPANDN128", argLength: 2, reg: fp21, asm: "VPANDN", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VPCMPEQB128", argLength: 2, reg: fp21, asm: "VPCMPEQB", commutative: true, typ: "Vec128", resultInArg0: false},
 	{name: "VPCMPGTB128", argLength: 2, reg: fp21, asm: "VPCMPGTB", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VPABSBMasked128", argLength: 2, reg: fp1k1fp1, asm: "VPABSB", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VPADDBMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPADDB", commutative: true, typ: "Vec128", resultInArg0: false},
-	{name: "VPCMPEQBMasked128", argLength: 3, reg: fp2k1k1, asm: "VPCMPEQB", commutative: true, typ: "Mask", resultInArg0: false},
-	{name: "VPCMPGTBMasked128", argLength: 3, reg: fp2k1k1, asm: "VPCMPGTB", commutative: false, typ: "Mask", resultInArg0: false},
 	{name: "VPMAXSBMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMAXSB", commutative: true, typ: "Vec128", resultInArg0: false},
 	{name: "VPMINSBMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMINSB", commutative: true, typ: "Vec128", resultInArg0: false},
 	{name: "VPOPCNTBMasked128", argLength: 2, reg: fp1k1fp1, asm: "VPOPCNTB", commutative: false, typ: "Vec128", resultInArg0: false},
@@ -675,19 +654,21 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
 	{name: "VPSUBBMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPSUBB", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VPMAXSB128", argLength: 2, reg: fp21, asm: "VPMAXSB", commutative: true, typ: "Vec128", resultInArg0: false},
 	{name: "VPMINSB128", argLength: 2, reg: fp21, asm: "VPMINSB", commutative: true, typ: "Vec128", resultInArg0: false},
+	{name: "VPOR128", argLength: 2, reg: fp21, asm: "VPOR", commutative: true, typ: "Vec128", resultInArg0: false},
 	{name: "VPOPCNTB128", argLength: 1, reg: fp11, asm: "VPOPCNTB", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VPADDSB128", argLength: 2, reg: fp21, asm: "VPADDSB", commutative: true, typ: "Vec128", resultInArg0: false},
 	{name: "VPSUBSB128", argLength: 2, reg: fp21, asm: "VPSUBSB", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VPSIGNB128", argLength: 2, reg: fp21, asm: "VPSIGNB", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VPSUBB128", argLength: 2, reg: fp21, asm: "VPSUBB", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPXOR128", argLength: 2, reg: fp21, asm: "VPXOR", commutative: true, typ: "Vec128", resultInArg0: false},
 	{name: "VPABSB256", argLength: 1, reg: fp11, asm: "VPABSB", commutative: false, typ: "Vec256", resultInArg0: false},
 	{name: "VPADDB256", argLength: 2, reg: fp21, asm: "VPADDB", commutative: true, typ: "Vec256", resultInArg0: false},
+	{name: "VPAND256", argLength: 2, reg: fp21, asm: "VPAND", commutative: true, typ: "Vec256", resultInArg0: false},
+	{name: "VPANDN256", argLength: 2, reg: fp21, asm: "VPANDN", commutative: false, typ: "Vec256", resultInArg0: false},
 	{name: "VPCMPEQB256", argLength: 2, reg: fp21, asm: "VPCMPEQB", commutative: true, typ: "Vec256", resultInArg0: false},
 	{name: "VPCMPGTB256", argLength: 2, reg: fp21, asm: "VPCMPGTB", commutative: false, typ: "Vec256", resultInArg0: false},
 	{name: "VPABSBMasked256", argLength: 2, reg: fp1k1fp1, asm: "VPABSB", commutative: false, typ: "Vec256", resultInArg0: false},
 	{name: "VPADDBMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPADDB", commutative: true, typ: "Vec256", resultInArg0: false},
-	{name: "VPCMPEQBMasked256", argLength: 3, reg: fp2k1k1, asm: "VPCMPEQB", commutative: true, typ: "Mask", resultInArg0: false},
-	{name: "VPCMPGTBMasked256", argLength: 3, reg: fp2k1k1, asm: "VPCMPGTB", commutative: false, typ: "Mask", resultInArg0: false},
 	{name: "VPMAXSBMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMAXSB", commutative: true, typ: "Vec256", resultInArg0: false},
 	{name: "VPMINSBMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMINSB", commutative: true, typ: "Vec256", resultInArg0: false},
 	{name: "VPOPCNTBMasked256", argLength: 2, reg: fp1k1fp1, asm: "VPOPCNTB", commutative: false, typ: "Vec256", resultInArg0: false},
@@ -696,19 +677,17 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
 	{name: "VPSUBBMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPSUBB", commutative: false, typ: "Vec256", resultInArg0: false},
 	{name: "VPMAXSB256", argLength: 2, reg: fp21, asm: "VPMAXSB", commutative: true, typ: "Vec256", resultInArg0: false},
 	{name: "VPMINSB256", argLength: 2, reg: fp21, asm: "VPMINSB", commutative: true, typ: "Vec256", resultInArg0: false},
+	{name: "VPOR256", argLength: 2, reg: fp21, asm: "VPOR", commutative: true, typ: "Vec256", resultInArg0: false},
 	{name: "VPOPCNTB256", argLength: 1, reg: fp11, asm: "VPOPCNTB", commutative: false, typ: "Vec256", resultInArg0: false},
 	{name: "VPADDSB256", argLength: 2, reg: fp21, asm: "VPADDSB", commutative: true, typ: "Vec256", resultInArg0: false},
 	{name: "VPSUBSB256", argLength: 2, reg: fp21, asm: "VPSUBSB", commutative: false, typ: "Vec256", resultInArg0: false},
 	{name: "VPSIGNB256", argLength: 2, reg: fp21, asm: "VPSIGNB", commutative: false, typ: "Vec256", resultInArg0: false},
 	{name: "VPSUBB256", argLength: 2, reg: fp21, asm: "VPSUBB", commutative: false, typ: "Vec256", resultInArg0: false},
+	{name: "VPXOR256", argLength: 2, reg: fp21, asm: "VPXOR", commutative: true, typ: "Vec256", resultInArg0: false},
 	{name: "VPABSB512", argLength: 1, reg: fp11, asm: "VPABSB", commutative: false, typ: "Vec512", resultInArg0: false},
 	{name: "VPADDB512", argLength: 2, reg: fp21, asm: "VPADDB", commutative: true, typ: "Vec512", resultInArg0: false},
-	{name: "VPCMPEQB512", argLength: 2, reg: fp2k1, asm: "VPCMPEQB", commutative: true, typ: "Mask", resultInArg0: false},
-	{name: "VPCMPGTB512", argLength: 2, reg: fp2k1, asm: "VPCMPGTB", commutative: false, typ: "Mask", resultInArg0: false},
 	{name: "VPABSBMasked512", argLength: 2, reg: fp1k1fp1, asm: "VPABSB", commutative: false, typ: "Vec512", resultInArg0: false},
 	{name: "VPADDBMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPADDB", commutative: true, typ: "Vec512", resultInArg0: false},
-	{name: "VPCMPEQBMasked512", argLength: 3, reg: fp2k1k1, asm: "VPCMPEQB", commutative: true, typ: "Mask", resultInArg0: false},
-	{name: "VPCMPGTBMasked512", argLength: 3, reg: fp2k1k1, asm: "VPCMPGTB", commutative: false, typ: "Mask", resultInArg0: false},
 	{name: "VPMAXSBMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMAXSB", commutative: true, typ: "Vec512", resultInArg0: false},
 	{name: "VPMINSBMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMINSB", commutative: true, typ: "Vec512", resultInArg0: false},
 	{name: "VPOPCNTBMasked512", argLength: 2, reg: fp1k1fp1, asm: "VPOPCNTB", commutative: false, typ: "Vec512", resultInArg0: false},
@@ -841,29 +820,29 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
 	{name: "VREDUCEPDMasked512", argLength: 2, reg: fp1k1fp1, asm: "VREDUCEPD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
 	{name: "VCMPPDMasked512", argLength: 3, reg: fp2k1k1, asm: "VCMPPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
 	{name: "VPCMPW256", argLength: 2, reg: fp2k1, asm: "VPCMPW", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
-	{name: "VPCMPWMasked256", argLength: 3, reg: fp2k1k1, asm: "VPCMPW", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
-	{name: "VPCMPW512", argLength: 2, reg: fp2k1, asm: "VPCMPW", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
-	{name: "VPCMPWMasked512", argLength: 3, reg: fp2k1k1, asm: "VPCMPW", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
+	{name: "VPCMPWMasked256", argLength: 3, reg: fp2k1k1, asm: "VPCMPW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
+	{name: "VPCMPW512", argLength: 2, reg: fp2k1, asm: "VPCMPW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
+	{name: "VPCMPWMasked512", argLength: 3, reg: fp2k1k1, asm: "VPCMPW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
 	{name: "VPCMPW128", argLength: 2, reg: fp2k1, asm: "VPCMPW", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
-	{name: "VPCMPWMasked128", argLength: 3, reg: fp2k1k1, asm: "VPCMPW", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
-	{name: "VPCMPD512", argLength: 2, reg: fp2k1, asm: "VPCMPD", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
-	{name: "VPCMPDMasked512", argLength: 3, reg: fp2k1k1, asm: "VPCMPD", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
+	{name: "VPCMPWMasked128", argLength: 3, reg: fp2k1k1, asm: "VPCMPW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
+	{name: "VPCMPD512", argLength: 2, reg: fp2k1, asm: "VPCMPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
+	{name: "VPCMPDMasked512", argLength: 3, reg: fp2k1k1, asm: "VPCMPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
 	{name: "VPCMPD128", argLength: 2, reg: fp2k1, asm: "VPCMPD", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
-	{name: "VPCMPDMasked128", argLength: 3, reg: fp2k1k1, asm: "VPCMPD", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
+	{name: "VPCMPDMasked128", argLength: 3, reg: fp2k1k1, asm: "VPCMPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
 	{name: "VPCMPD256", argLength: 2, reg: fp2k1, asm: "VPCMPD", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
-	{name: "VPCMPDMasked256", argLength: 3, reg: fp2k1k1, asm: "VPCMPD", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
+	{name: "VPCMPDMasked256", argLength: 3, reg: fp2k1k1, asm: "VPCMPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
 	{name: "VPCMPQ128", argLength: 2, reg: fp2k1, asm: "VPCMPQ", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
-	{name: "VPCMPQMasked128", argLength: 3, reg: fp2k1k1, asm: "VPCMPQ", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
+	{name: "VPCMPQMasked128", argLength: 3, reg: fp2k1k1, asm: "VPCMPQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
 	{name: "VPCMPQ256", argLength: 2, reg: fp2k1, asm: "VPCMPQ", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
-	{name: "VPCMPQMasked256", argLength: 3, reg: fp2k1k1, asm: "VPCMPQ", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
-	{name: "VPCMPQ512", argLength: 2, reg: fp2k1, asm: "VPCMPQ", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
-	{name: "VPCMPQMasked512", argLength: 3, reg: fp2k1k1, asm: "VPCMPQ", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
+	{name: "VPCMPQMasked256", argLength: 3, reg: fp2k1k1, asm: "VPCMPQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
+	{name: "VPCMPQ512", argLength: 2, reg: fp2k1, asm: "VPCMPQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
+	{name: "VPCMPQMasked512", argLength: 3, reg: fp2k1k1, asm: "VPCMPQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
 	{name: "VPCMPB128", argLength: 2, reg: fp2k1, asm: "VPCMPB", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
-	{name: "VPCMPBMasked128", argLength: 3, reg: fp2k1k1, asm: "VPCMPB", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
+	{name: "VPCMPBMasked128", argLength: 3, reg: fp2k1k1, asm: "VPCMPB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
 	{name: "VPCMPB256", argLength: 2, reg: fp2k1, asm: "VPCMPB", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
-	{name: "VPCMPBMasked256", argLength: 3, reg: fp2k1k1, asm: "VPCMPB", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
-	{name: "VPCMPB512", argLength: 2, reg: fp2k1, asm: "VPCMPB", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
-	{name: "VPCMPBMasked512", argLength: 3, reg: fp2k1k1, asm: "VPCMPB", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
+	{name: "VPCMPBMasked256", argLength: 3, reg: fp2k1k1, asm: "VPCMPB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
+	{name: "VPCMPB512", argLength: 2, reg: fp2k1, asm: "VPCMPB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
+	{name: "VPCMPBMasked512", argLength: 3, reg: fp2k1k1, asm: "VPCMPB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
 	{name: "VPCMPUW256", argLength: 2, reg: fp2k1, asm: "VPCMPUW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
 	{name: "VPCMPUWMasked256", argLength: 3, reg: fp2k1k1, asm: "VPCMPUW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
 	{name: "VPCMPUW512", argLength: 2, reg: fp2k1, asm: "VPCMPUW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index d2e86702d83..ac47bad525e 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -1250,6 +1250,7 @@ const (
 	OpAMD64VSCALEFPSMasked512
 	OpAMD64VORPSMasked512
 	OpAMD64VSQRTPSMasked512
+	OpAMD64VSUBPSMasked512
 	OpAMD64VXORPSMasked512
 	OpAMD64VMAXPS512
 	OpAMD64VMINPS512
@@ -1257,6 +1258,7 @@ const (
 	OpAMD64VSCALEFPS512
 	OpAMD64VORPS512
 	OpAMD64VSQRTPS512
+	OpAMD64VSUBPS512
 	OpAMD64VXORPS512
 	OpAMD64VADDPS128
 	OpAMD64VADDSUBPS128
@@ -1313,6 +1315,7 @@ const (
 	OpAMD64VSCALEFPSMasked128
 	OpAMD64VORPSMasked128
 	OpAMD64VSQRTPSMasked128
+	OpAMD64VSUBPSMasked128
 	OpAMD64VXORPSMasked128
 	OpAMD64VMAXPS128
 	OpAMD64VMINPS128
@@ -1322,6 +1325,7 @@ const (
 	OpAMD64VHADDPS128
 	OpAMD64VHSUBPS128
 	OpAMD64VSQRTPS128
+	OpAMD64VSUBPS128
 	OpAMD64VXORPS128
 	OpAMD64VADDPS256
 	OpAMD64VADDSUBPS256
@@ -1378,6 +1382,7 @@ const (
 	OpAMD64VSCALEFPSMasked256
 	OpAMD64VORPSMasked256
 	OpAMD64VSQRTPSMasked256
+	OpAMD64VSUBPSMasked256
 	OpAMD64VXORPSMasked256
 	OpAMD64VMAXPS256
 	OpAMD64VMINPS256
@@ -1387,6 +1392,7 @@ const (
 	OpAMD64VHADDPS256
 	OpAMD64VHSUBPS256
 	OpAMD64VSQRTPS256
+	OpAMD64VSUBPS256
 	OpAMD64VXORPS256
 	OpAMD64VADDPD128
 	OpAMD64VADDSUBPD128
@@ -1443,6 +1449,7 @@ const (
 	OpAMD64VSCALEFPDMasked128
 	OpAMD64VORPDMasked128
 	OpAMD64VSQRTPDMasked128
+	OpAMD64VSUBPDMasked128
 	OpAMD64VXORPDMasked128
 	OpAMD64VMAXPD128
 	OpAMD64VMINPD128
@@ -1452,6 +1459,7 @@ const (
 	OpAMD64VHADDPD128
 	OpAMD64VHSUBPD128
 	OpAMD64VSQRTPD128
+	OpAMD64VSUBPD128
 	OpAMD64VXORPD128
 	OpAMD64VADDPD256
 	OpAMD64VADDSUBPD256
@@ -1508,6 +1516,7 @@ const (
 	OpAMD64VSCALEFPDMasked256
 	OpAMD64VORPDMasked256
 	OpAMD64VSQRTPDMasked256
+	OpAMD64VSUBPDMasked256
 	OpAMD64VXORPDMasked256
 	OpAMD64VMAXPD256
 	OpAMD64VMINPD256
@@ -1517,6 +1526,7 @@ const (
 	OpAMD64VHADDPD256
 	OpAMD64VHSUBPD256
 	OpAMD64VSQRTPD256
+	OpAMD64VSUBPD256
 	OpAMD64VXORPD256
 	OpAMD64VADDPD512
 	OpAMD64VANDPD512
@@ -1572,6 +1582,7 @@ const (
 	OpAMD64VSCALEFPDMasked512
 	OpAMD64VORPDMasked512
 	OpAMD64VSQRTPDMasked512
+	OpAMD64VSUBPDMasked512
 	OpAMD64VXORPDMasked512
 	OpAMD64VMAXPD512
 	OpAMD64VMINPD512
@@ -1579,17 +1590,14 @@ const (
 	OpAMD64VSCALEFPD512
 	OpAMD64VORPD512
 	OpAMD64VSQRTPD512
+	OpAMD64VSUBPD512
 	OpAMD64VXORPD512
 	OpAMD64VPABSW256
 	OpAMD64VPADDW256
-	OpAMD64VPAND256
-	OpAMD64VPANDN256
 	OpAMD64VPCMPEQW256
 	OpAMD64VPCMPGTW256
 	OpAMD64VPABSWMasked256
 	OpAMD64VPADDWMasked256
-	OpAMD64VPCMPEQWMasked256
-	OpAMD64VPCMPGTWMasked256
 	OpAMD64VPMAXSWMasked256
 	OpAMD64VPMINSWMasked256
 	OpAMD64VPMULHWMasked256
@@ -1603,7 +1611,6 @@ const (
 	OpAMD64VPMINSW256
 	OpAMD64VPMULHW256
 	OpAMD64VPMULLW256
-	OpAMD64VPOR256
 	OpAMD64VPMADDWD256
 	OpAMD64VPHADDW256
 	OpAMD64VPHSUBW256
@@ -1614,15 +1621,10 @@ const (
 	OpAMD64VPSUBSW256
 	OpAMD64VPSIGNW256
 	OpAMD64VPSUBW256
-	OpAMD64VPXOR256
 	OpAMD64VPABSW512
 	OpAMD64VPADDW512
-	OpAMD64VPCMPEQW512
-	OpAMD64VPCMPGTW512
 	OpAMD64VPABSWMasked512
 	OpAMD64VPADDWMasked512
-	OpAMD64VPCMPEQWMasked512
-	OpAMD64VPCMPGTWMasked512
 	OpAMD64VPMAXSWMasked512
 	OpAMD64VPMINSWMasked512
 	OpAMD64VPMULHWMasked512
@@ -1643,14 +1645,10 @@ const (
 	OpAMD64VPSUBW512
 	OpAMD64VPABSW128
 	OpAMD64VPADDW128
-	OpAMD64VPAND128
-	OpAMD64VPANDN128
 	OpAMD64VPCMPEQW128
 	OpAMD64VPCMPGTW128
 	OpAMD64VPABSWMasked128
 	OpAMD64VPADDWMasked128
-	OpAMD64VPCMPEQWMasked128
-	OpAMD64VPCMPGTWMasked128
 	OpAMD64VPMAXSWMasked128
 	OpAMD64VPMINSWMasked128
 	OpAMD64VPMULHWMasked128
@@ -1664,7 +1662,6 @@ const (
 	OpAMD64VPMINSW128
 	OpAMD64VPMULHW128
 	OpAMD64VPMULLW128
-	OpAMD64VPOR128
 	OpAMD64VPMADDWD128
 	OpAMD64VPHADDW128
 	OpAMD64VPHSUBW128
@@ -1675,19 +1672,14 @@ const (
 	OpAMD64VPSUBSW128
 	OpAMD64VPSIGNW128
 	OpAMD64VPSUBW128
-	OpAMD64VPXOR128
 	OpAMD64VPABSD512
 	OpAMD64VPADDD512
 	OpAMD64VPANDD512
 	OpAMD64VPANDND512
-	OpAMD64VPCMPEQD512
-	OpAMD64VPCMPGTD512
 	OpAMD64VPABSDMasked512
 	OpAMD64VPADDDMasked512
 	OpAMD64VPANDDMasked512
 	OpAMD64VPANDNDMasked512
-	OpAMD64VPCMPEQDMasked512
-	OpAMD64VPCMPGTDMasked512
 	OpAMD64VPMAXSDMasked512
 	OpAMD64VPMINSDMasked512
 	OpAMD64VPMULLDMasked512
@@ -1718,8 +1710,6 @@ const (
 	OpAMD64VPADDDMasked128
 	OpAMD64VPANDDMasked128
 	OpAMD64VPANDNDMasked128
-	OpAMD64VPCMPEQDMasked128
-	OpAMD64VPCMPGTDMasked128
 	OpAMD64VPMAXSDMasked128
 	OpAMD64VPMINSDMasked128
 	OpAMD64VPMULLDMasked128
@@ -1752,8 +1742,6 @@ const (
 	OpAMD64VPADDDMasked256
 	OpAMD64VPANDDMasked256
 	OpAMD64VPANDNDMasked256
-	OpAMD64VPCMPEQDMasked256
-	OpAMD64VPCMPGTDMasked256
 	OpAMD64VPMAXSDMasked256
 	OpAMD64VPMINSDMasked256
 	OpAMD64VPMULLDMasked256
@@ -1781,13 +1769,10 @@ const (
 	OpAMD64VPABSQ128
 	OpAMD64VPADDQ128
 	OpAMD64VPCMPEQQ128
-	OpAMD64VPCMPGTQ128
 	OpAMD64VPABSQMasked128
 	OpAMD64VPADDQMasked128
 	OpAMD64VPANDQMasked128
 	OpAMD64VPANDNQMasked128
-	OpAMD64VPCMPEQQMasked128
-	OpAMD64VPCMPGTQMasked128
 	OpAMD64VPMAXSQMasked128
 	OpAMD64VPMINSQMasked128
 	OpAMD64VPMULDQMasked128
@@ -1809,8 +1794,6 @@ const (
 	OpAMD64VPADDQMasked256
 	OpAMD64VPANDQMasked256
 	OpAMD64VPANDNQMasked256
-	OpAMD64VPCMPEQQMasked256
-	OpAMD64VPCMPGTQMasked256
 	OpAMD64VPMAXSQMasked256
 	OpAMD64VPMINSQMasked256
 	OpAMD64VPMULDQMasked256
@@ -1828,14 +1811,10 @@ const (
 	OpAMD64VPADDQ512
 	OpAMD64VPANDQ512
 	OpAMD64VPANDNQ512
-	OpAMD64VPCMPEQQ512
-	OpAMD64VPCMPGTQ512
 	OpAMD64VPABSQMasked512
 	OpAMD64VPADDQMasked512
 	OpAMD64VPANDQMasked512
 	OpAMD64VPANDNQMasked512
-	OpAMD64VPCMPEQQMasked512
-	OpAMD64VPCMPGTQMasked512
 	OpAMD64VPMAXSQMasked512
 	OpAMD64VPMINSQMasked512
 	OpAMD64VPMULDQMasked512
@@ -1854,12 +1833,12 @@ const (
 	OpAMD64VPXORQ512
 	OpAMD64VPABSB128
 	OpAMD64VPADDB128
+	OpAMD64VPAND128
+	OpAMD64VPANDN128
 	OpAMD64VPCMPEQB128
 	OpAMD64VPCMPGTB128
 	OpAMD64VPABSBMasked128
 	OpAMD64VPADDBMasked128
-	OpAMD64VPCMPEQBMasked128
-	OpAMD64VPCMPGTBMasked128
 	OpAMD64VPMAXSBMasked128
 	OpAMD64VPMINSBMasked128
 	OpAMD64VPOPCNTBMasked128
@@ -1868,19 +1847,21 @@ const (
 	OpAMD64VPSUBBMasked128
 	OpAMD64VPMAXSB128
 	OpAMD64VPMINSB128
+	OpAMD64VPOR128
 	OpAMD64VPOPCNTB128
 	OpAMD64VPADDSB128
 	OpAMD64VPSUBSB128
 	OpAMD64VPSIGNB128
 	OpAMD64VPSUBB128
+	OpAMD64VPXOR128
 	OpAMD64VPABSB256
 	OpAMD64VPADDB256
+	OpAMD64VPAND256
+	OpAMD64VPANDN256
 	OpAMD64VPCMPEQB256
 	OpAMD64VPCMPGTB256
 	OpAMD64VPABSBMasked256
 	OpAMD64VPADDBMasked256
-	OpAMD64VPCMPEQBMasked256
-	OpAMD64VPCMPGTBMasked256
 	OpAMD64VPMAXSBMasked256
 	OpAMD64VPMINSBMasked256
 	OpAMD64VPOPCNTBMasked256
@@ -1889,19 +1870,17 @@ const (
 	OpAMD64VPSUBBMasked256
 	OpAMD64VPMAXSB256
 	OpAMD64VPMINSB256
+	OpAMD64VPOR256
 	OpAMD64VPOPCNTB256
 	OpAMD64VPADDSB256
 	OpAMD64VPSUBSB256
 	OpAMD64VPSIGNB256
 	OpAMD64VPSUBB256
+	OpAMD64VPXOR256
 	OpAMD64VPABSB512
 	OpAMD64VPADDB512
-	OpAMD64VPCMPEQB512
-	OpAMD64VPCMPGTB512
 	OpAMD64VPABSBMasked512
 	OpAMD64VPADDBMasked512
-	OpAMD64VPCMPEQBMasked512
-	OpAMD64VPCMPGTBMasked512
 	OpAMD64VPMAXSBMasked512
 	OpAMD64VPMINSBMasked512
 	OpAMD64VPOPCNTBMasked512
@@ -19314,6 +19293,21 @@ var opcodeTable = [...]opInfo{
 		},
 	},
 	},
+	{
+		name:   "VSUBPSMasked512",
+		argLen: 3,
+		asm:    x86.AVSUBPS,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
+				{0, 2147418112},    // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+				{1, 2147418112},    // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+			outputs: []outputInfo{
+				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+		},
+	},
 	{
 		name:   "VXORPSMasked512",
 		argLen: 3,
@@ -19417,6 +19411,20 @@ var opcodeTable = [...]opInfo{
 		},
 	},
 	},
+	{
+		name:   "VSUBPS512",
+		argLen: 2,
+		asm:    x86.AVSUBPS,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+				{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+			outputs: []outputInfo{
+				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+		},
+	},
 	{
 		name:   "VXORPS512",
 		argLen: 2,
@@ -20307,6 +20315,21 @@ var opcodeTable = [...]opInfo{
 		},
 	},
 	},
+	{
+		name:   "VSUBPSMasked128",
+		argLen: 3,
+		asm:    x86.AVSUBPS,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
+				{0, 2147418112},    // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+				{1, 2147418112},    // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+			outputs: []outputInfo{
+				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+		},
+	},
 	{
 		name:   "VXORPSMasked128",
 		argLen: 3,
@@ -20438,6 +20461,20 @@ var opcodeTable = [...]opInfo{
 		},
 	},
 	},
+	{
+		name:   "VSUBPS128",
+		argLen: 2,
+		asm:    x86.AVSUBPS,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+				{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+			outputs: []outputInfo{
+				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+		},
+	},
 	{
 		name:   "VXORPS128",
 		argLen: 2,
@@ -21328,6 +21365,21 @@ var opcodeTable = [...]opInfo{
 		},
 	},
 	},
+	{
+		name:   "VSUBPSMasked256",
+		argLen: 3,
+		asm:    x86.AVSUBPS,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
+				{0, 2147418112},    // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+				{1, 2147418112},    // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+			outputs: []outputInfo{
+				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+		},
+	},
 	{
 		name:   "VXORPSMasked256",
 		argLen: 3,
@@ -21459,6 +21511,20 @@ var opcodeTable = [...]opInfo{
 		},
 	},
 	},
+	{
+		name:   "VSUBPS256",
+		argLen: 2,
+		asm:    x86.AVSUBPS,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+				{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+			outputs: []outputInfo{
+				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+		},
+	},
 	{
 		name:   "VXORPS256",
 		argLen: 2,
@@ -22349,6 +22415,21 @@ var opcodeTable = [...]opInfo{
 		},
 	},
 	},
+	{
+		name:   "VSUBPDMasked128",
+		argLen: 3,
+		asm:    x86.AVSUBPD,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
+				{0, 2147418112},    // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+				{1, 2147418112},    // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+			outputs: []outputInfo{
+				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+		},
+	},
 	{
 		name:   "VXORPDMasked128",
 		argLen: 3,
@@ -22480,6 +22561,20 @@ var opcodeTable = [...]opInfo{
 		},
 	},
 	},
+	{
+		name:   "VSUBPD128",
+		argLen: 2,
+		asm:    x86.AVSUBPD,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+				{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+			outputs: []outputInfo{
+				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+		},
+	},
 	{
 		name:   "VXORPD128",
 		argLen: 2,
@@ -23370,6 +23465,21 @@ var opcodeTable = [...]opInfo{
 		},
 	},
 	},
+	{
+		name:   "VSUBPDMasked256",
+		argLen: 3,
+		asm:    x86.AVSUBPD,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
+				{0, 2147418112},    // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+				{1, 2147418112},    // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+			outputs: []outputInfo{
+				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+		},
+	},
 	{
 		name:   "VXORPDMasked256",
 		argLen: 3,
@@ -23501,6 +23611,20 @@ var opcodeTable = [...]opInfo{
 		},
 	},
 	},
+	{
+		name:   "VSUBPD256",
+		argLen: 2,
+		asm:    x86.AVSUBPD,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+				{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+			outputs: []outputInfo{
+				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+		},
+	},
 	{
 		name:   "VXORPD256",
 		argLen: 2,
@@ -24377,6 +24501,21 @@ var opcodeTable = [...]opInfo{
 		},
 	},
 	},
+	{
+		name:   "VSUBPDMasked512",
+		argLen: 3,
+		asm:    x86.AVSUBPD,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
+				{0, 2147418112},    // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+				{1, 2147418112},    // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+			outputs: []outputInfo{
+				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+		},
+	},
 	{
 		name:   "VXORPDMasked512",
 		argLen: 3,
@@ -24480,6 +24619,20 @@ var opcodeTable = [...]opInfo{
 		},
 	},
 	},
+	{
+		name:   "VSUBPD512",
+		argLen: 2,
+		asm:    x86.AVSUBPD,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+				{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+			outputs: []outputInfo{
+				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+		},
+	},
 	{
 		name:   "VXORPD512",
 		argLen: 2,
@@ -24523,35 +24676,6 @@ var opcodeTable = [...]opInfo{
 		},
 	},
 	},
-	{
-		name:        "VPAND256",
-		argLen:      2,
-		commutative: true,
-		asm:         x86.AVPAND,
-		reg: regInfo{
-			inputs: []inputInfo{
-				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-				{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-			},
-			outputs: []outputInfo{
-				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-			},
-		},
-	},
-	{
-		name:   "VPANDN256",
-		argLen: 2,
-		asm:    x86.AVPANDN,
-		reg: regInfo{
-			inputs: []inputInfo{
-				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-				{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-			},
-			outputs: []outputInfo{
-				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-			},
-		},
-	},
 	{
 		name:   "VPCMPEQW256",
 		argLen: 2,
@@ -24611,37 +24735,6 @@ var opcodeTable = [...]opInfo{
 		},
 	},
 	},
-	{
-		name:        "VPCMPEQWMasked256",
-		argLen:      3,
-		commutative: true,
-		asm:         x86.AVPCMPEQW,
-		reg: regInfo{
-			inputs: []inputInfo{
-				{2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
-				{0, 2147418112},    // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-				{1, 2147418112},    // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-			},
-			outputs: []outputInfo{
-				{0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
-			},
-		},
-	},
-	{
-		name:   "VPCMPGTWMasked256",
-		argLen: 3,
-		asm:    x86.AVPCMPGTW,
-		reg: regInfo{
-			inputs: []inputInfo{
-				{2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
-				{0, 2147418112},    // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-				{1, 2147418112},    // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-			},
-			outputs: []outputInfo{
-				{0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
-			},
-		},
-	},
 	{
 		name:   "VPMAXSWMasked256",
 		argLen: 3,
@@ -24841,21 +24934,6 @@ var opcodeTable = [...]opInfo{
 		},
 	},
 	},
-	{
-		name:        "VPOR256",
-		argLen:      2,
-		commutative: true,
-		asm:         x86.AVPOR,
-		reg: regInfo{
-			inputs: []inputInfo{
-				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-				{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-			},
-			outputs: []outputInfo{
-				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-			},
-		},
-	},
 	{
 		name:   "VPMADDWD256",
 		argLen: 2,
@@ -24996,21 +25074,6 @@ var opcodeTable = [...]opInfo{
 		},
 	},
 	},
-	{
-		name:        "VPXOR256",
-		argLen:      2,
-		commutative: true,
-		asm:         x86.AVPXOR,
-		reg: regInfo{
-			inputs: []inputInfo{
-				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-				{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-			},
-			outputs: []outputInfo{
-				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-			},
-		},
-	},
 	{
 		name:   "VPABSW512",
 		argLen: 1,
@@ -25039,35 +25102,6 @@ var opcodeTable = [...]opInfo{
 		},
 	},
 	},
-	{
-		name:        "VPCMPEQW512",
-		argLen:      2,
-		commutative: true,
-		asm:         x86.AVPCMPEQW,
-		reg: regInfo{
-			inputs: []inputInfo{
-				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-				{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-			},
-			outputs: []outputInfo{
-				{0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
-			},
-		},
-	},
-	{
-		name:   "VPCMPGTW512",
-		argLen: 2,
-		asm:    x86.AVPCMPGTW,
-		reg: regInfo{
-			inputs: []inputInfo{
-				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-				{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-			},
-			outputs: []outputInfo{
-				{0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
-			},
-		},
-	},
 	{
 		name:   "VPABSWMasked512",
 		argLen: 2,
@@ -25098,37 +25132,6 @@ var opcodeTable = [...]opInfo{
 		},
 	},
 	},
-	{
-		name:        "VPCMPEQWMasked512",
-		argLen:      3,
-		commutative: true,
-		asm:         x86.AVPCMPEQW,
-		reg: regInfo{
-			inputs: []inputInfo{
-				{2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
-				{0, 2147418112},    // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-				{1, 2147418112},    // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-			},
-			outputs: []outputInfo{
-				{0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
-			},
-		},
-	},
-	{
-		name:   "VPCMPGTWMasked512",
-		argLen: 3,
-		asm:    x86.AVPCMPGTW,
-		reg: regInfo{
-			inputs: []inputInfo{
-				{2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
-				{0, 2147418112},    // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-				{1, 2147418112},    // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-			},
-			outputs: []outputInfo{
-				{0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
-			},
-		},
-	},
 	{
 		name:   "VPMAXSWMasked512",
 		argLen: 3,
@@ -25426,35 +25429,6 @@ var opcodeTable = [...]opInfo{
 		},
 	},
 	},
-	{
-		name:        "VPAND128",
-		argLen:      2,
-		commutative: true,
-		asm:         x86.AVPAND,
-		reg: regInfo{
-			inputs: []inputInfo{
-				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-				{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-			},
-			outputs: []outputInfo{
-				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-			},
-		},
-	},
-	{
-		name:   "VPANDN128",
-		argLen: 2,
-		asm:    x86.AVPANDN,
-		reg: regInfo{
-			inputs: []inputInfo{
-				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-				{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-			},
-			outputs: []outputInfo{
-				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-			},
-		},
-	},
 	{
 		name:   "VPCMPEQW128",
 		argLen: 2,
@@ -25514,37 +25488,6 @@ var opcodeTable = [...]opInfo{
 		},
 	},
 	},
-	{
-		name:        "VPCMPEQWMasked128",
-		argLen:      3,
-		commutative: true,
-		asm:         x86.AVPCMPEQW,
-		reg: regInfo{
-			inputs: []inputInfo{
-				{2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
-				{0, 2147418112},    // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-				{1, 2147418112},    // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-			},
-			outputs: []outputInfo{
-				{0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
-			},
-		},
-	},
-	{
-		name:   "VPCMPGTWMasked128",
-		argLen: 3,
-		asm:    x86.AVPCMPGTW,
-		reg: regInfo{
-			inputs: []inputInfo{
-				{2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
-				{0, 2147418112},    // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-				{1, 2147418112},    // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-			},
-			outputs: []outputInfo{
-				{0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
-			},
-		},
-	},
 	{
 		name:   "VPMAXSWMasked128",
 		argLen: 3,
@@ -25744,21 +25687,6 @@ var opcodeTable = [...]opInfo{
 		},
 	},
 	},
-	{
-		name:        "VPOR128",
-		argLen:      2,
-		commutative: true,
-		asm:         x86.AVPOR,
-		reg: regInfo{
-			inputs: []inputInfo{
-
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, { name: "VPMADDWD128", argLen: 2, @@ -25899,21 +25827,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPXOR128", - argLen: 2, - commutative: true, - asm: x86.AVPXOR, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, { name: "VPABSD512", argLen: 1, @@ -25971,35 +25884,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPCMPEQD512", - argLen: 2, - commutative: true, - asm: x86.AVPCMPEQD, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, - { - name: "VPCMPGTD512", - argLen: 2, - asm: x86.AVPCMPGTD, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, { name: "VPABSDMasked512", argLen: 2, @@ -26061,37 +25945,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPCMPEQDMasked512", - argLen: 3, - commutative: true, - asm: x86.AVPCMPEQD, - reg: regInfo{ - inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, - { - name: "VPCMPGTDMasked512", - argLen: 3, - asm: x86.AVPCMPGTD, - reg: regInfo{ - inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, { name: "VPMAXSDMasked512", argLen: 3, @@ -26553,37 +26406,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPCMPEQDMasked128", - argLen: 3, - commutative: true, - asm: x86.AVPCMPEQD, - reg: regInfo{ - inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, - { - name: "VPCMPGTDMasked128", - argLen: 3, - asm: x86.AVPCMPGTD, - reg: regInfo{ - inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, { name: "VPMAXSDMasked128", argLen: 3, @@ -27072,37 +26894,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPCMPEQDMasked256", - argLen: 3, - commutative: true, - asm: x86.AVPCMPEQD, - reg: regInfo{ - inputs: 
[]inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, - { - name: "VPCMPGTDMasked256", - argLen: 3, - asm: x86.AVPCMPGTD, - reg: regInfo{ - inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, { name: "VPMAXSDMasked256", argLen: 3, @@ -27516,20 +27307,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPCMPGTQ128", - argLen: 2, - asm: x86.AVPCMPGTQ, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, { name: "VPABSQMasked128", argLen: 2, @@ -27591,37 +27368,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPCMPEQQMasked128", - argLen: 3, - commutative: true, - asm: x86.AVPCMPEQQ, - reg: regInfo{ - inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, - { - name: "VPCMPGTQMasked128", - argLen: 3, - asm: x86.AVPCMPGTQ, - reg: regInfo{ - inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, { name: "VPMAXSQMasked128", argLen: 3, @@ -27937,37 +27683,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPCMPEQQMasked256", - argLen: 3, - commutative: true, - asm: x86.AVPCMPEQQ, - reg: regInfo{ - inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, - { - name: "VPCMPGTQMasked256", - argLen: 3, - asm: x86.AVPCMPGTQ, - reg: regInfo{ - inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, { name: "VPMAXSQMasked256", argLen: 3, @@ -28222,35 +27937,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPCMPEQQ512", - argLen: 2, - commutative: true, - asm: x86.AVPCMPEQQ, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, - { - name: "VPCMPGTQ512", - argLen: 2, - asm: x86.AVPCMPGTQ, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 
X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, { name: "VPABSQMasked512", argLen: 2, @@ -28312,37 +27998,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPCMPEQQMasked512", - argLen: 3, - commutative: true, - asm: x86.AVPCMPEQQ, - reg: regInfo{ - inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, - { - name: "VPCMPGTQMasked512", - argLen: 3, - asm: x86.AVPCMPGTQ, - reg: regInfo{ - inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, { name: "VPMAXSQMasked512", argLen: 3, @@ -28613,6 +28268,35 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPAND128", + argLen: 2, + commutative: true, + asm: x86.AVPAND, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPANDN128", + argLen: 2, + asm: x86.AVPANDN, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPCMPEQB128", argLen: 2, @@ -28672,37 +28356,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPCMPEQBMasked128", - argLen: 3, - commutative: true, - asm: x86.AVPCMPEQB, - reg: regInfo{ - inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, - { - name: "VPCMPGTBMasked128", - argLen: 3, - asm: x86.AVPCMPGTB, - reg: regInfo{ - inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, { name: "VPMAXSBMasked128", argLen: 3, @@ -28825,6 +28478,21 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPOR128", + argLen: 2, + commutative: true, + asm: x86.AVPOR, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPOPCNTB128", argLen: 1, @@ -28895,6 +28563,21 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPXOR128", + argLen: 2, + commutative: true, + asm: x86.AVPXOR, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 
X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPABSB256", argLen: 1, @@ -28923,6 +28606,35 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPAND256", + argLen: 2, + commutative: true, + asm: x86.AVPAND, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPANDN256", + argLen: 2, + asm: x86.AVPANDN, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPCMPEQB256", argLen: 2, @@ -28982,37 +28694,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPCMPEQBMasked256", - argLen: 3, - commutative: true, - asm: x86.AVPCMPEQB, - reg: regInfo{ - inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, - { - name: "VPCMPGTBMasked256", - argLen: 3, - asm: x86.AVPCMPGTB, - reg: regInfo{ - inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, { name: "VPMAXSBMasked256", argLen: 3, @@ -29135,6 +28816,21 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPOR256", + argLen: 2, + commutative: true, + asm: x86.AVPOR, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPOPCNTB256", argLen: 1, @@ -29205,6 +28901,21 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPXOR256", + argLen: 2, + commutative: true, + asm: x86.AVPXOR, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPABSB512", argLen: 1, @@ -29233,35 +28944,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPCMPEQB512", - argLen: 2, - commutative: true, - asm: x86.AVPCMPEQB, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, - { - name: "VPCMPGTB512", - argLen: 2, - asm: x86.AVPCMPGTB, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 
X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, { name: "VPABSBMasked512", argLen: 2, @@ -29292,37 +28974,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPCMPEQBMasked512", - argLen: 3, - commutative: true, - asm: x86.AVPCMPEQB, - reg: regInfo{ - inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, - { - name: "VPCMPGTBMasked512", - argLen: 3, - asm: x86.AVPCMPGTB, - reg: regInfo{ - inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, { name: "VPMAXSBMasked512", argLen: 3, @@ -31338,10 +30989,11 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPCMPWMasked256", - auxType: auxInt8, - argLen: 3, - asm: x86.AVPCMPW, + name: "VPCMPWMasked256", + auxType: auxInt8, + argLen: 3, + commutative: true, + asm: x86.AVPCMPW, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -31354,10 +31006,11 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPCMPW512", - auxType: auxInt8, - argLen: 2, - asm: x86.AVPCMPW, + name: "VPCMPW512", + auxType: auxInt8, + argLen: 2, + commutative: true, + asm: x86.AVPCMPW, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -31369,10 +31022,11 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPCMPWMasked512", - auxType: auxInt8, - argLen: 3, - asm: x86.AVPCMPW, + name: "VPCMPWMasked512", + auxType: auxInt8, + argLen: 3, + commutative: true, + asm: x86.AVPCMPW, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -31400,10 +31054,11 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPCMPWMasked128", - auxType: auxInt8, - argLen: 3, - asm: x86.AVPCMPW, + name: "VPCMPWMasked128", + auxType: auxInt8, + argLen: 3, + commutative: true, + asm: x86.AVPCMPW, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -31416,10 +31071,11 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPCMPD512", - auxType: auxInt8, - argLen: 2, - asm: x86.AVPCMPD, + name: "VPCMPD512", + auxType: auxInt8, + argLen: 2, + commutative: true, + asm: x86.AVPCMPD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -31431,10 +31087,11 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPCMPDMasked512", - auxType: auxInt8, - argLen: 3, - asm: x86.AVPCMPD, + name: "VPCMPDMasked512", + auxType: auxInt8, + argLen: 3, + commutative: true, + asm: x86.AVPCMPD, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -31462,10 +31119,11 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPCMPDMasked128", - auxType: auxInt8, - argLen: 3, - asm: x86.AVPCMPD, + name: "VPCMPDMasked128", + auxType: auxInt8, + argLen: 3, + commutative: true, + asm: x86.AVPCMPD, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -31493,10 +31151,11 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPCMPDMasked256", - auxType: auxInt8, - argLen: 3, - asm: x86.AVPCMPD, + name: "VPCMPDMasked256", + auxType: 
auxInt8, + argLen: 3, + commutative: true, + asm: x86.AVPCMPD, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -31524,10 +31183,11 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPCMPQMasked128", - auxType: auxInt8, - argLen: 3, - asm: x86.AVPCMPQ, + name: "VPCMPQMasked128", + auxType: auxInt8, + argLen: 3, + commutative: true, + asm: x86.AVPCMPQ, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -31555,10 +31215,11 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPCMPQMasked256", - auxType: auxInt8, - argLen: 3, - asm: x86.AVPCMPQ, + name: "VPCMPQMasked256", + auxType: auxInt8, + argLen: 3, + commutative: true, + asm: x86.AVPCMPQ, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -31571,10 +31232,11 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPCMPQ512", - auxType: auxInt8, - argLen: 2, - asm: x86.AVPCMPQ, + name: "VPCMPQ512", + auxType: auxInt8, + argLen: 2, + commutative: true, + asm: x86.AVPCMPQ, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -31586,10 +31248,11 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPCMPQMasked512", - auxType: auxInt8, - argLen: 3, - asm: x86.AVPCMPQ, + name: "VPCMPQMasked512", + auxType: auxInt8, + argLen: 3, + commutative: true, + asm: x86.AVPCMPQ, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -31617,10 +31280,11 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPCMPBMasked128", - auxType: auxInt8, - argLen: 3, - asm: x86.AVPCMPB, + name: "VPCMPBMasked128", + auxType: auxInt8, + argLen: 3, + commutative: true, + asm: x86.AVPCMPB, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -31648,10 +31312,11 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPCMPBMasked256", - auxType: auxInt8, - argLen: 3, - asm: x86.AVPCMPB, + name: "VPCMPBMasked256", + auxType: auxInt8, + argLen: 3, + commutative: true, + asm: x86.AVPCMPB, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -31664,10 +31329,11 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPCMPB512", - auxType: auxInt8, - argLen: 2, - asm: x86.AVPCMPB, + name: "VPCMPB512", + auxType: auxInt8, + argLen: 2, + commutative: true, + asm: x86.AVPCMPB, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -31679,10 +31345,11 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPCMPBMasked512", - auxType: auxInt8, - argLen: 3, - asm: x86.AVPCMPB, + name: "VPCMPBMasked512", + auxType: auxInt8, + argLen: 3, + commutative: true, + asm: x86.AVPCMPB, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index e9bafe2a1b4..80d8eef8733 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -4584,22 +4584,22 @@ func rewriteValueAMD64(v *Value) bool { v.Op = OpAMD64SUBL return true case OpSubFloat32x16: - v.Op = OpAMD64VADDPS512 + v.Op = OpAMD64VSUBPS512 return true case OpSubFloat32x4: - v.Op = OpAMD64VADDPS128 + v.Op = OpAMD64VSUBPS128 return true case OpSubFloat32x8: - v.Op = OpAMD64VADDPS256 + v.Op = OpAMD64VSUBPS256 return true case OpSubFloat64x2: - v.Op = OpAMD64VADDPD128 + v.Op = OpAMD64VSUBPD128 return true case OpSubFloat64x4: - v.Op = OpAMD64VADDPD256 + v.Op = OpAMD64VSUBPD256 return true case 
OpSubFloat64x8: - v.Op = OpAMD64VADDPD512 + v.Op = OpAMD64VSUBPD512 return true case OpSubInt16x16: v.Op = OpAMD64VPSUBW256 @@ -30476,12 +30476,13 @@ func rewriteValueAMD64_OpEqualInt16x32(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (EqualInt16x32 x y) - // result: (VPMOVMToVec16x32 (VPCMPEQW512 x y)) + // result: (VPMOVMToVec16x32 (VPCMPW512 [0] x y)) for { x := v_0 y := v_1 v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQW512, typ.Mask) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW512, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) v0.AddArg2(x, y) v.AddArg(v0) return true @@ -30493,12 +30494,13 @@ func rewriteValueAMD64_OpEqualInt32x16(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (EqualInt32x16 x y) - // result: (VPMOVMToVec32x16 (VPCMPEQD512 x y)) + // result: (VPMOVMToVec32x16 (VPCMPD512 [0] x y)) for { x := v_0 y := v_1 v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQD512, typ.Mask) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD512, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) v0.AddArg2(x, y) v.AddArg(v0) return true @@ -30510,12 +30512,13 @@ func rewriteValueAMD64_OpEqualInt64x8(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (EqualInt64x8 x y) - // result: (VPMOVMToVec64x8 (VPCMPEQQ512 x y)) + // result: (VPMOVMToVec64x8 (VPCMPQ512 [0] x y)) for { x := v_0 y := v_1 v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQQ512, typ.Mask) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ512, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) v0.AddArg2(x, y) v.AddArg(v0) return true @@ -30527,12 +30530,13 @@ func rewriteValueAMD64_OpEqualInt8x64(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (EqualInt8x64 x y) - // result: (VPMOVMToVec8x64 (VPCMPEQB512 x y)) + // result: (VPMOVMToVec8x64 (VPCMPB512 [0] x y)) for { x := v_0 y := v_1 v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQB512, typ.Mask) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB512, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) v0.AddArg2(x, y) v.AddArg(v0) return true @@ -31623,12 +31627,13 @@ func rewriteValueAMD64_OpGreaterInt16x32(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (GreaterInt16x32 x y) - // result: (VPMOVMToVec16x32 (VPCMPGTW512 x y)) + // result: (VPMOVMToVec16x32 (VPCMPW512 [6] x y)) for { x := v_0 y := v_1 v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPGTW512, typ.Mask) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW512, typ.Mask) + v0.AuxInt = int8ToAuxInt(6) v0.AddArg2(x, y) v.AddArg(v0) return true @@ -31640,12 +31645,13 @@ func rewriteValueAMD64_OpGreaterInt32x16(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (GreaterInt32x16 x y) - // result: (VPMOVMToVec32x16 (VPCMPGTD512 x y)) + // result: (VPMOVMToVec32x16 (VPCMPD512 [6] x y)) for { x := v_0 y := v_1 v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPGTD512, typ.Mask) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD512, typ.Mask) + v0.AuxInt = int8ToAuxInt(6) v0.AddArg2(x, y) v.AddArg(v0) return true @@ -31657,12 +31663,13 @@ func rewriteValueAMD64_OpGreaterInt64x2(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (GreaterInt64x2 x y) - // result: (VPMOVMToVec64x2 (VPCMPGTQ128 x y)) + // result: (VPMOVMToVec64x2 (VPCMPQ128 [6] x y)) for { x := v_0 y := v_1 v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPGTQ128, typ.Mask) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ128, typ.Mask) + v0.AuxInt = int8ToAuxInt(6) 
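// Note (editorial, not part of the generated source): AVX-512's
// VPCMP{B,W,D,Q} instructions take an imm8 predicate selecting the
// comparison; in the Intel SDM encoding, 0 is EQ and 6 is NLE, i.e.
// signed greater-than. That is why the Equal rewrites above set AuxInt
// to 0 while these Greater rewrites set it to 6 — a single predicated
// VPCMP* op replaces the dedicated VPCMPEQ*/VPCMPGT* opcodes removed
// from the opcode table earlier in this diff.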
v0.AddArg2(x, y) v.AddArg(v0) return true @@ -31674,12 +31681,13 @@ func rewriteValueAMD64_OpGreaterInt64x8(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (GreaterInt64x8 x y) - // result: (VPMOVMToVec64x8 (VPCMPGTQ512 x y)) + // result: (VPMOVMToVec64x8 (VPCMPQ512 [6] x y)) for { x := v_0 y := v_1 v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPGTQ512, typ.Mask) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ512, typ.Mask) + v0.AuxInt = int8ToAuxInt(6) v0.AddArg2(x, y) v.AddArg(v0) return true @@ -31691,12 +31699,13 @@ func rewriteValueAMD64_OpGreaterInt8x64(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (GreaterInt8x64 x y) - // result: (VPMOVMToVec8x64 (VPCMPGTB512 x y)) + // result: (VPMOVMToVec8x64 (VPCMPB512 [6] x y)) for { x := v_0 y := v_1 v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPGTB512, typ.Mask) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB512, typ.Mask) + v0.AuxInt = int8ToAuxInt(6) v0.AddArg2(x, y) v.AddArg(v0) return true @@ -37259,13 +37268,14 @@ func rewriteValueAMD64_OpMaskedEqualInt16x16(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (MaskedEqualInt16x16 x y mask) - // result: (VPMOVMToVec16x16 (VPCMPEQWMasked256 x y (VPMOVVec16x16ToM mask))) + // result: (VPMOVMToVec16x16 (VPCMPWMasked256 [0] x y (VPMOVVec16x16ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQWMasked256, typ.Mask) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -37280,13 +37290,14 @@ func rewriteValueAMD64_OpMaskedEqualInt16x32(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (MaskedEqualInt16x32 x y mask) - // result: (VPMOVMToVec16x32 (VPCMPEQWMasked512 x y (VPMOVVec16x32ToM mask))) + // result: (VPMOVMToVec16x32 (VPCMPWMasked512 [0] x y (VPMOVVec16x32ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQWMasked512, typ.Mask) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -37301,13 +37312,14 @@ func rewriteValueAMD64_OpMaskedEqualInt16x8(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (MaskedEqualInt16x8 x y mask) - // result: (VPMOVMToVec16x8 (VPCMPEQWMasked128 x y (VPMOVVec16x8ToM mask))) + // result: (VPMOVMToVec16x8 (VPCMPWMasked128 [0] x y (VPMOVVec16x8ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQWMasked128, typ.Mask) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -37322,13 +37334,14 @@ func rewriteValueAMD64_OpMaskedEqualInt32x16(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (MaskedEqualInt32x16 x y mask) - // result: (VPMOVMToVec32x16 (VPCMPEQDMasked512 x y (VPMOVVec32x16ToM mask))) + // result: (VPMOVMToVec32x16 (VPCMPDMasked512 [0] x y (VPMOVVec32x16ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQDMasked512, typ.Mask) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) 
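// Note (editorial, not part of the generated source): the masked
// comparison rewrites below follow the same predicated-VPCMP pattern as
// the unmasked ones, with one extra step: the vector-typed mask operand
// is first converted to an opmask register via VPMOVVec*ToM and passed
// as the third argument of the VPCMP*Masked op, whose register class
// (per the opcode table above) both consumes and produces K1-K7 rather
// than an X vector register.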
v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -37343,13 +37356,14 @@ func rewriteValueAMD64_OpMaskedEqualInt32x4(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (MaskedEqualInt32x4 x y mask) - // result: (VPMOVMToVec32x4 (VPCMPEQDMasked128 x y (VPMOVVec32x4ToM mask))) + // result: (VPMOVMToVec32x4 (VPCMPDMasked128 [0] x y (VPMOVVec32x4ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQDMasked128, typ.Mask) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -37364,13 +37378,14 @@ func rewriteValueAMD64_OpMaskedEqualInt32x8(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (MaskedEqualInt32x8 x y mask) - // result: (VPMOVMToVec32x8 (VPCMPEQDMasked256 x y (VPMOVVec32x8ToM mask))) + // result: (VPMOVMToVec32x8 (VPCMPDMasked256 [0] x y (VPMOVVec32x8ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQDMasked256, typ.Mask) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -37385,13 +37400,14 @@ func rewriteValueAMD64_OpMaskedEqualInt64x2(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (MaskedEqualInt64x2 x y mask) - // result: (VPMOVMToVec64x2 (VPCMPEQQMasked128 x y (VPMOVVec64x2ToM mask))) + // result: (VPMOVMToVec64x2 (VPCMPQMasked128 [0] x y (VPMOVVec64x2ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQQMasked128, typ.Mask) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -37406,13 +37422,14 @@ func rewriteValueAMD64_OpMaskedEqualInt64x4(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (MaskedEqualInt64x4 x y mask) - // result: (VPMOVMToVec64x4 (VPCMPEQQMasked256 x y (VPMOVVec64x4ToM mask))) + // result: (VPMOVMToVec64x4 (VPCMPQMasked256 [0] x y (VPMOVVec64x4ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQQMasked256, typ.Mask) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -37427,13 +37444,14 @@ func rewriteValueAMD64_OpMaskedEqualInt64x8(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (MaskedEqualInt64x8 x y mask) - // result: (VPMOVMToVec64x8 (VPCMPEQQMasked512 x y (VPMOVVec64x8ToM mask))) + // result: (VPMOVMToVec64x8 (VPCMPQMasked512 [0] x y (VPMOVVec64x8ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQQMasked512, typ.Mask) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -37448,13 +37466,14 @@ func rewriteValueAMD64_OpMaskedEqualInt8x16(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (MaskedEqualInt8x16 x y mask) - // result: (VPMOVMToVec8x16 
(VPCMPEQBMasked128 x y (VPMOVVec8x16ToM mask))) + // result: (VPMOVMToVec8x16 (VPCMPBMasked128 [0] x y (VPMOVVec8x16ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQBMasked128, typ.Mask) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -37469,13 +37488,14 @@ func rewriteValueAMD64_OpMaskedEqualInt8x32(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (MaskedEqualInt8x32 x y mask) - // result: (VPMOVMToVec8x32 (VPCMPEQBMasked256 x y (VPMOVVec8x32ToM mask))) + // result: (VPMOVMToVec8x32 (VPCMPBMasked256 [0] x y (VPMOVVec8x32ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQBMasked256, typ.Mask) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -37490,13 +37510,14 @@ func rewriteValueAMD64_OpMaskedEqualInt8x64(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (MaskedEqualInt8x64 x y mask) - // result: (VPMOVMToVec8x64 (VPCMPEQBMasked512 x y (VPMOVVec8x64ToM mask))) + // result: (VPMOVMToVec8x64 (VPCMPBMasked512 [0] x y (VPMOVVec8x64ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQBMasked512, typ.Mask) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -40943,13 +40964,14 @@ func rewriteValueAMD64_OpMaskedGreaterInt16x16(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (MaskedGreaterInt16x16 x y mask) - // result: (VPMOVMToVec16x16 (VPCMPGTWMasked256 x y (VPMOVVec16x16ToM mask))) + // result: (VPMOVMToVec16x16 (VPCMPWMasked256 [6] x y (VPMOVVec16x16ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPGTWMasked256, typ.Mask) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(6) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -40964,13 +40986,14 @@ func rewriteValueAMD64_OpMaskedGreaterInt16x32(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (MaskedGreaterInt16x32 x y mask) - // result: (VPMOVMToVec16x32 (VPCMPGTWMasked512 x y (VPMOVVec16x32ToM mask))) + // result: (VPMOVMToVec16x32 (VPCMPWMasked512 [6] x y (VPMOVVec16x32ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPGTWMasked512, typ.Mask) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(6) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -40985,13 +41008,14 @@ func rewriteValueAMD64_OpMaskedGreaterInt16x8(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (MaskedGreaterInt16x8 x y mask) - // result: (VPMOVMToVec16x8 (VPCMPGTWMasked128 x y (VPMOVVec16x8ToM mask))) + // result: (VPMOVMToVec16x8 (VPCMPWMasked128 [6] x y (VPMOVVec16x8ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPGTWMasked128, typ.Mask) + v0 := 
b.NewValue0(v.Pos, OpAMD64VPCMPWMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(6) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -41006,13 +41030,14 @@ func rewriteValueAMD64_OpMaskedGreaterInt32x16(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (MaskedGreaterInt32x16 x y mask) - // result: (VPMOVMToVec32x16 (VPCMPGTDMasked512 x y (VPMOVVec32x16ToM mask))) + // result: (VPMOVMToVec32x16 (VPCMPDMasked512 [6] x y (VPMOVVec32x16ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPGTDMasked512, typ.Mask) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(6) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -41027,13 +41052,14 @@ func rewriteValueAMD64_OpMaskedGreaterInt32x4(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (MaskedGreaterInt32x4 x y mask) - // result: (VPMOVMToVec32x4 (VPCMPGTDMasked128 x y (VPMOVVec32x4ToM mask))) + // result: (VPMOVMToVec32x4 (VPCMPDMasked128 [6] x y (VPMOVVec32x4ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPGTDMasked128, typ.Mask) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(6) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -41048,13 +41074,14 @@ func rewriteValueAMD64_OpMaskedGreaterInt32x8(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (MaskedGreaterInt32x8 x y mask) - // result: (VPMOVMToVec32x8 (VPCMPGTDMasked256 x y (VPMOVVec32x8ToM mask))) + // result: (VPMOVMToVec32x8 (VPCMPDMasked256 [6] x y (VPMOVVec32x8ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPGTDMasked256, typ.Mask) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(6) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -41069,13 +41096,14 @@ func rewriteValueAMD64_OpMaskedGreaterInt64x2(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (MaskedGreaterInt64x2 x y mask) - // result: (VPMOVMToVec64x2 (VPCMPGTQMasked128 x y (VPMOVVec64x2ToM mask))) + // result: (VPMOVMToVec64x2 (VPCMPQMasked128 [6] x y (VPMOVVec64x2ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPGTQMasked128, typ.Mask) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(6) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -41090,13 +41118,14 @@ func rewriteValueAMD64_OpMaskedGreaterInt64x4(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (MaskedGreaterInt64x4 x y mask) - // result: (VPMOVMToVec64x4 (VPCMPGTQMasked256 x y (VPMOVVec64x4ToM mask))) + // result: (VPMOVMToVec64x4 (VPCMPQMasked256 [6] x y (VPMOVVec64x4ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPGTQMasked256, typ.Mask) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(6) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -41111,13 +41140,14 @@ func rewriteValueAMD64_OpMaskedGreaterInt64x8(v *Value) bool { b 
:= v.Block typ := &b.Func.Config.Types // match: (MaskedGreaterInt64x8 x y mask) - // result: (VPMOVMToVec64x8 (VPCMPGTQMasked512 x y (VPMOVVec64x8ToM mask))) + // result: (VPMOVMToVec64x8 (VPCMPQMasked512 [6] x y (VPMOVVec64x8ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPGTQMasked512, typ.Mask) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(6) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -41132,13 +41162,14 @@ func rewriteValueAMD64_OpMaskedGreaterInt8x16(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (MaskedGreaterInt8x16 x y mask) - // result: (VPMOVMToVec8x16 (VPCMPGTBMasked128 x y (VPMOVVec8x16ToM mask))) + // result: (VPMOVMToVec8x16 (VPCMPBMasked128 [6] x y (VPMOVVec8x16ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPGTBMasked128, typ.Mask) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(6) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -41153,13 +41184,14 @@ func rewriteValueAMD64_OpMaskedGreaterInt8x32(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (MaskedGreaterInt8x32 x y mask) - // result: (VPMOVMToVec8x32 (VPCMPGTBMasked256 x y (VPMOVVec8x32ToM mask))) + // result: (VPMOVMToVec8x32 (VPCMPBMasked256 [6] x y (VPMOVVec8x32ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPGTBMasked256, typ.Mask) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(6) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -41174,13 +41206,14 @@ func rewriteValueAMD64_OpMaskedGreaterInt8x64(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (MaskedGreaterInt8x64 x y mask) - // result: (VPMOVMToVec8x64 (VPCMPGTBMasked512 x y (VPMOVVec8x64ToM mask))) + // result: (VPMOVMToVec8x64 (VPCMPBMasked512 [6] x y (VPMOVVec8x64ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPGTBMasked512, typ.Mask) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(6) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -47044,12 +47077,12 @@ func rewriteValueAMD64_OpMaskedSubFloat32x16(v *Value) bool { v_0 := v.Args[0] b := v.Block // match: (MaskedSubFloat32x16 x y mask) - // result: (VADDPSMasked512 x y (VPMOVVec32x16ToM mask)) + // result: (VSUBPSMasked512 x y (VPMOVVec32x16ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VADDPSMasked512) + v.reset(OpAMD64VSUBPSMasked512) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) @@ -47062,12 +47095,12 @@ func rewriteValueAMD64_OpMaskedSubFloat32x4(v *Value) bool { v_0 := v.Args[0] b := v.Block // match: (MaskedSubFloat32x4 x y mask) - // result: (VADDPSMasked128 x y (VPMOVVec32x4ToM mask)) + // result: (VSUBPSMasked128 x y (VPMOVVec32x4ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VADDPSMasked128) + v.reset(OpAMD64VSUBPSMasked128) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) @@ -47080,12 +47113,12 @@ func 
rewriteValueAMD64_OpMaskedSubFloat32x8(v *Value) bool { v_0 := v.Args[0] b := v.Block // match: (MaskedSubFloat32x8 x y mask) - // result: (VADDPSMasked256 x y (VPMOVVec32x8ToM mask)) + // result: (VSUBPSMasked256 x y (VPMOVVec32x8ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VADDPSMasked256) + v.reset(OpAMD64VSUBPSMasked256) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) @@ -47098,12 +47131,12 @@ func rewriteValueAMD64_OpMaskedSubFloat64x2(v *Value) bool { v_0 := v.Args[0] b := v.Block // match: (MaskedSubFloat64x2 x y mask) - // result: (VADDPDMasked128 x y (VPMOVVec64x2ToM mask)) + // result: (VSUBPDMasked128 x y (VPMOVVec64x2ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VADDPDMasked128) + v.reset(OpAMD64VSUBPDMasked128) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) @@ -47116,12 +47149,12 @@ func rewriteValueAMD64_OpMaskedSubFloat64x4(v *Value) bool { v_0 := v.Args[0] b := v.Block // match: (MaskedSubFloat64x4 x y mask) - // result: (VADDPDMasked256 x y (VPMOVVec64x4ToM mask)) + // result: (VSUBPDMasked256 x y (VPMOVVec64x4ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VADDPDMasked256) + v.reset(OpAMD64VSUBPDMasked256) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) @@ -47134,12 +47167,12 @@ func rewriteValueAMD64_OpMaskedSubFloat64x8(v *Value) bool { v_0 := v.Args[0] b := v.Block // match: (MaskedSubFloat64x8 x y mask) - // result: (VADDPDMasked512 x y (VPMOVVec64x8ToM mask)) + // result: (VSUBPDMasked512 x y (VPMOVVec64x8ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VADDPDMasked512) + v.reset(OpAMD64VSUBPDMasked512) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) diff --git a/src/cmd/compile/internal/ssagen/simdintrinsics.go b/src/cmd/compile/internal/ssagen/simdintrinsics.go index f5492ac6e8e..b86c8151660 100644 --- a/src/cmd/compile/internal/ssagen/simdintrinsics.go +++ b/src/cmd/compile/internal/ssagen/simdintrinsics.go @@ -1370,195 +1370,195 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint32x4.MaskedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint32x8.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint32x8.MaskedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.CeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpCeilSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.CeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpCeilSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x8.CeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpCeilSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float32x16.CeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpCeilSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x2.CeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpCeilSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.CeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpCeilSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.CeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpCeilSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x16.CeilWithPrecision", opLen1Imm8(ssa.OpCeilWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.CeilWithPrecision", opLen1Imm8(ssa.OpCeilWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x8.CeilWithPrecision", opLen1Imm8(ssa.OpCeilWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float32x16.CeilWithPrecision", opLen1Imm8(ssa.OpCeilWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x2.CeilWithPrecision", opLen1Imm8(ssa.OpCeilWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.CeilWithPrecision", opLen1Imm8(ssa.OpCeilWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.CeilWithPrecision", opLen1Imm8(ssa.OpCeilWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x16.DiffWithCeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.DiffWithCeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x8.DiffWithCeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float32x16.DiffWithCeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x2.DiffWithCeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.DiffWithCeilSuppressExceptionWithPrecision", 
opLen1Imm8(ssa.OpDiffWithCeilSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.DiffWithCeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x16.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x8.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float32x16.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x2.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x16.DiffWithFloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.DiffWithFloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x8.DiffWithFloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float32x16.DiffWithFloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x2.DiffWithFloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.DiffWithFloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.DiffWithFloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x16.DiffWithFloorWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.DiffWithFloorWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x8.DiffWithFloorWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float32x16.DiffWithFloorWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x2.DiffWithFloorWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.DiffWithFloorWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.DiffWithFloorWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorWithPrecisionFloat64x8, 
types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x16.DiffWithRoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.DiffWithRoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x8.DiffWithRoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float32x16.DiffWithRoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x2.DiffWithRoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.DiffWithRoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.DiffWithRoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x16.DiffWithRoundWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.DiffWithRoundWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x8.DiffWithRoundWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float32x16.DiffWithRoundWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x2.DiffWithRoundWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.DiffWithRoundWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.DiffWithRoundWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x16.DiffWithTruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.DiffWithTruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x8.DiffWithTruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float32x16.DiffWithTruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x2.DiffWithTruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.DiffWithTruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.DiffWithTruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncSuppressExceptionWithPrecisionFloat64x8, 
types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x16.DiffWithTruncWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.DiffWithTruncWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x8.DiffWithTruncWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float32x16.DiffWithTruncWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x2.DiffWithTruncWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.DiffWithTruncWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.DiffWithTruncWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x16.FloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpFloorSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.FloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpFloorSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x8.FloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpFloorSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float32x16.FloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpFloorSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x2.FloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpFloorSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.FloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpFloorSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.FloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpFloorSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x16.FloorWithPrecision", opLen1Imm8(ssa.OpFloorWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.FloorWithPrecision", opLen1Imm8(ssa.OpFloorWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x8.FloorWithPrecision", opLen1Imm8(ssa.OpFloorWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float32x16.FloorWithPrecision", opLen1Imm8(ssa.OpFloorWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x2.FloorWithPrecision", opLen1Imm8(ssa.OpFloorWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.FloorWithPrecision", opLen1Imm8(ssa.OpFloorWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.FloorWithPrecision", opLen1Imm8(ssa.OpFloorWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x16.RoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpRoundSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.RoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpRoundSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x8.RoundSuppressExceptionWithPrecision", 
opLen1Imm8(ssa.OpRoundSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float32x16.RoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpRoundSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x2.RoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpRoundSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.RoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpRoundSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.RoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpRoundSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x16.RoundWithPrecision", opLen1Imm8(ssa.OpRoundWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.RoundWithPrecision", opLen1Imm8(ssa.OpRoundWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x8.RoundWithPrecision", opLen1Imm8(ssa.OpRoundWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float32x16.RoundWithPrecision", opLen1Imm8(ssa.OpRoundWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x2.RoundWithPrecision", opLen1Imm8(ssa.OpRoundWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.RoundWithPrecision", opLen1Imm8(ssa.OpRoundWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.RoundWithPrecision", opLen1Imm8(ssa.OpRoundWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x16.TruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpTruncSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.TruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpTruncSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x8.TruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpTruncSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float32x16.TruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpTruncSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x2.TruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpTruncSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.TruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpTruncSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.TruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpTruncSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x16.TruncWithPrecision", opLen1Imm8(ssa.OpTruncWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.TruncWithPrecision", opLen1Imm8(ssa.OpTruncWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x8.TruncWithPrecision", opLen1Imm8(ssa.OpTruncWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float32x16.TruncWithPrecision", opLen1Imm8(ssa.OpTruncWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x2.TruncWithPrecision", opLen1Imm8(ssa.OpTruncWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.TruncWithPrecision", 
opLen1Imm8(ssa.OpTruncWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.TruncWithPrecision", opLen1Imm8(ssa.OpTruncWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x16.MaskedCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedCeilSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.MaskedCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedCeilSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x8.MaskedCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedCeilSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float32x16.MaskedCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedCeilSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x2.MaskedCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedCeilSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.MaskedCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedCeilSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.MaskedCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedCeilSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x16.MaskedCeilWithPrecision", opLen2Imm8(ssa.OpMaskedCeilWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.MaskedCeilWithPrecision", opLen2Imm8(ssa.OpMaskedCeilWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x8.MaskedCeilWithPrecision", opLen2Imm8(ssa.OpMaskedCeilWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float32x16.MaskedCeilWithPrecision", opLen2Imm8(ssa.OpMaskedCeilWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x2.MaskedCeilWithPrecision", opLen2Imm8(ssa.OpMaskedCeilWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.MaskedCeilWithPrecision", opLen2Imm8(ssa.OpMaskedCeilWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.MaskedCeilWithPrecision", opLen2Imm8(ssa.OpMaskedCeilWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x16.MaskedDiffWithCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.MaskedDiffWithCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x8.MaskedDiffWithCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float32x16.MaskedDiffWithCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x2.MaskedDiffWithCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.MaskedDiffWithCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) 
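(Editor's note: illustration, not part of the patch.) Each opLen1Imm8 registration above binds a one-operand method carrying a constant immediate; the trailing 4 is presumably the shift that moves the caller's precision argument into imm8 bits 7:4, where VRNDSCALEPS/VRNDSCALEPD encode the number of fraction bits to keep. A minimal sketch of a call site, assuming the experimental simd package API declared later in this patch:

	package demo

	import "simd" // experimental GOEXPERIMENT package; import path assumed

	// truncTo2 truncates each element toward zero while keeping
	// 2 fraction bits, via Float64x8.TruncWithPrecision.
	func truncTo2(x simd.Float64x8) simd.Float64x8 {
		return x.TruncWithPrecision(2)
	}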
addF(simdPackage, "Float64x8.MaskedDiffWithCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x16.MaskedDiffWithCeilWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.MaskedDiffWithCeilWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x8.MaskedDiffWithCeilWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float32x16.MaskedDiffWithCeilWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x2.MaskedDiffWithCeilWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.MaskedDiffWithCeilWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.MaskedDiffWithCeilWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x16.MaskedDiffWithFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.MaskedDiffWithFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x8.MaskedDiffWithFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float32x16.MaskedDiffWithFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x2.MaskedDiffWithFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.MaskedDiffWithFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.MaskedDiffWithFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x16.MaskedDiffWithFloorWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.MaskedDiffWithFloorWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x8.MaskedDiffWithFloorWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float32x16.MaskedDiffWithFloorWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x2.MaskedDiffWithFloorWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.MaskedDiffWithFloorWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorWithPrecisionFloat64x4, 
types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.MaskedDiffWithFloorWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x16.MaskedDiffWithRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.MaskedDiffWithRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x8.MaskedDiffWithRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float32x16.MaskedDiffWithRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x2.MaskedDiffWithRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.MaskedDiffWithRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.MaskedDiffWithRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x16.MaskedDiffWithRoundWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.MaskedDiffWithRoundWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x8.MaskedDiffWithRoundWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float32x16.MaskedDiffWithRoundWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x2.MaskedDiffWithRoundWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.MaskedDiffWithRoundWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.MaskedDiffWithRoundWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x16.MaskedDiffWithTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.MaskedDiffWithTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x8.MaskedDiffWithTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float32x16.MaskedDiffWithTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x2.MaskedDiffWithTruncSuppressExceptionWithPrecision", 
opLen2Imm8(ssa.OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.MaskedDiffWithTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.MaskedDiffWithTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x16.MaskedDiffWithTruncWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.MaskedDiffWithTruncWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x8.MaskedDiffWithTruncWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float32x16.MaskedDiffWithTruncWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x2.MaskedDiffWithTruncWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.MaskedDiffWithTruncWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.MaskedDiffWithTruncWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x16.MaskedFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedFloorSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.MaskedFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedFloorSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x8.MaskedFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedFloorSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float32x16.MaskedFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedFloorSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x2.MaskedFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedFloorSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.MaskedFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedFloorSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.MaskedFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedFloorSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x16.MaskedFloorWithPrecision", opLen2Imm8(ssa.OpMaskedFloorWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.MaskedFloorWithPrecision", opLen2Imm8(ssa.OpMaskedFloorWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x8.MaskedFloorWithPrecision", opLen2Imm8(ssa.OpMaskedFloorWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float32x16.MaskedFloorWithPrecision", opLen2Imm8(ssa.OpMaskedFloorWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x2.MaskedFloorWithPrecision", opLen2Imm8(ssa.OpMaskedFloorWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) 
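(Editor's note: illustration, not part of the patch.) The Masked* registrations use opLen2Imm8 rather than opLen1Imm8 because the mask arrives as an ordinary second SSA operand alongside the immediate. A hedged sketch of the call shape, with the parameter order assumed and the treatment of deselected lanes (merge vs. zero) left open, since this hunk does not specify it:

	package demo

	import "simd" // experimental GOEXPERIMENT package; import path assumed

	// floorSelected floors only the lanes selected by m,
	// keeping 0 extra fraction bits (parameter order assumed).
	func floorSelected(x simd.Float32x16, m simd.Mask32x16) simd.Float32x16 {
		return x.MaskedFloorWithPrecision(m, 0)
	}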
addF(simdPackage, "Float64x4.MaskedFloorWithPrecision", opLen2Imm8(ssa.OpMaskedFloorWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.MaskedFloorWithPrecision", opLen2Imm8(ssa.OpMaskedFloorWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x16.MaskedRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedRoundSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.MaskedRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedRoundSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x8.MaskedRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedRoundSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float32x16.MaskedRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedRoundSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x2.MaskedRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedRoundSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.MaskedRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedRoundSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.MaskedRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedRoundSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x16.MaskedRoundWithPrecision", opLen2Imm8(ssa.OpMaskedRoundWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.MaskedRoundWithPrecision", opLen2Imm8(ssa.OpMaskedRoundWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x8.MaskedRoundWithPrecision", opLen2Imm8(ssa.OpMaskedRoundWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float32x16.MaskedRoundWithPrecision", opLen2Imm8(ssa.OpMaskedRoundWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x2.MaskedRoundWithPrecision", opLen2Imm8(ssa.OpMaskedRoundWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.MaskedRoundWithPrecision", opLen2Imm8(ssa.OpMaskedRoundWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.MaskedRoundWithPrecision", opLen2Imm8(ssa.OpMaskedRoundWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x16.MaskedTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedTruncSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.MaskedTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedTruncSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x8.MaskedTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedTruncSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float32x16.MaskedTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedTruncSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x2.MaskedTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedTruncSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.MaskedTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedTruncSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 
4), sys.AMD64) addF(simdPackage, "Float64x8.MaskedTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedTruncSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x16.MaskedTruncWithPrecision", opLen2Imm8(ssa.OpMaskedTruncWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.MaskedTruncWithPrecision", opLen2Imm8(ssa.OpMaskedTruncWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x8.MaskedTruncWithPrecision", opLen2Imm8(ssa.OpMaskedTruncWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float32x16.MaskedTruncWithPrecision", opLen2Imm8(ssa.OpMaskedTruncWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x2.MaskedTruncWithPrecision", opLen2Imm8(ssa.OpMaskedTruncWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.MaskedTruncWithPrecision", opLen2Imm8(ssa.OpMaskedTruncWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.MaskedTruncWithPrecision", opLen2Imm8(ssa.OpMaskedTruncWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) @@ -1832,12 +1832,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Uint8x64.AsUint16x32", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) addF(simdPackage, "Uint8x64.AsUint32x16", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) addF(simdPackage, "Uint8x64.AsUint64x8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) + addF(simdPackage, "LoadInt8x16", simdLoad(), sys.AMD64) + addF(simdPackage, "Int8x16.Store", simdStore(), sys.AMD64) addF(simdPackage, "LoadInt16x8", simdLoad(), sys.AMD64) addF(simdPackage, "Int16x8.Store", simdStore(), sys.AMD64) addF(simdPackage, "LoadInt32x4", simdLoad(), sys.AMD64) addF(simdPackage, "Int32x4.Store", simdStore(), sys.AMD64) - addF(simdPackage, "LoadInt8x16", simdLoad(), sys.AMD64) - addF(simdPackage, "Int8x16.Store", simdStore(), sys.AMD64) addF(simdPackage, "LoadInt64x2", simdLoad(), sys.AMD64) addF(simdPackage, "Int64x2.Store", simdStore(), sys.AMD64) addF(simdPackage, "LoadMask64x2", simdLoad(), sys.AMD64) @@ -1846,26 +1846,26 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Float32x4.Store", simdStore(), sys.AMD64) addF(simdPackage, "LoadFloat64x2", simdLoad(), sys.AMD64) addF(simdPackage, "Float64x2.Store", simdStore(), sys.AMD64) + addF(simdPackage, "LoadUint8x16", simdLoad(), sys.AMD64) + addF(simdPackage, "Uint8x16.Store", simdStore(), sys.AMD64) addF(simdPackage, "LoadUint16x8", simdLoad(), sys.AMD64) addF(simdPackage, "Uint16x8.Store", simdStore(), sys.AMD64) addF(simdPackage, "LoadUint32x4", simdLoad(), sys.AMD64) addF(simdPackage, "Uint32x4.Store", simdStore(), sys.AMD64) addF(simdPackage, "LoadUint64x2", simdLoad(), sys.AMD64) addF(simdPackage, "Uint64x2.Store", simdStore(), sys.AMD64) - addF(simdPackage, "LoadUint8x16", simdLoad(), sys.AMD64) - addF(simdPackage, "Uint8x16.Store", simdStore(), sys.AMD64) addF(simdPackage, "LoadMask32x4", simdLoad(), sys.AMD64) addF(simdPackage, "Mask32x4.Store", simdStore(), sys.AMD64) - addF(simdPackage, "LoadMask16x8", simdLoad(), sys.AMD64) - addF(simdPackage, "Mask16x8.Store", simdStore(), sys.AMD64) addF(simdPackage, "LoadMask8x16", simdLoad(), sys.AMD64) addF(simdPackage, "Mask8x16.Store", simdStore(), sys.AMD64) + addF(simdPackage, "LoadMask16x8", simdLoad(), sys.AMD64) + addF(simdPackage, "Mask16x8.Store", simdStore(), sys.AMD64) + addF(simdPackage, "LoadInt8x32", simdLoad(), sys.AMD64) + addF(simdPackage, "Int8x32.Store", simdStore(), sys.AMD64) addF(simdPackage, "LoadInt16x16", simdLoad(), sys.AMD64) addF(simdPackage, "Int16x16.Store", simdStore(), sys.AMD64) addF(simdPackage, "LoadInt32x8", simdLoad(), sys.AMD64) addF(simdPackage, "Int32x8.Store", simdStore(), sys.AMD64) - addF(simdPackage, "LoadInt8x32", simdLoad(), sys.AMD64) - addF(simdPackage, "Int8x32.Store", simdStore(), sys.AMD64) addF(simdPackage, "LoadInt64x4", simdLoad(), sys.AMD64) addF(simdPackage, "Int64x4.Store", simdStore(), sys.AMD64) addF(simdPackage, "LoadMask64x4", simdLoad(), sys.AMD64) @@ -1874,20 +1874,24 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Float32x8.Store", simdStore(), sys.AMD64) addF(simdPackage, "LoadFloat64x4", simdLoad(), sys.AMD64) addF(simdPackage, "Float64x4.Store", simdStore(), sys.AMD64) + addF(simdPackage, "LoadUint8x32", simdLoad(), sys.AMD64) + addF(simdPackage, "Uint8x32.Store", simdStore(), sys.AMD64) addF(simdPackage, "LoadUint16x16", simdLoad(), sys.AMD64) addF(simdPackage, "Uint16x16.Store", simdStore(), sys.AMD64) addF(simdPackage, "LoadUint32x8", simdLoad(), sys.AMD64) addF(simdPackage, "Uint32x8.Store", simdStore(), sys.AMD64) addF(simdPackage, "LoadUint64x4", simdLoad(), sys.AMD64) addF(simdPackage, "Uint64x4.Store", simdStore(), sys.AMD64) - addF(simdPackage, "LoadUint8x32", simdLoad(), sys.AMD64) - addF(simdPackage, "Uint8x32.Store", simdStore(), sys.AMD64) addF(simdPackage, "LoadMask32x8", simdLoad(), sys.AMD64) addF(simdPackage, "Mask32x8.Store", simdStore(), sys.AMD64) - addF(simdPackage, "LoadMask16x16", simdLoad(), sys.AMD64) - addF(simdPackage, "Mask16x16.Store", simdStore(), sys.AMD64) addF(simdPackage, "LoadMask8x32", simdLoad(), sys.AMD64) addF(simdPackage, "Mask8x32.Store", simdStore(), sys.AMD64) + addF(simdPackage, "LoadMask16x16", simdLoad(), sys.AMD64) + addF(simdPackage, "Mask16x16.Store", simdStore(), sys.AMD64) + addF(simdPackage, "LoadInt8x64", simdLoad(), sys.AMD64) + addF(simdPackage, "Int8x64.Store", simdStore(), sys.AMD64) + addF(simdPackage, "LoadMask8x64", simdLoad(), sys.AMD64) + addF(simdPackage, "Mask8x64.Store", simdStore(), sys.AMD64) addF(simdPackage, "LoadInt16x32", simdLoad(), sys.AMD64) addF(simdPackage, "Int16x32.Store", simdStore(), sys.AMD64) addF(simdPackage, "LoadMask16x32", simdLoad(), sys.AMD64) @@ -1900,22 +1904,18 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Int64x8.Store", simdStore(), sys.AMD64) addF(simdPackage, "LoadMask64x8", simdLoad(), sys.AMD64) addF(simdPackage, "Mask64x8.Store", simdStore(), sys.AMD64) - addF(simdPackage, "LoadInt8x64", simdLoad(), sys.AMD64) - addF(simdPackage, "Int8x64.Store", simdStore(), sys.AMD64) - addF(simdPackage, "LoadMask8x64", simdLoad(), sys.AMD64) - addF(simdPackage, "Mask8x64.Store", simdStore(), sys.AMD64) addF(simdPackage, "LoadFloat32x16", simdLoad(), sys.AMD64) addF(simdPackage, "Float32x16.Store", simdStore(), sys.AMD64) addF(simdPackage, "LoadFloat64x8", simdLoad(), sys.AMD64) addF(simdPackage, "Float64x8.Store", simdStore(), sys.AMD64) + addF(simdPackage, "LoadUint8x64", simdLoad(), sys.AMD64) + addF(simdPackage, "Uint8x64.Store", simdStore(), sys.AMD64) addF(simdPackage, "LoadUint16x32", simdLoad(), sys.AMD64) addF(simdPackage, "Uint16x32.Store", simdStore(), sys.AMD64) addF(simdPackage, "LoadUint32x16", simdLoad(), sys.AMD64) addF(simdPackage, "Uint32x16.Store", simdStore(), sys.AMD64) addF(simdPackage, "LoadUint64x8", simdLoad(), sys.AMD64) addF(simdPackage, "Uint64x8.Store", simdStore(), sys.AMD64) - addF(simdPackage, "LoadUint8x64", simdLoad(), sys.AMD64) - addF(simdPackage, "Uint8x64.Store", simdStore(), sys.AMD64) addF(simdPackage, "Mask16x16.AsInt16x16", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) addF(simdPackage, "Int16x16.AsMask16x16", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) addF(simdPackage, "Mask16x16.And", opLen2(ssa.OpAndInt32x8, types.TypeVec256), sys.AMD64) diff --git a/src/simd/stubs_amd64.go b/src/simd/stubs_amd64.go index cf37b5efced..65332bf3fa7 100644 --- a/src/simd/stubs_amd64.go +++ b/src/simd/stubs_amd64.go @@ 
-4,7259 +4,629 @@ package simd -// ApproximateReciprocal computes an approximate reciprocal of each element. -// -// Asm: VRCP14PS, CPU Feature: AVX512EVEX -func (x Float32x16) ApproximateReciprocal() Float32x16 +/* Absolute */ -// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. +// Absolute computes the absolute value of each element. // -// Asm: VRSQRT14PS, CPU Feature: AVX512EVEX -func (x Float32x16) ApproximateReciprocalOfSqrt() Float32x16 +// Asm: VPABSB, CPU Feature: AVX +func (x Int8x16) Absolute() Int8x16 -// Sqrt computes the square root of each element. +// Absolute computes the absolute value of each element. // -// Asm: VSQRTPS, CPU Feature: AVX512EVEX -func (x Float32x16) Sqrt() Float32x16 +// Asm: VPABSB, CPU Feature: AVX2 +func (x Int8x32) Absolute() Int8x32 + +// Absolute computes the absolute value of each element. +// +// Asm: VPABSB, CPU Feature: AVX512EVEX +func (x Int8x64) Absolute() Int8x64 + +// Absolute computes the absolute value of each element. +// +// Asm: VPABSW, CPU Feature: AVX +func (x Int16x8) Absolute() Int16x8 + +// Absolute computes the absolute value of each element. +// +// Asm: VPABSW, CPU Feature: AVX2 +func (x Int16x16) Absolute() Int16x16 + +// Absolute computes the absolute value of each element. +// +// Asm: VPABSW, CPU Feature: AVX512EVEX +func (x Int16x32) Absolute() Int16x32 + +// Absolute computes the absolute value of each element. +// +// Asm: VPABSD, CPU Feature: AVX +func (x Int32x4) Absolute() Int32x4 + +// Absolute computes the absolute value of each element. +// +// Asm: VPABSD, CPU Feature: AVX2 +func (x Int32x8) Absolute() Int32x8 + +// Absolute computes the absolute value of each element. +// +// Asm: VPABSD, CPU Feature: AVX512EVEX +func (x Int32x16) Absolute() Int32x16 + +// Absolute computes the absolute value of each element. +// +// Asm: VPABSQ, CPU Feature: AVX512EVEX +func (x Int64x2) Absolute() Int64x2 + +// Absolute computes the absolute value of each element. +// +// Asm: VPABSQ, CPU Feature: AVX512EVEX +func (x Int64x4) Absolute() Int64x4 + +// Absolute computes the absolute value of each element. +// +// Asm: VPABSQ, CPU Feature: AVX512EVEX +func (x Int64x8) Absolute() Int64x8 + +/* Add */ + +// Add adds corresponding elements of two vectors. +// +// Asm: VADDPS, CPU Feature: AVX +func (x Float32x4) Add(y Float32x4) Float32x4 + +// Add adds corresponding elements of two vectors. +// +// Asm: VADDPS, CPU Feature: AVX +func (x Float32x8) Add(y Float32x8) Float32x8 + +// Add adds corresponding elements of two vectors. +// +// Asm: VADDPS, CPU Feature: AVX512EVEX +func (x Float32x16) Add(y Float32x16) Float32x16 + +// Add adds corresponding elements of two vectors. +// +// Asm: VADDPD, CPU Feature: AVX +func (x Float64x2) Add(y Float64x2) Float64x2 + +// Add adds corresponding elements of two vectors. +// +// Asm: VADDPD, CPU Feature: AVX +func (x Float64x4) Add(y Float64x4) Float64x4 + +// Add adds corresponding elements of two vectors. +// +// Asm: VADDPD, CPU Feature: AVX512EVEX +func (x Float64x8) Add(y Float64x8) Float64x8 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDB, CPU Feature: AVX +func (x Int8x16) Add(y Int8x16) Int8x16 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDB, CPU Feature: AVX2 +func (x Int8x32) Add(y Int8x32) Int8x32 + +// Add adds corresponding elements of two vectors. 
+//
+// Asm: VPADDB, CPU Feature: AVX512EVEX
+func (x Int8x64) Add(y Int8x64) Int8x64
+
+// Add adds corresponding elements of two vectors.
+//
+// Asm: VPADDW, CPU Feature: AVX
+func (x Int16x8) Add(y Int16x8) Int16x8
+
+// Add adds corresponding elements of two vectors.
+//
+// Asm: VPADDW, CPU Feature: AVX2
+func (x Int16x16) Add(y Int16x16) Int16x16
+
+// Add adds corresponding elements of two vectors.
+//
+// Asm: VPADDW, CPU Feature: AVX512EVEX
+func (x Int16x32) Add(y Int16x32) Int16x32
+
+// Add adds corresponding elements of two vectors.
+//
+// Asm: VPADDD, CPU Feature: AVX
+func (x Int32x4) Add(y Int32x4) Int32x4
+
+// Add adds corresponding elements of two vectors.
+//
+// Asm: VPADDD, CPU Feature: AVX2
+func (x Int32x8) Add(y Int32x8) Int32x8
+
+// Add adds corresponding elements of two vectors.
+//
+// Asm: VPADDD, CPU Feature: AVX512EVEX
+func (x Int32x16) Add(y Int32x16) Int32x16
+
+// Add adds corresponding elements of two vectors.
+//
+// Asm: VPADDQ, CPU Feature: AVX
+func (x Int64x2) Add(y Int64x2) Int64x2
+
+// Add adds corresponding elements of two vectors.
+//
+// Asm: VPADDQ, CPU Feature: AVX2
+func (x Int64x4) Add(y Int64x4) Int64x4
+
+// Add adds corresponding elements of two vectors.
+//
+// Asm: VPADDQ, CPU Feature: AVX512EVEX
+func (x Int64x8) Add(y Int64x8) Int64x8
+
+// Add adds corresponding elements of two vectors.
+//
+// Asm: VPADDB, CPU Feature: AVX
+func (x Uint8x16) Add(y Uint8x16) Uint8x16
+
+// Add adds corresponding elements of two vectors.
+//
+// Asm: VPADDB, CPU Feature: AVX2
+func (x Uint8x32) Add(y Uint8x32) Uint8x32
+
+// Add adds corresponding elements of two vectors.
+//
+// Asm: VPADDB, CPU Feature: AVX512EVEX
+func (x Uint8x64) Add(y Uint8x64) Uint8x64
+
+// Add adds corresponding elements of two vectors.
+//
+// Asm: VPADDW, CPU Feature: AVX
+func (x Uint16x8) Add(y Uint16x8) Uint16x8
+
+// Add adds corresponding elements of two vectors.
+//
+// Asm: VPADDW, CPU Feature: AVX2
+func (x Uint16x16) Add(y Uint16x16) Uint16x16
+
+// Add adds corresponding elements of two vectors.
+//
+// Asm: VPADDW, CPU Feature: AVX512EVEX
+func (x Uint16x32) Add(y Uint16x32) Uint16x32
+
+// Add adds corresponding elements of two vectors.
+//
+// Asm: VPADDD, CPU Feature: AVX
+func (x Uint32x4) Add(y Uint32x4) Uint32x4
+
+// Add adds corresponding elements of two vectors.
+//
+// Asm: VPADDD, CPU Feature: AVX2
+func (x Uint32x8) Add(y Uint32x8) Uint32x8
+
+// Add adds corresponding elements of two vectors.
+//
+// Asm: VPADDD, CPU Feature: AVX512EVEX
+func (x Uint32x16) Add(y Uint32x16) Uint32x16
+
+// Add adds corresponding elements of two vectors.
+//
+// Asm: VPADDQ, CPU Feature: AVX
+func (x Uint64x2) Add(y Uint64x2) Uint64x2
+
+// Add adds corresponding elements of two vectors.
+//
+// Asm: VPADDQ, CPU Feature: AVX2
+func (x Uint64x4) Add(y Uint64x4) Uint64x4
+
+// Add adds corresponding elements of two vectors.
+//
+// Asm: VPADDQ, CPU Feature: AVX512EVEX
+func (x Uint64x8) Add(y Uint64x8) Uint64x8
+
+/* AddSub */
+
+// AddSub subtracts even elements and adds odd elements of two vectors.
+//
+// Asm: VADDSUBPS, CPU Feature: AVX
+func (x Float32x4) AddSub(y Float32x4) Float32x4
+
+// AddSub subtracts even elements and adds odd elements of two vectors.
+//
+// Asm: VADDSUBPS, CPU Feature: AVX
+func (x Float32x8) AddSub(y Float32x8) Float32x8
+
+// AddSub subtracts even elements and adds odd elements of two vectors.
+//
+// Asm: VADDSUBPD, CPU Feature: AVX
+func (x Float64x2) AddSub(y Float64x2) Float64x2
+
+// AddSub subtracts even elements and adds odd elements of two vectors.
+//
+// Asm: VADDSUBPD, CPU Feature: AVX
+func (x Float64x4) AddSub(y Float64x4) Float64x4
+
+/* And */
+
+// And performs a bitwise AND operation between two vectors.
+//
+// Asm: VANDPS, CPU Feature: AVX
+func (x Float32x4) And(y Float32x4) Float32x4
+
+// And performs a bitwise AND operation between two vectors.
+//
+// Asm: VANDPS, CPU Feature: AVX
+func (x Float32x8) And(y Float32x8) Float32x8
+
+// And performs a bitwise AND operation between two vectors.
+//
+// Asm: VANDPS, CPU Feature: AVX512EVEX
+func (x Float32x16) And(y Float32x16) Float32x16
+
+// And performs a bitwise AND operation between two vectors.
+//
+// Asm: VANDPD, CPU Feature: AVX
+func (x Float64x2) And(y Float64x2) Float64x2
+
+// And performs a bitwise AND operation between two vectors.
+//
+// Asm: VANDPD, CPU Feature: AVX
+func (x Float64x4) And(y Float64x4) Float64x4
+
+// And performs a bitwise AND operation between two vectors.
+//
+// Asm: VANDPD, CPU Feature: AVX512EVEX
+func (x Float64x8) And(y Float64x8) Float64x8
+
+// And performs a bitwise AND operation between two vectors.
+//
+// Asm: VPAND, CPU Feature: AVX
+func (x Int8x16) And(y Int8x16) Int8x16
+
+// And performs a bitwise AND operation between two vectors.
+//
+// Asm: VPAND, CPU Feature: AVX2
+func (x Int8x32) And(y Int8x32) Int8x32
+
+// And performs a bitwise AND operation between two vectors.
+//
+// Asm: VPAND, CPU Feature: AVX
+func (x Int16x8) And(y Int16x8) Int16x8
+
+// And performs a bitwise AND operation between two vectors.
+//
+// Asm: VPAND, CPU Feature: AVX2
+func (x Int16x16) And(y Int16x16) Int16x16
+
+// And performs a bitwise AND operation between two vectors.
+//
+// Asm: VPAND, CPU Feature: AVX
+func (x Int32x4) And(y Int32x4) Int32x4
+
+// And performs a bitwise AND operation between two vectors.
+//
+// Asm: VPAND, CPU Feature: AVX2
+func (x Int32x8) And(y Int32x8) Int32x8
+
+// And performs a bitwise AND operation between two vectors.
+//
+// Asm: VPANDD, CPU Feature: AVX512EVEX
+func (x Int32x16) And(y Int32x16) Int32x16
+
+// And performs a bitwise AND operation between two vectors.
+//
+// Asm: VPAND, CPU Feature: AVX
+func (x Int64x2) And(y Int64x2) Int64x2
+
+// And performs a bitwise AND operation between two vectors.
+//
+// Asm: VPAND, CPU Feature: AVX2
+func (x Int64x4) And(y Int64x4) Int64x4
+
+// And performs a bitwise AND operation between two vectors.
+//
+// Asm: VPANDQ, CPU Feature: AVX512EVEX
+func (x Int64x8) And(y Int64x8) Int64x8
+
+// And performs a bitwise AND operation between two vectors.
+//
+// Asm: VPAND, CPU Feature: AVX
+func (x Uint8x16) And(y Uint8x16) Uint8x16
+
+// And performs a bitwise AND operation between two vectors.
+//
+// Asm: VPAND, CPU Feature: AVX2
+func (x Uint8x32) And(y Uint8x32) Uint8x32
+
+// And performs a bitwise AND operation between two vectors.
+//
+// Asm: VPAND, CPU Feature: AVX
+func (x Uint16x8) And(y Uint16x8) Uint16x8
+
+// And performs a bitwise AND operation between two vectors.
+//
+// Asm: VPAND, CPU Feature: AVX2
+func (x Uint16x16) And(y Uint16x16) Uint16x16
+
+// And performs a bitwise AND operation between two vectors.
+//
+// Asm: VPAND, CPU Feature: AVX
+func (x Uint32x4) And(y Uint32x4) Uint32x4
+
+// And performs a bitwise AND operation between two vectors.
+//
+// Asm: VPAND, CPU Feature: AVX2
+func (x Uint32x8) And(y Uint32x8) Uint32x8
+
+// And performs a bitwise AND operation between two vectors.
+//
+// Asm: VPANDD, CPU Feature: AVX512EVEX
+func (x Uint32x16) And(y Uint32x16) Uint32x16
+
+// And performs a bitwise AND operation between two vectors.
+//
+// Asm: VPAND, CPU Feature: AVX
+func (x Uint64x2) And(y Uint64x2) Uint64x2
+
+// And performs a bitwise AND operation between two vectors.
+//
+// Asm: VPAND, CPU Feature: AVX2
+func (x Uint64x4) And(y Uint64x4) Uint64x4
+
+// And performs a bitwise AND operation between two vectors.
+//
+// Asm: VPANDQ, CPU Feature: AVX512EVEX
+func (x Uint64x8) And(y Uint64x8) Uint64x8
+
+/* AndNot */
+
+// AndNot performs a bitwise AND NOT operation between two vectors.
+//
+// Asm: VANDNPS, CPU Feature: AVX
+func (x Float32x4) AndNot(y Float32x4) Float32x4
+
+// AndNot performs a bitwise AND NOT operation between two vectors.
+//
+// Asm: VANDNPS, CPU Feature: AVX
+func (x Float32x8) AndNot(y Float32x8) Float32x8
+
+// AndNot performs a bitwise AND NOT operation between two vectors.
+//
+// Asm: VANDNPS, CPU Feature: AVX512EVEX
+func (x Float32x16) AndNot(y Float32x16) Float32x16
+
+// AndNot performs a bitwise AND NOT operation between two vectors.
+//
+// Asm: VANDNPD, CPU Feature: AVX
+func (x Float64x2) AndNot(y Float64x2) Float64x2
+
+// AndNot performs a bitwise AND NOT operation between two vectors.
+//
+// Asm: VANDNPD, CPU Feature: AVX
+func (x Float64x4) AndNot(y Float64x4) Float64x4
+
+// AndNot performs a bitwise AND NOT operation between two vectors.
+//
+// Asm: VANDNPD, CPU Feature: AVX512EVEX
+func (x Float64x8) AndNot(y Float64x8) Float64x8
+
+// AndNot performs a bitwise AND NOT operation between two vectors.
+//
+// Asm: VPANDN, CPU Feature: AVX
+func (x Int8x16) AndNot(y Int8x16) Int8x16
+
+// AndNot performs a bitwise AND NOT operation between two vectors.
+//
+// Asm: VPANDN, CPU Feature: AVX2
+func (x Int8x32) AndNot(y Int8x32) Int8x32
+
+// AndNot performs a bitwise AND NOT operation between two vectors.
+//
+// Asm: VPANDN, CPU Feature: AVX
+func (x Int16x8) AndNot(y Int16x8) Int16x8
+
+// AndNot performs a bitwise AND NOT operation between two vectors.
+//
+// Asm: VPANDN, CPU Feature: AVX2
+func (x Int16x16) AndNot(y Int16x16) Int16x16
+
+// AndNot performs a bitwise AND NOT operation between two vectors.
+//
+// Asm: VPANDN, CPU Feature: AVX
+func (x Int32x4) AndNot(y Int32x4) Int32x4
+
+// AndNot performs a bitwise AND NOT operation between two vectors.
+//
+// Asm: VPANDN, CPU Feature: AVX2
+func (x Int32x8) AndNot(y Int32x8) Int32x8
+
+// AndNot performs a bitwise AND NOT operation between two vectors.
+//
+// Asm: VPANDND, CPU Feature: AVX512EVEX
+func (x Int32x16) AndNot(y Int32x16) Int32x16
+
+// AndNot performs a bitwise AND NOT operation between two vectors.
+//
+// Asm: VPANDN, CPU Feature: AVX
+func (x Int64x2) AndNot(y Int64x2) Int64x2
+
+// AndNot performs a bitwise AND NOT operation between two vectors.
+//
+// Asm: VPANDN, CPU Feature: AVX2
+func (x Int64x4) AndNot(y Int64x4) Int64x4
+
+// AndNot performs a bitwise AND NOT operation between two vectors.
+//
+// Asm: VPANDNQ, CPU Feature: AVX512EVEX
+func (x Int64x8) AndNot(y Int64x8) Int64x8
+
+// AndNot performs a bitwise AND NOT operation between two vectors.
+//
+// Asm: VPANDN, CPU Feature: AVX
+func (x Uint8x16) AndNot(y Uint8x16) Uint8x16
+
+// AndNot performs a bitwise AND NOT operation between two vectors.
+//
+// Asm: VPANDN, CPU Feature: AVX2
+func (x Uint8x32) AndNot(y Uint8x32) Uint8x32
+
+// AndNot performs a bitwise AND NOT operation between two vectors.
+//
+// Asm: VPANDN, CPU Feature: AVX
+func (x Uint16x8) AndNot(y Uint16x8) Uint16x8
+
+// AndNot performs a bitwise AND NOT operation between two vectors.
+//
+// Asm: VPANDN, CPU Feature: AVX2
+func (x Uint16x16) AndNot(y Uint16x16) Uint16x16
+
+// AndNot performs a bitwise AND NOT operation between two vectors.
+//
+// Asm: VPANDN, CPU Feature: AVX
+func (x Uint32x4) AndNot(y Uint32x4) Uint32x4
+
+// AndNot performs a bitwise AND NOT operation between two vectors.
+//
+// Asm: VPANDN, CPU Feature: AVX2
+func (x Uint32x8) AndNot(y Uint32x8) Uint32x8
+
+// AndNot performs a bitwise AND NOT operation between two vectors.
+//
+// Asm: VPANDND, CPU Feature: AVX512EVEX
+func (x Uint32x16) AndNot(y Uint32x16) Uint32x16
+
+// AndNot performs a bitwise AND NOT operation between two vectors.
+//
+// Asm: VPANDN, CPU Feature: AVX
+func (x Uint64x2) AndNot(y Uint64x2) Uint64x2
+
+// AndNot performs a bitwise AND NOT operation between two vectors.
+//
+// Asm: VPANDN, CPU Feature: AVX2
+func (x Uint64x4) AndNot(y Uint64x4) Uint64x4
+
+// AndNot performs a bitwise AND NOT operation between two vectors.
+//
+// Asm: VPANDNQ, CPU Feature: AVX512EVEX
+func (x Uint64x8) AndNot(y Uint64x8) Uint64x8
+
+/* ApproximateReciprocal */
+
 // ApproximateReciprocal computes an approximate reciprocal of each element.
 //
 // Asm: VRCP14PS, CPU Feature: AVX512EVEX
 func (x Float32x4) ApproximateReciprocal() Float32x4
 
+// ApproximateReciprocal computes an approximate reciprocal of each element.
+//
+// Asm: VRCP14PS, CPU Feature: AVX512EVEX
+func (x Float32x8) ApproximateReciprocal() Float32x8
+
+// ApproximateReciprocal computes an approximate reciprocal of each element.
+//
+// Asm: VRCP14PS, CPU Feature: AVX512EVEX
+func (x Float32x16) ApproximateReciprocal() Float32x16
+
+// ApproximateReciprocal computes an approximate reciprocal of each element.
+//
+// Asm: VRCP14PD, CPU Feature: AVX512EVEX
+func (x Float64x2) ApproximateReciprocal() Float64x2
+
+// ApproximateReciprocal computes an approximate reciprocal of each element.
+//
+// Asm: VRCP14PD, CPU Feature: AVX512EVEX
+func (x Float64x4) ApproximateReciprocal() Float64x4
+
+// ApproximateReciprocal computes an approximate reciprocal of each element.
+//
+// Asm: VRCP14PD, CPU Feature: AVX512EVEX
+func (x Float64x8) ApproximateReciprocal() Float64x8
+
+/* ApproximateReciprocalOfSqrt */
+
 // ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element.
 //
 // Asm: VRSQRTPS, CPU Feature: AVX
 func (x Float32x4) ApproximateReciprocalOfSqrt() Float32x4
 
+// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element.
+//
+// Asm: VRSQRTPS, CPU Feature: AVX
+func (x Float32x8) ApproximateReciprocalOfSqrt() Float32x8
+
+// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element.
+//
+// Asm: VRSQRT14PS, CPU Feature: AVX512EVEX
+func (x Float32x16) ApproximateReciprocalOfSqrt() Float32x16
+
+// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element.
+//
+// Asm: VRSQRT14PD, CPU Feature: AVX512EVEX
+func (x Float64x2) ApproximateReciprocalOfSqrt() Float64x2
+
+// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element.
+// +// Asm: VRSQRT14PD, CPU Feature: AVX512EVEX +func (x Float64x4) ApproximateReciprocalOfSqrt() Float64x4 + +// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. +// +// Asm: VRSQRT14PD, CPU Feature: AVX512EVEX +func (x Float64x8) ApproximateReciprocalOfSqrt() Float64x8 + +/* Average */ + +// Average computes the rounded average of corresponding elements. +// +// Asm: VPAVGB, CPU Feature: AVX +func (x Uint8x16) Average(y Uint8x16) Uint8x16 + +// Average computes the rounded average of corresponding elements. +// +// Asm: VPAVGB, CPU Feature: AVX2 +func (x Uint8x32) Average(y Uint8x32) Uint8x32 + +// Average computes the rounded average of corresponding elements. +// +// Asm: VPAVGB, CPU Feature: AVX512EVEX +func (x Uint8x64) Average(y Uint8x64) Uint8x64 + +// Average computes the rounded average of corresponding elements. +// +// Asm: VPAVGW, CPU Feature: AVX +func (x Uint16x8) Average(y Uint16x8) Uint16x8 + +// Average computes the rounded average of corresponding elements. +// +// Asm: VPAVGW, CPU Feature: AVX2 +func (x Uint16x16) Average(y Uint16x16) Uint16x16 + +// Average computes the rounded average of corresponding elements. +// +// Asm: VPAVGW, CPU Feature: AVX512EVEX +func (x Uint16x32) Average(y Uint16x32) Uint16x32 + +/* Ceil */ + // Ceil rounds elements up to the nearest integer. // Const Immediate = 2. // // Asm: VROUNDPS, CPU Feature: AVX func (x Float32x4) Ceil() Float32x4 -// Floor rounds elements down to the nearest integer. -// Const Immediate = 1. -// -// Asm: VROUNDPS, CPU Feature: AVX -func (x Float32x4) Floor() Float32x4 - -// Round rounds elements to the nearest integer. -// Const Immediate = 0. -// -// Asm: VROUNDPS, CPU Feature: AVX -func (x Float32x4) Round() Float32x4 - -// Sqrt computes the square root of each element. -// -// Asm: VSQRTPS, CPU Feature: AVX -func (x Float32x4) Sqrt() Float32x4 - -// Trunc truncates elements towards zero. -// Const Immediate = 3. -// -// Asm: VROUNDPS, CPU Feature: AVX -func (x Float32x4) Trunc() Float32x4 - -// ApproximateReciprocal computes an approximate reciprocal of each element. -// -// Asm: VRCP14PS, CPU Feature: AVX512EVEX -func (x Float32x8) ApproximateReciprocal() Float32x8 - -// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. -// -// Asm: VRSQRTPS, CPU Feature: AVX -func (x Float32x8) ApproximateReciprocalOfSqrt() Float32x8 - // Ceil rounds elements up to the nearest integer. // Const Immediate = 2. // // Asm: VROUNDPS, CPU Feature: AVX func (x Float32x8) Ceil() Float32x8 -// Floor rounds elements down to the nearest integer. -// Const Immediate = 1. -// -// Asm: VROUNDPS, CPU Feature: AVX -func (x Float32x8) Floor() Float32x8 - -// Round rounds elements to the nearest integer. -// Const Immediate = 0. -// -// Asm: VROUNDPS, CPU Feature: AVX -func (x Float32x8) Round() Float32x8 - -// Sqrt computes the square root of each element. -// -// Asm: VSQRTPS, CPU Feature: AVX -func (x Float32x8) Sqrt() Float32x8 - -// Trunc truncates elements towards zero. -// Const Immediate = 3. -// -// Asm: VROUNDPS, CPU Feature: AVX -func (x Float32x8) Trunc() Float32x8 - -// ApproximateReciprocal computes an approximate reciprocal of each element. -// -// Asm: VRCP14PD, CPU Feature: AVX512EVEX -func (x Float64x2) ApproximateReciprocal() Float64x2 - -// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. 
-// -// Asm: VRSQRT14PD, CPU Feature: AVX512EVEX -func (x Float64x2) ApproximateReciprocalOfSqrt() Float64x2 - // Ceil rounds elements up to the nearest integer. // Const Immediate = 2. // // Asm: VROUNDPD, CPU Feature: AVX func (x Float64x2) Ceil() Float64x2 -// Floor rounds elements down to the nearest integer. -// Const Immediate = 1. -// -// Asm: VROUNDPD, CPU Feature: AVX -func (x Float64x2) Floor() Float64x2 - -// Round rounds elements to the nearest integer. -// Const Immediate = 0. -// -// Asm: VROUNDPD, CPU Feature: AVX -func (x Float64x2) Round() Float64x2 - -// Sqrt computes the square root of each element. -// -// Asm: VSQRTPD, CPU Feature: AVX -func (x Float64x2) Sqrt() Float64x2 - -// Trunc truncates elements towards zero. -// Const Immediate = 3. -// -// Asm: VROUNDPD, CPU Feature: AVX -func (x Float64x2) Trunc() Float64x2 - -// ApproximateReciprocal computes an approximate reciprocal of each element. -// -// Asm: VRCP14PD, CPU Feature: AVX512EVEX -func (x Float64x4) ApproximateReciprocal() Float64x4 - -// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. -// -// Asm: VRSQRT14PD, CPU Feature: AVX512EVEX -func (x Float64x4) ApproximateReciprocalOfSqrt() Float64x4 - // Ceil rounds elements up to the nearest integer. // Const Immediate = 2. // // Asm: VROUNDPD, CPU Feature: AVX func (x Float64x4) Ceil() Float64x4 -// Floor rounds elements down to the nearest integer. -// Const Immediate = 1. -// -// Asm: VROUNDPD, CPU Feature: AVX -func (x Float64x4) Floor() Float64x4 - -// Round rounds elements to the nearest integer. -// Const Immediate = 0. -// -// Asm: VROUNDPD, CPU Feature: AVX -func (x Float64x4) Round() Float64x4 - -// Sqrt computes the square root of each element. -// -// Asm: VSQRTPD, CPU Feature: AVX -func (x Float64x4) Sqrt() Float64x4 - -// Trunc truncates elements towards zero. -// Const Immediate = 3. -// -// Asm: VROUNDPD, CPU Feature: AVX -func (x Float64x4) Trunc() Float64x4 - -// ApproximateReciprocal computes an approximate reciprocal of each element. -// -// Asm: VRCP14PD, CPU Feature: AVX512EVEX -func (x Float64x8) ApproximateReciprocal() Float64x8 - -// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. -// -// Asm: VRSQRT14PD, CPU Feature: AVX512EVEX -func (x Float64x8) ApproximateReciprocalOfSqrt() Float64x8 - -// Sqrt computes the square root of each element. -// -// Asm: VSQRTPD, CPU Feature: AVX512EVEX -func (x Float64x8) Sqrt() Float64x8 - -// Absolute computes the absolute value of each element. -// -// Asm: VPABSW, CPU Feature: AVX2 -func (x Int16x16) Absolute() Int16x16 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTW, CPU Feature: AVX512EVEX -func (x Int16x16) PopCount() Int16x16 - -// Absolute computes the absolute value of each element. -// -// Asm: VPABSW, CPU Feature: AVX512EVEX -func (x Int16x32) Absolute() Int16x32 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTW, CPU Feature: AVX512EVEX -func (x Int16x32) PopCount() Int16x32 - -// Absolute computes the absolute value of each element. -// -// Asm: VPABSW, CPU Feature: AVX -func (x Int16x8) Absolute() Int16x8 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTW, CPU Feature: AVX512EVEX -func (x Int16x8) PopCount() Int16x8 - -// Absolute computes the absolute value of each element. 
-// -// Asm: VPABSD, CPU Feature: AVX512EVEX -func (x Int32x16) Absolute() Int32x16 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTD, CPU Feature: AVX512EVEX -func (x Int32x16) PopCount() Int32x16 - -// Absolute computes the absolute value of each element. -// -// Asm: VPABSD, CPU Feature: AVX -func (x Int32x4) Absolute() Int32x4 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTD, CPU Feature: AVX512EVEX -func (x Int32x4) PopCount() Int32x4 - -// Absolute computes the absolute value of each element. -// -// Asm: VPABSD, CPU Feature: AVX2 -func (x Int32x8) Absolute() Int32x8 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTD, CPU Feature: AVX512EVEX -func (x Int32x8) PopCount() Int32x8 - -// Absolute computes the absolute value of each element. -// -// Asm: VPABSQ, CPU Feature: AVX512EVEX -func (x Int64x2) Absolute() Int64x2 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX -func (x Int64x2) PopCount() Int64x2 - -// Absolute computes the absolute value of each element. -// -// Asm: VPABSQ, CPU Feature: AVX512EVEX -func (x Int64x4) Absolute() Int64x4 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX -func (x Int64x4) PopCount() Int64x4 - -// Absolute computes the absolute value of each element. -// -// Asm: VPABSQ, CPU Feature: AVX512EVEX -func (x Int64x8) Absolute() Int64x8 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX -func (x Int64x8) PopCount() Int64x8 - -// Absolute computes the absolute value of each element. -// -// Asm: VPABSB, CPU Feature: AVX -func (x Int8x16) Absolute() Int8x16 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTB, CPU Feature: AVX512EVEX -func (x Int8x16) PopCount() Int8x16 - -// Absolute computes the absolute value of each element. -// -// Asm: VPABSB, CPU Feature: AVX2 -func (x Int8x32) Absolute() Int8x32 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTB, CPU Feature: AVX512EVEX -func (x Int8x32) PopCount() Int8x32 - -// Absolute computes the absolute value of each element. -// -// Asm: VPABSB, CPU Feature: AVX512EVEX -func (x Int8x64) Absolute() Int8x64 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTB, CPU Feature: AVX512EVEX -func (x Int8x64) PopCount() Int8x64 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTW, CPU Feature: AVX512EVEX -func (x Uint16x16) PopCount() Uint16x16 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTW, CPU Feature: AVX512EVEX -func (x Uint16x32) PopCount() Uint16x32 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTW, CPU Feature: AVX512EVEX -func (x Uint16x8) PopCount() Uint16x8 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTD, CPU Feature: AVX512EVEX -func (x Uint32x16) PopCount() Uint32x16 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTD, CPU Feature: AVX512EVEX -func (x Uint32x4) PopCount() Uint32x4 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTD, CPU Feature: AVX512EVEX -func (x Uint32x8) PopCount() Uint32x8 - -// PopCount counts the number of set bits in each element. 
-// -// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX -func (x Uint64x2) PopCount() Uint64x2 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX -func (x Uint64x4) PopCount() Uint64x4 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX -func (x Uint64x8) PopCount() Uint64x8 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTB, CPU Feature: AVX512EVEX -func (x Uint8x16) PopCount() Uint8x16 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTB, CPU Feature: AVX512EVEX -func (x Uint8x32) PopCount() Uint8x32 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTB, CPU Feature: AVX512EVEX -func (x Uint8x64) PopCount() Uint8x64 - -// Add adds corresponding elements of two vectors. -// -// Asm: VADDPS, CPU Feature: AVX512EVEX -func (x Float32x16) Add(y Float32x16) Float32x16 - -// And performs a masked bitwise AND operation between two vectors. -// -// Asm: VANDPS, CPU Feature: AVX512EVEX -func (x Float32x16) And(y Float32x16) Float32x16 - -// AndNot performs a masked bitwise AND NOT operation between two vectors. -// -// Asm: VANDNPS, CPU Feature: AVX512EVEX -func (x Float32x16) AndNot(y Float32x16) Float32x16 - -// Div divides elements of two vectors. -// -// Asm: VDIVPS, CPU Feature: AVX512EVEX -func (x Float32x16) Div(y Float32x16) Float32x16 - -// Equal compares for equality, masked. -// Const Immediate = 0. -// -// Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x16) Equal(y Float32x16) Mask32x16 - -// Greater compares for greater than. -// Const Immediate = 6. -// -// Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x16) Greater(y Float32x16) Mask32x16 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x16) GreaterEqual(y Float32x16) Mask32x16 - -// IsNan checks if elements are NaN. Use as x.IsNan(x). -// Const Immediate = 3. -// -// Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x16) IsNan(y Float32x16) Mask32x16 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x16) Less(y Float32x16) Mask32x16 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x16) LessEqual(y Float32x16) Mask32x16 - -// ApproximateReciprocal computes an approximate reciprocal of each element. -// -// Asm: VRCP14PS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedApproximateReciprocal(y Mask32x16) Float32x16 - -// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. -// -// Asm: VRSQRT14PS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedApproximateReciprocalOfSqrt(y Mask32x16) Float32x16 - -// Sqrt computes the square root of each element. -// -// Asm: VSQRTPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedSqrt(y Mask32x16) Float32x16 - -// Max computes the maximum of corresponding elements. -// -// Asm: VMAXPS, CPU Feature: AVX512EVEX -func (x Float32x16) Max(y Float32x16) Float32x16 - -// Min computes the minimum of corresponding elements. -// -// Asm: VMINPS, CPU Feature: AVX512EVEX -func (x Float32x16) Min(y Float32x16) Float32x16 - -// Mul multiplies corresponding elements of two vectors, masked. 
-// -// Asm: VMULPS, CPU Feature: AVX512EVEX -func (x Float32x16) Mul(y Float32x16) Float32x16 - -// MulByPowOf2 multiplies elements by a power of 2. -// -// Asm: VSCALEFPS, CPU Feature: AVX512EVEX -func (x Float32x16) MulByPowOf2(y Float32x16) Float32x16 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x16) NotEqual(y Float32x16) Mask32x16 - -// Or performs a masked bitwise OR operation between two vectors. -// -// Asm: VORPS, CPU Feature: AVX512EVEX -func (x Float32x16) Or(y Float32x16) Float32x16 - -// Sub subtracts corresponding elements of two vectors. -// -// Asm: VADDPS, CPU Feature: AVX512EVEX -func (x Float32x16) Sub(y Float32x16) Float32x16 - -// Xor performs a masked bitwise XOR operation between two vectors. -// -// Asm: VXORPS, CPU Feature: AVX512EVEX -func (x Float32x16) Xor(y Float32x16) Float32x16 - -// Add adds corresponding elements of two vectors. -// -// Asm: VADDPS, CPU Feature: AVX -func (x Float32x4) Add(y Float32x4) Float32x4 - -// AddSub subtracts even elements and adds odd elements of two vectors. -// -// Asm: VADDSUBPS, CPU Feature: AVX -func (x Float32x4) AddSub(y Float32x4) Float32x4 - -// And performs a bitwise AND operation between two vectors. -// -// Asm: VANDPS, CPU Feature: AVX -func (x Float32x4) And(y Float32x4) Float32x4 - -// AndNot performs a bitwise AND NOT operation between two vectors. -// -// Asm: VANDNPS, CPU Feature: AVX -func (x Float32x4) AndNot(y Float32x4) Float32x4 - -// Div divides elements of two vectors. -// -// Asm: VDIVPS, CPU Feature: AVX -func (x Float32x4) Div(y Float32x4) Float32x4 - -// Equal compares for equality. -// Const Immediate = 0. -// -// Asm: VCMPPS, CPU Feature: AVX -func (x Float32x4) Equal(y Float32x4) Mask32x4 - -// Greater compares for greater than. -// Const Immediate = 6. -// -// Asm: VCMPPS, CPU Feature: AVX -func (x Float32x4) Greater(y Float32x4) Mask32x4 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VCMPPS, CPU Feature: AVX -func (x Float32x4) GreaterEqual(y Float32x4) Mask32x4 - -// IsNan checks if elements are NaN. Use as x.IsNan(x). -// Const Immediate = 3. -// -// Asm: VCMPPS, CPU Feature: AVX -func (x Float32x4) IsNan(y Float32x4) Mask32x4 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VCMPPS, CPU Feature: AVX -func (x Float32x4) Less(y Float32x4) Mask32x4 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VCMPPS, CPU Feature: AVX -func (x Float32x4) LessEqual(y Float32x4) Mask32x4 - -// ApproximateReciprocal computes an approximate reciprocal of each element. -// -// Asm: VRCP14PS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedApproximateReciprocal(y Mask32x4) Float32x4 - -// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. -// -// Asm: VRSQRT14PS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedApproximateReciprocalOfSqrt(y Mask32x4) Float32x4 - -// Sqrt computes the square root of each element. -// -// Asm: VSQRTPS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedSqrt(y Mask32x4) Float32x4 - -// Max computes the maximum of corresponding elements. -// -// Asm: VMAXPS, CPU Feature: AVX -func (x Float32x4) Max(y Float32x4) Float32x4 - -// Min computes the minimum of corresponding elements. -// -// Asm: VMINPS, CPU Feature: AVX -func (x Float32x4) Min(y Float32x4) Float32x4 - -// Mul multiplies corresponding elements of two vectors. 
-// -// Asm: VMULPS, CPU Feature: AVX -func (x Float32x4) Mul(y Float32x4) Float32x4 - -// MulByPowOf2 multiplies elements by a power of 2. -// -// Asm: VSCALEFPS, CPU Feature: AVX512EVEX -func (x Float32x4) MulByPowOf2(y Float32x4) Float32x4 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VCMPPS, CPU Feature: AVX -func (x Float32x4) NotEqual(y Float32x4) Mask32x4 - -// Or performs a bitwise OR operation between two vectors. -// -// Asm: VORPS, CPU Feature: AVX -func (x Float32x4) Or(y Float32x4) Float32x4 - -// PairwiseAdd horizontally adds adjacent pairs of elements. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. -// -// Asm: VHADDPS, CPU Feature: AVX -func (x Float32x4) PairwiseAdd(y Float32x4) Float32x4 - -// PairwiseSub horizontally subtracts adjacent pairs of elements. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. -// -// Asm: VHSUBPS, CPU Feature: AVX -func (x Float32x4) PairwiseSub(y Float32x4) Float32x4 - -// Sub subtracts corresponding elements of two vectors. -// -// Asm: VADDPS, CPU Feature: AVX -func (x Float32x4) Sub(y Float32x4) Float32x4 - -// Xor performs a bitwise XOR operation between two vectors. -// -// Asm: VXORPS, CPU Feature: AVX -func (x Float32x4) Xor(y Float32x4) Float32x4 - -// Add adds corresponding elements of two vectors. -// -// Asm: VADDPS, CPU Feature: AVX -func (x Float32x8) Add(y Float32x8) Float32x8 - -// AddSub subtracts even elements and adds odd elements of two vectors. -// -// Asm: VADDSUBPS, CPU Feature: AVX -func (x Float32x8) AddSub(y Float32x8) Float32x8 - -// And performs a bitwise AND operation between two vectors. -// -// Asm: VANDPS, CPU Feature: AVX -func (x Float32x8) And(y Float32x8) Float32x8 - -// AndNot performs a bitwise AND NOT operation between two vectors. -// -// Asm: VANDNPS, CPU Feature: AVX -func (x Float32x8) AndNot(y Float32x8) Float32x8 - -// Div divides elements of two vectors. -// -// Asm: VDIVPS, CPU Feature: AVX -func (x Float32x8) Div(y Float32x8) Float32x8 - -// Equal compares for equality. -// Const Immediate = 0. -// -// Asm: VCMPPS, CPU Feature: AVX -func (x Float32x8) Equal(y Float32x8) Mask32x8 - -// Greater compares for greater than. -// Const Immediate = 6. -// -// Asm: VCMPPS, CPU Feature: AVX -func (x Float32x8) Greater(y Float32x8) Mask32x8 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VCMPPS, CPU Feature: AVX -func (x Float32x8) GreaterEqual(y Float32x8) Mask32x8 - -// IsNan checks if elements are NaN. Use as x.IsNan(x). -// Const Immediate = 3. -// -// Asm: VCMPPS, CPU Feature: AVX -func (x Float32x8) IsNan(y Float32x8) Mask32x8 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VCMPPS, CPU Feature: AVX -func (x Float32x8) Less(y Float32x8) Mask32x8 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VCMPPS, CPU Feature: AVX -func (x Float32x8) LessEqual(y Float32x8) Mask32x8 - -// ApproximateReciprocal computes an approximate reciprocal of each element. -// -// Asm: VRCP14PS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedApproximateReciprocal(y Mask32x8) Float32x8 - -// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. 
-// -// Asm: VRSQRT14PS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedApproximateReciprocalOfSqrt(y Mask32x8) Float32x8 - -// Sqrt computes the square root of each element. -// -// Asm: VSQRTPS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedSqrt(y Mask32x8) Float32x8 - -// Max computes the maximum of corresponding elements. -// -// Asm: VMAXPS, CPU Feature: AVX -func (x Float32x8) Max(y Float32x8) Float32x8 - -// Min computes the minimum of corresponding elements. -// -// Asm: VMINPS, CPU Feature: AVX -func (x Float32x8) Min(y Float32x8) Float32x8 - -// Mul multiplies corresponding elements of two vectors. -// -// Asm: VMULPS, CPU Feature: AVX -func (x Float32x8) Mul(y Float32x8) Float32x8 - -// MulByPowOf2 multiplies elements by a power of 2. -// -// Asm: VSCALEFPS, CPU Feature: AVX512EVEX -func (x Float32x8) MulByPowOf2(y Float32x8) Float32x8 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VCMPPS, CPU Feature: AVX -func (x Float32x8) NotEqual(y Float32x8) Mask32x8 - -// Or performs a bitwise OR operation between two vectors. -// -// Asm: VORPS, CPU Feature: AVX -func (x Float32x8) Or(y Float32x8) Float32x8 - -// PairwiseAdd horizontally adds adjacent pairs of elements. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. -// -// Asm: VHADDPS, CPU Feature: AVX -func (x Float32x8) PairwiseAdd(y Float32x8) Float32x8 - -// PairwiseSub horizontally subtracts adjacent pairs of elements. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. -// -// Asm: VHSUBPS, CPU Feature: AVX -func (x Float32x8) PairwiseSub(y Float32x8) Float32x8 - -// Sub subtracts corresponding elements of two vectors. -// -// Asm: VADDPS, CPU Feature: AVX -func (x Float32x8) Sub(y Float32x8) Float32x8 - -// Xor performs a bitwise XOR operation between two vectors. -// -// Asm: VXORPS, CPU Feature: AVX -func (x Float32x8) Xor(y Float32x8) Float32x8 - -// Add adds corresponding elements of two vectors. -// -// Asm: VADDPD, CPU Feature: AVX -func (x Float64x2) Add(y Float64x2) Float64x2 - -// AddSub subtracts even elements and adds odd elements of two vectors. -// -// Asm: VADDSUBPD, CPU Feature: AVX -func (x Float64x2) AddSub(y Float64x2) Float64x2 - -// And performs a bitwise AND operation between two vectors. -// -// Asm: VANDPD, CPU Feature: AVX -func (x Float64x2) And(y Float64x2) Float64x2 - -// AndNot performs a bitwise AND NOT operation between two vectors. -// -// Asm: VANDNPD, CPU Feature: AVX -func (x Float64x2) AndNot(y Float64x2) Float64x2 - -// Div divides elements of two vectors. -// -// Asm: VDIVPD, CPU Feature: AVX -func (x Float64x2) Div(y Float64x2) Float64x2 - -// DotProdBroadcast multiplies all elements and broadcasts the sum. -// Const Immediate = 127. -// -// Asm: VDPPD, CPU Feature: AVX -func (x Float64x2) DotProdBroadcast(y Float64x2) Float64x2 - -// Equal compares for equality. -// Const Immediate = 0. -// -// Asm: VCMPPD, CPU Feature: AVX -func (x Float64x2) Equal(y Float64x2) Mask64x2 - -// Greater compares for greater than. -// Const Immediate = 6. -// -// Asm: VCMPPD, CPU Feature: AVX -func (x Float64x2) Greater(y Float64x2) Mask64x2 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VCMPPD, CPU Feature: AVX -func (x Float64x2) GreaterEqual(y Float64x2) Mask64x2 - -// IsNan checks if elements are NaN. Use as x.IsNan(x). -// Const Immediate = 3. 
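// A note on the IsNan methods: Const Immediate = 3 selects the "unordered"
// compare predicate, which is true exactly when at least one operand is NaN,
// so comparing a value against itself flags its own NaN lanes. The same test
// in scalar Go (helper name is illustrative):
//
//	// isNaNLanes mirrors x.IsNan(x) for a 2-lane float64 vector.
//	func isNaNLanes(x [2]float64) [2]bool {
//		var m [2]bool
//		for i := range x {
//			m[i] = x[i] != x[i] // only NaN compares unequal to itself
//		}
//		return m
//	}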
-// -// Asm: VCMPPD, CPU Feature: AVX -func (x Float64x2) IsNan(y Float64x2) Mask64x2 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VCMPPD, CPU Feature: AVX -func (x Float64x2) Less(y Float64x2) Mask64x2 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VCMPPD, CPU Feature: AVX -func (x Float64x2) LessEqual(y Float64x2) Mask64x2 - -// ApproximateReciprocal computes an approximate reciprocal of each element. -// -// Asm: VRCP14PD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedApproximateReciprocal(y Mask64x2) Float64x2 - -// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. -// -// Asm: VRSQRT14PD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedApproximateReciprocalOfSqrt(y Mask64x2) Float64x2 - -// Sqrt computes the square root of each element. -// -// Asm: VSQRTPD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedSqrt(y Mask64x2) Float64x2 - -// Max computes the maximum of corresponding elements. -// -// Asm: VMAXPD, CPU Feature: AVX -func (x Float64x2) Max(y Float64x2) Float64x2 - -// Min computes the minimum of corresponding elements. -// -// Asm: VMINPD, CPU Feature: AVX -func (x Float64x2) Min(y Float64x2) Float64x2 - -// Mul multiplies corresponding elements of two vectors. -// -// Asm: VMULPD, CPU Feature: AVX -func (x Float64x2) Mul(y Float64x2) Float64x2 - -// MulByPowOf2 multiplies elements by a power of 2. -// -// Asm: VSCALEFPD, CPU Feature: AVX512EVEX -func (x Float64x2) MulByPowOf2(y Float64x2) Float64x2 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VCMPPD, CPU Feature: AVX -func (x Float64x2) NotEqual(y Float64x2) Mask64x2 - -// Or performs a bitwise OR operation between two vectors. -// -// Asm: VORPD, CPU Feature: AVX -func (x Float64x2) Or(y Float64x2) Float64x2 - -// PairwiseAdd horizontally adds adjacent pairs of elements. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. -// -// Asm: VHADDPD, CPU Feature: AVX -func (x Float64x2) PairwiseAdd(y Float64x2) Float64x2 - -// PairwiseSub horizontally subtracts adjacent pairs of elements. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. -// -// Asm: VHSUBPD, CPU Feature: AVX -func (x Float64x2) PairwiseSub(y Float64x2) Float64x2 - -// Sub subtracts corresponding elements of two vectors. -// -// Asm: VADDPD, CPU Feature: AVX -func (x Float64x2) Sub(y Float64x2) Float64x2 - -// Xor performs a bitwise XOR operation between two vectors. -// -// Asm: VXORPD, CPU Feature: AVX -func (x Float64x2) Xor(y Float64x2) Float64x2 - -// Add adds corresponding elements of two vectors. -// -// Asm: VADDPD, CPU Feature: AVX -func (x Float64x4) Add(y Float64x4) Float64x4 - -// AddSub subtracts even elements and adds odd elements of two vectors. -// -// Asm: VADDSUBPD, CPU Feature: AVX -func (x Float64x4) AddSub(y Float64x4) Float64x4 - -// And performs a bitwise AND operation between two vectors. -// -// Asm: VANDPD, CPU Feature: AVX -func (x Float64x4) And(y Float64x4) Float64x4 - -// AndNot performs a bitwise AND NOT operation between two vectors. -// -// Asm: VANDNPD, CPU Feature: AVX -func (x Float64x4) AndNot(y Float64x4) Float64x4 - -// Div divides elements of two vectors. -// -// Asm: VDIVPD, CPU Feature: AVX -func (x Float64x4) Div(y Float64x4) Float64x4 - -// Equal compares for equality. -// Const Immediate = 0. 
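// The Const Immediate values quoted for these comparisons are the VCMPPS/
// VCMPPD predicate codes: 0 = equal, 1 = less, 2 = less-or-equal,
// 3 = unordered (NaN), 4 = not-equal, 5 = not-less (GreaterEqual),
// 6 = not-less-or-equal (Greater). A scalar sketch of the dispatch (helper
// name is illustrative); note that codes 4, 5, and 6 are "unordered"
// variants that report true when either operand is NaN, unlike Go's >= and >:
//
//	// cmpLane applies one predicate code to a single float64 lane.
//	// NaN handling for codes 5 and 6 is simplified here (see note above).
//	func cmpLane(imm uint8, a, b float64) bool {
//		switch imm {
//		case 0:
//			return a == b
//		case 1:
//			return a < b
//		case 2:
//			return a <= b
//		case 3:
//			return a != a || b != b
//		case 4:
//			return a != b
//		case 5:
//			return a >= b
//		case 6:
//			return a > b
//		}
//		return false
//	}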
-// -// Asm: VCMPPD, CPU Feature: AVX -func (x Float64x4) Equal(y Float64x4) Mask64x4 - -// Greater compares for greater than. -// Const Immediate = 6. -// -// Asm: VCMPPD, CPU Feature: AVX -func (x Float64x4) Greater(y Float64x4) Mask64x4 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VCMPPD, CPU Feature: AVX -func (x Float64x4) GreaterEqual(y Float64x4) Mask64x4 - -// IsNan checks if elements are NaN. Use as x.IsNan(x). -// Const Immediate = 3. -// -// Asm: VCMPPD, CPU Feature: AVX -func (x Float64x4) IsNan(y Float64x4) Mask64x4 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VCMPPD, CPU Feature: AVX -func (x Float64x4) Less(y Float64x4) Mask64x4 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VCMPPD, CPU Feature: AVX -func (x Float64x4) LessEqual(y Float64x4) Mask64x4 - -// ApproximateReciprocal computes an approximate reciprocal of each element. -// -// Asm: VRCP14PD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedApproximateReciprocal(y Mask64x4) Float64x4 - -// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. -// -// Asm: VRSQRT14PD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedApproximateReciprocalOfSqrt(y Mask64x4) Float64x4 - -// Sqrt computes the square root of each element. -// -// Asm: VSQRTPD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedSqrt(y Mask64x4) Float64x4 - -// Max computes the maximum of corresponding elements. -// -// Asm: VMAXPD, CPU Feature: AVX -func (x Float64x4) Max(y Float64x4) Float64x4 - -// Min computes the minimum of corresponding elements. -// -// Asm: VMINPD, CPU Feature: AVX -func (x Float64x4) Min(y Float64x4) Float64x4 - -// Mul multiplies corresponding elements of two vectors. -// -// Asm: VMULPD, CPU Feature: AVX -func (x Float64x4) Mul(y Float64x4) Float64x4 - -// MulByPowOf2 multiplies elements by a power of 2. -// -// Asm: VSCALEFPD, CPU Feature: AVX512EVEX -func (x Float64x4) MulByPowOf2(y Float64x4) Float64x4 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VCMPPD, CPU Feature: AVX -func (x Float64x4) NotEqual(y Float64x4) Mask64x4 - -// Or performs a bitwise OR operation between two vectors. -// -// Asm: VORPD, CPU Feature: AVX -func (x Float64x4) Or(y Float64x4) Float64x4 - -// PairwiseAdd horizontally adds adjacent pairs of elements. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. -// -// Asm: VHADDPD, CPU Feature: AVX -func (x Float64x4) PairwiseAdd(y Float64x4) Float64x4 - -// PairwiseSub horizontally subtracts adjacent pairs of elements. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. -// -// Asm: VHSUBPD, CPU Feature: AVX -func (x Float64x4) PairwiseSub(y Float64x4) Float64x4 - -// Sub subtracts corresponding elements of two vectors. -// -// Asm: VADDPD, CPU Feature: AVX -func (x Float64x4) Sub(y Float64x4) Float64x4 - -// Xor performs a bitwise XOR operation between two vectors. -// -// Asm: VXORPD, CPU Feature: AVX -func (x Float64x4) Xor(y Float64x4) Float64x4 - -// Add adds corresponding elements of two vectors. -// -// Asm: VADDPD, CPU Feature: AVX512EVEX -func (x Float64x8) Add(y Float64x8) Float64x8 - -// And performs a masked bitwise AND operation between two vectors. 
-// -// Asm: VANDPD, CPU Feature: AVX512EVEX -func (x Float64x8) And(y Float64x8) Float64x8 - -// AndNot performs a masked bitwise AND NOT operation between two vectors. -// -// Asm: VANDNPD, CPU Feature: AVX512EVEX -func (x Float64x8) AndNot(y Float64x8) Float64x8 - -// Div divides elements of two vectors. -// -// Asm: VDIVPD, CPU Feature: AVX512EVEX -func (x Float64x8) Div(y Float64x8) Float64x8 - -// Equal compares for equality, masked. -// Const Immediate = 0. -// -// Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x8) Equal(y Float64x8) Mask64x8 - -// Greater compares for greater than. -// Const Immediate = 6. -// -// Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x8) Greater(y Float64x8) Mask64x8 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x8) GreaterEqual(y Float64x8) Mask64x8 - -// IsNan checks if elements are NaN. Use as x.IsNan(x). -// Const Immediate = 3. -// -// Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x8) IsNan(y Float64x8) Mask64x8 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x8) Less(y Float64x8) Mask64x8 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x8) LessEqual(y Float64x8) Mask64x8 - -// ApproximateReciprocal computes an approximate reciprocal of each element. -// -// Asm: VRCP14PD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedApproximateReciprocal(y Mask64x8) Float64x8 - -// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. -// -// Asm: VRSQRT14PD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedApproximateReciprocalOfSqrt(y Mask64x8) Float64x8 - -// Sqrt computes the square root of each element. -// -// Asm: VSQRTPD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedSqrt(y Mask64x8) Float64x8 - -// Max computes the maximum of corresponding elements. -// -// Asm: VMAXPD, CPU Feature: AVX512EVEX -func (x Float64x8) Max(y Float64x8) Float64x8 - -// Min computes the minimum of corresponding elements. -// -// Asm: VMINPD, CPU Feature: AVX512EVEX -func (x Float64x8) Min(y Float64x8) Float64x8 - -// Mul multiplies corresponding elements of two vectors, masked. -// -// Asm: VMULPD, CPU Feature: AVX512EVEX -func (x Float64x8) Mul(y Float64x8) Float64x8 - -// MulByPowOf2 multiplies elements by a power of 2. -// -// Asm: VSCALEFPD, CPU Feature: AVX512EVEX -func (x Float64x8) MulByPowOf2(y Float64x8) Float64x8 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x8) NotEqual(y Float64x8) Mask64x8 - -// Or performs a masked bitwise OR operation between two vectors. -// -// Asm: VORPD, CPU Feature: AVX512EVEX -func (x Float64x8) Or(y Float64x8) Float64x8 - -// Sub subtracts corresponding elements of two vectors. -// -// Asm: VADDPD, CPU Feature: AVX512EVEX -func (x Float64x8) Sub(y Float64x8) Float64x8 - -// Xor performs a masked bitwise XOR operation between two vectors. -// -// Asm: VXORPD, CPU Feature: AVX512EVEX -func (x Float64x8) Xor(y Float64x8) Float64x8 - -// Add adds corresponding elements of two vectors. -// -// Asm: VPADDW, CPU Feature: AVX2 -func (x Int16x16) Add(y Int16x16) Int16x16 - -// And performs a bitwise AND operation between two vectors. 
-// -// Asm: VPAND, CPU Feature: AVX2 -func (x Int16x16) And(y Int16x16) Int16x16 - -// AndNot performs a bitwise AND NOT operation between two vectors. -// -// Asm: VPANDN, CPU Feature: AVX2 -func (x Int16x16) AndNot(y Int16x16) Int16x16 - -// Equal compares for equality. -// Const Immediate = 0. -// -// Asm: VPCMPEQW, CPU Feature: AVX2 -func (x Int16x16) Equal(y Int16x16) Mask16x16 - -// Greater compares for greater than. -// Const Immediate = 6. -// -// Asm: VPCMPGTW, CPU Feature: AVX2 -func (x Int16x16) Greater(y Int16x16) Mask16x16 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VPCMPW, CPU Feature: AVX512EVEX -func (x Int16x16) GreaterEqual(y Int16x16) Mask16x16 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VPCMPW, CPU Feature: AVX512EVEX -func (x Int16x16) Less(y Int16x16) Mask16x16 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VPCMPW, CPU Feature: AVX512EVEX -func (x Int16x16) LessEqual(y Int16x16) Mask16x16 - -// Absolute computes the absolute value of each element. -// -// Asm: VPABSW, CPU Feature: AVX512EVEX -func (x Int16x16) MaskedAbsolute(y Mask16x16) Int16x16 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTW, CPU Feature: AVX512EVEX -func (x Int16x16) MaskedPopCount(y Mask16x16) Int16x16 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXSW, CPU Feature: AVX2 -func (x Int16x16) Max(y Int16x16) Int16x16 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINSW, CPU Feature: AVX2 -func (x Int16x16) Min(y Int16x16) Int16x16 - -// MulHigh multiplies elements and stores the high part of the result. -// -// Asm: VPMULHW, CPU Feature: AVX2 -func (x Int16x16) MulHigh(y Int16x16) Int16x16 - -// MulLow multiplies elements and stores the low part of the result. -// -// Asm: VPMULLW, CPU Feature: AVX2 -func (x Int16x16) MulLow(y Int16x16) Int16x16 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VPCMPW, CPU Feature: AVX512EVEX -func (x Int16x16) NotEqual(y Int16x16) Mask16x16 - -// Or performs a bitwise OR operation between two vectors. -// -// Asm: VPOR, CPU Feature: AVX2 -func (x Int16x16) Or(y Int16x16) Int16x16 - -// PairDotProd multiplies the elements and add the pairs together, -// yielding a vector of half as many elements with twice the input element size. -// -// Asm: VPMADDWD, CPU Feature: AVX2 -func (x Int16x16) PairDotProd(y Int16x16) Int32x8 - -// PairwiseAdd horizontally adds adjacent pairs of elements. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. -// -// Asm: VPHADDW, CPU Feature: AVX2 -func (x Int16x16) PairwiseAdd(y Int16x16) Int16x16 - -// PairwiseSub horizontally subtracts adjacent pairs of elements. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. -// -// Asm: VPHSUBW, CPU Feature: AVX2 -func (x Int16x16) PairwiseSub(y Int16x16) Int16x16 - -// SaturatedAdd adds corresponding elements of two vectors with saturation. -// -// Asm: VPADDSW, CPU Feature: AVX2 -func (x Int16x16) SaturatedAdd(y Int16x16) Int16x16 - -// SaturatedPairwiseAdd horizontally adds adjacent pairs of elements with saturation. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. 
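// The lane ordering in the pairwise formula repeated throughout these
// comments is easiest to see on a short vector. A scalar sketch of the plain
// (non-saturating) case, following the documented order exactly, with pairs
// from y landing in the low half of the result (helper name is illustrative):
//
//	// pairwiseAdd4 mirrors the documented layout on four int16 lanes:
//	// result = [y0+y1, y2+y3, x0+x1, x2+x3].
//	func pairwiseAdd4(x, y [4]int16) [4]int16 {
//		return [4]int16{y[0] + y[1], y[2] + y[3], x[0] + x[1], x[2] + x[3]}
//	}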
-// -// Asm: VPHADDSW, CPU Feature: AVX2 -func (x Int16x16) SaturatedPairwiseAdd(y Int16x16) Int16x16 - -// SaturatedPairwiseSub horizontally subtracts adjacent pairs of elements with saturation. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. -// -// Asm: VPHSUBSW, CPU Feature: AVX2 -func (x Int16x16) SaturatedPairwiseSub(y Int16x16) Int16x16 - -// SaturatedSub subtracts corresponding elements of two vectors with saturation. -// -// Asm: VPSUBSW, CPU Feature: AVX2 -func (x Int16x16) SaturatedSub(y Int16x16) Int16x16 - -// Sign returns the product of the first operand with -1, 0, or 1, -// whichever constant is nearest to the value of the second operand. -// -// Asm: VPSIGNW, CPU Feature: AVX2 -func (x Int16x16) Sign(y Int16x16) Int16x16 - -// Sub subtracts corresponding elements of two vectors. -// -// Asm: VPSUBW, CPU Feature: AVX2 -func (x Int16x16) Sub(y Int16x16) Int16x16 - -// Xor performs a bitwise XOR operation between two vectors. -// -// Asm: VPXOR, CPU Feature: AVX2 -func (x Int16x16) Xor(y Int16x16) Int16x16 - -// Add adds corresponding elements of two vectors. -// -// Asm: VPADDW, CPU Feature: AVX512EVEX -func (x Int16x32) Add(y Int16x32) Int16x32 - -// Equal compares for equality, masked. -// Const Immediate = 0. -// -// Asm: VPCMPEQW, CPU Feature: AVX512EVEX -func (x Int16x32) Equal(y Int16x32) Mask16x32 - -// Greater compares for greater than. -// Const Immediate = 6. -// -// Asm: VPCMPGTW, CPU Feature: AVX512EVEX -func (x Int16x32) Greater(y Int16x32) Mask16x32 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VPCMPW, CPU Feature: AVX512EVEX -func (x Int16x32) GreaterEqual(y Int16x32) Mask16x32 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VPCMPW, CPU Feature: AVX512EVEX -func (x Int16x32) Less(y Int16x32) Mask16x32 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VPCMPW, CPU Feature: AVX512EVEX -func (x Int16x32) LessEqual(y Int16x32) Mask16x32 - -// Absolute computes the absolute value of each element. -// -// Asm: VPABSW, CPU Feature: AVX512EVEX -func (x Int16x32) MaskedAbsolute(y Mask16x32) Int16x32 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTW, CPU Feature: AVX512EVEX -func (x Int16x32) MaskedPopCount(y Mask16x32) Int16x32 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXSW, CPU Feature: AVX512EVEX -func (x Int16x32) Max(y Int16x32) Int16x32 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINSW, CPU Feature: AVX512EVEX -func (x Int16x32) Min(y Int16x32) Int16x32 - -// MulHigh multiplies elements and stores the high part of the result, masked. -// -// Asm: VPMULHW, CPU Feature: AVX512EVEX -func (x Int16x32) MulHigh(y Int16x32) Int16x32 - -// MulLow multiplies elements and stores the low part of the result, masked. -// -// Asm: VPMULLW, CPU Feature: AVX512EVEX -func (x Int16x32) MulLow(y Int16x32) Int16x32 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VPCMPW, CPU Feature: AVX512EVEX -func (x Int16x32) NotEqual(y Int16x32) Mask16x32 - -// PairDotProd multiplies the elements and add the pairs together, -// yielding a vector of half as many elements with twice the input element size. -// -// Asm: VPMADDWD, CPU Feature: AVX512EVEX -func (x Int16x32) PairDotProd(y Int16x32) Int32x16 - -// SaturatedAdd adds corresponding elements of two vectors with saturation. 
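// Saturating arithmetic clamps instead of wrapping: a result beyond the
// element type's range sticks at the minimum or maximum value. One lane of
// SaturatedAdd (VPADDSW) in scalar Go (helper name is illustrative):
//
//	import "math"
//
//	// satAddInt16 clamps the widened sum back into int16 range.
//	func satAddInt16(a, b int16) int16 {
//		s := int32(a) + int32(b)
//		if s > math.MaxInt16 {
//			return math.MaxInt16
//		}
//		if s < math.MinInt16 {
//			return math.MinInt16
//		}
//		return int16(s)
//	}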
-// -// Asm: VPADDSW, CPU Feature: AVX512EVEX -func (x Int16x32) SaturatedAdd(y Int16x32) Int16x32 - -// SaturatedSub subtracts corresponding elements of two vectors with saturation. -// -// Asm: VPSUBSW, CPU Feature: AVX512EVEX -func (x Int16x32) SaturatedSub(y Int16x32) Int16x32 - -// Sub subtracts corresponding elements of two vectors. -// -// Asm: VPSUBW, CPU Feature: AVX512EVEX -func (x Int16x32) Sub(y Int16x32) Int16x32 - -// Add adds corresponding elements of two vectors. -// -// Asm: VPADDW, CPU Feature: AVX -func (x Int16x8) Add(y Int16x8) Int16x8 - -// And performs a bitwise AND operation between two vectors. -// -// Asm: VPAND, CPU Feature: AVX -func (x Int16x8) And(y Int16x8) Int16x8 - -// AndNot performs a bitwise AND NOT operation between two vectors. -// -// Asm: VPANDN, CPU Feature: AVX -func (x Int16x8) AndNot(y Int16x8) Int16x8 - -// Equal compares for equality. -// Const Immediate = 0. -// -// Asm: VPCMPEQW, CPU Feature: AVX -func (x Int16x8) Equal(y Int16x8) Mask16x8 - -// Greater compares for greater than. -// Const Immediate = 6. -// -// Asm: VPCMPGTW, CPU Feature: AVX -func (x Int16x8) Greater(y Int16x8) Mask16x8 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VPCMPW, CPU Feature: AVX512EVEX -func (x Int16x8) GreaterEqual(y Int16x8) Mask16x8 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VPCMPW, CPU Feature: AVX512EVEX -func (x Int16x8) Less(y Int16x8) Mask16x8 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VPCMPW, CPU Feature: AVX512EVEX -func (x Int16x8) LessEqual(y Int16x8) Mask16x8 - -// Absolute computes the absolute value of each element. -// -// Asm: VPABSW, CPU Feature: AVX512EVEX -func (x Int16x8) MaskedAbsolute(y Mask16x8) Int16x8 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTW, CPU Feature: AVX512EVEX -func (x Int16x8) MaskedPopCount(y Mask16x8) Int16x8 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXSW, CPU Feature: AVX -func (x Int16x8) Max(y Int16x8) Int16x8 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINSW, CPU Feature: AVX -func (x Int16x8) Min(y Int16x8) Int16x8 - -// MulHigh multiplies elements and stores the high part of the result. -// -// Asm: VPMULHW, CPU Feature: AVX -func (x Int16x8) MulHigh(y Int16x8) Int16x8 - -// MulLow multiplies elements and stores the low part of the result. -// -// Asm: VPMULLW, CPU Feature: AVX -func (x Int16x8) MulLow(y Int16x8) Int16x8 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VPCMPW, CPU Feature: AVX512EVEX -func (x Int16x8) NotEqual(y Int16x8) Mask16x8 - -// Or performs a bitwise OR operation between two vectors. -// -// Asm: VPOR, CPU Feature: AVX -func (x Int16x8) Or(y Int16x8) Int16x8 - -// PairDotProd multiplies the elements and add the pairs together, -// yielding a vector of half as many elements with twice the input element size. -// -// Asm: VPMADDWD, CPU Feature: AVX -func (x Int16x8) PairDotProd(y Int16x8) Int32x4 - -// PairwiseAdd horizontally adds adjacent pairs of elements. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. -// -// Asm: VPHADDW, CPU Feature: AVX -func (x Int16x8) PairwiseAdd(y Int16x8) Int16x8 - -// PairwiseSub horizontally subtracts adjacent pairs of elements. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. 
-// -// Asm: VPHSUBW, CPU Feature: AVX -func (x Int16x8) PairwiseSub(y Int16x8) Int16x8 - -// SaturatedAdd adds corresponding elements of two vectors with saturation. -// -// Asm: VPADDSW, CPU Feature: AVX -func (x Int16x8) SaturatedAdd(y Int16x8) Int16x8 - -// SaturatedPairwiseAdd horizontally adds adjacent pairs of elements with saturation. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. -// -// Asm: VPHADDSW, CPU Feature: AVX -func (x Int16x8) SaturatedPairwiseAdd(y Int16x8) Int16x8 - -// SaturatedPairwiseSub horizontally subtracts adjacent pairs of elements with saturation. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. -// -// Asm: VPHSUBSW, CPU Feature: AVX -func (x Int16x8) SaturatedPairwiseSub(y Int16x8) Int16x8 - -// SaturatedSub subtracts corresponding elements of two vectors with saturation. -// -// Asm: VPSUBSW, CPU Feature: AVX -func (x Int16x8) SaturatedSub(y Int16x8) Int16x8 - -// Sign returns the product of the first operand with -1, 0, or 1, -// whichever constant is nearest to the value of the second operand. -// -// Asm: VPSIGNW, CPU Feature: AVX -func (x Int16x8) Sign(y Int16x8) Int16x8 - -// Sub subtracts corresponding elements of two vectors. -// -// Asm: VPSUBW, CPU Feature: AVX -func (x Int16x8) Sub(y Int16x8) Int16x8 - -// Xor performs a bitwise XOR operation between two vectors. -// -// Asm: VPXOR, CPU Feature: AVX -func (x Int16x8) Xor(y Int16x8) Int16x8 - -// Add adds corresponding elements of two vectors. -// -// Asm: VPADDD, CPU Feature: AVX512EVEX -func (x Int32x16) Add(y Int32x16) Int32x16 - -// And performs a masked bitwise AND operation between two vectors. -// -// Asm: VPANDD, CPU Feature: AVX512EVEX -func (x Int32x16) And(y Int32x16) Int32x16 - -// AndNot performs a masked bitwise AND NOT operation between two vectors. -// -// Asm: VPANDND, CPU Feature: AVX512EVEX -func (x Int32x16) AndNot(y Int32x16) Int32x16 - -// Equal compares for equality, masked. -// Const Immediate = 0. -// -// Asm: VPCMPEQD, CPU Feature: AVX512EVEX -func (x Int32x16) Equal(y Int32x16) Mask32x16 - -// Greater compares for greater than. -// Const Immediate = 6. -// -// Asm: VPCMPGTD, CPU Feature: AVX512EVEX -func (x Int32x16) Greater(y Int32x16) Mask32x16 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VPCMPD, CPU Feature: AVX512EVEX -func (x Int32x16) GreaterEqual(y Int32x16) Mask32x16 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VPCMPD, CPU Feature: AVX512EVEX -func (x Int32x16) Less(y Int32x16) Mask32x16 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VPCMPD, CPU Feature: AVX512EVEX -func (x Int32x16) LessEqual(y Int32x16) Mask32x16 - -// Absolute computes the absolute value of each element. -// -// Asm: VPABSD, CPU Feature: AVX512EVEX -func (x Int32x16) MaskedAbsolute(y Mask32x16) Int32x16 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTD, CPU Feature: AVX512EVEX -func (x Int32x16) MaskedPopCount(y Mask32x16) Int32x16 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXSD, CPU Feature: AVX512EVEX -func (x Int32x16) Max(y Int32x16) Int32x16 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINSD, CPU Feature: AVX512EVEX -func (x Int32x16) Min(y Int32x16) Int32x16 - -// MulLow multiplies elements and stores the low part of the result, masked. 
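// MulHigh and MulLow are the two halves of one double-width product: the
// full product of two n-bit lanes is 2n bits wide, and the instruction keeps
// either the top or the bottom half. One int16 lane in scalar Go (helper
// name is illustrative):
//
//	// mulHiLoInt16 mirrors VPMULHW (high half) and VPMULLW (low half).
//	func mulHiLoInt16(a, b int16) (hi, lo int16) {
//		p := int32(a) * int32(b)
//		return int16(p >> 16), int16(p) // conversions keep the chosen 16 bits
//	}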
-// -// Asm: VPMULLD, CPU Feature: AVX512EVEX -func (x Int32x16) MulLow(y Int32x16) Int32x16 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VPCMPD, CPU Feature: AVX512EVEX -func (x Int32x16) NotEqual(y Int32x16) Mask32x16 - -// Or performs a masked bitwise OR operation between two vectors. -// -// Asm: VPORD, CPU Feature: AVX512EVEX -func (x Int32x16) Or(y Int32x16) Int32x16 - -// Sub subtracts corresponding elements of two vectors. -// -// Asm: VPSUBD, CPU Feature: AVX512EVEX -func (x Int32x16) Sub(y Int32x16) Int32x16 - -// Xor performs a masked bitwise XOR operation between two vectors. -// -// Asm: VPXORD, CPU Feature: AVX512EVEX -func (x Int32x16) Xor(y Int32x16) Int32x16 - -// Add adds corresponding elements of two vectors. -// -// Asm: VPADDD, CPU Feature: AVX -func (x Int32x4) Add(y Int32x4) Int32x4 - -// And performs a bitwise AND operation between two vectors. -// -// Asm: VPAND, CPU Feature: AVX -func (x Int32x4) And(y Int32x4) Int32x4 - -// AndNot performs a bitwise AND NOT operation between two vectors. -// -// Asm: VPANDN, CPU Feature: AVX -func (x Int32x4) AndNot(y Int32x4) Int32x4 - -// Equal compares for equality. -// Const Immediate = 0. -// -// Asm: VPCMPEQD, CPU Feature: AVX -func (x Int32x4) Equal(y Int32x4) Mask32x4 - -// Greater compares for greater than. -// Const Immediate = 6. -// -// Asm: VPCMPGTD, CPU Feature: AVX -func (x Int32x4) Greater(y Int32x4) Mask32x4 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VPCMPD, CPU Feature: AVX512EVEX -func (x Int32x4) GreaterEqual(y Int32x4) Mask32x4 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VPCMPD, CPU Feature: AVX512EVEX -func (x Int32x4) Less(y Int32x4) Mask32x4 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VPCMPD, CPU Feature: AVX512EVEX -func (x Int32x4) LessEqual(y Int32x4) Mask32x4 - -// Absolute computes the absolute value of each element. -// -// Asm: VPABSD, CPU Feature: AVX512EVEX -func (x Int32x4) MaskedAbsolute(y Mask32x4) Int32x4 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTD, CPU Feature: AVX512EVEX -func (x Int32x4) MaskedPopCount(y Mask32x4) Int32x4 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXSD, CPU Feature: AVX -func (x Int32x4) Max(y Int32x4) Int32x4 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINSD, CPU Feature: AVX -func (x Int32x4) Min(y Int32x4) Int32x4 - -// MulEvenWiden multiplies even-indexed elements, widening the result. -// Result[i] = v1.Even[i] * v2.Even[i]. -// -// Asm: VPMULDQ, CPU Feature: AVX -func (x Int32x4) MulEvenWiden(y Int32x4) Int64x2 - -// MulLow multiplies elements and stores the low part of the result. -// -// Asm: VPMULLD, CPU Feature: AVX -func (x Int32x4) MulLow(y Int32x4) Int32x4 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VPCMPD, CPU Feature: AVX512EVEX -func (x Int32x4) NotEqual(y Int32x4) Mask32x4 - -// Or performs a bitwise OR operation between two vectors. -// -// Asm: VPOR, CPU Feature: AVX -func (x Int32x4) Or(y Int32x4) Int32x4 - -// PairwiseAdd horizontally adds adjacent pairs of elements. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. -// -// Asm: VPHADDD, CPU Feature: AVX -func (x Int32x4) PairwiseAdd(y Int32x4) Int32x4 - -// PairwiseSub horizontally subtracts adjacent pairs of elements. 
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. -// -// Asm: VPHSUBD, CPU Feature: AVX -func (x Int32x4) PairwiseSub(y Int32x4) Int32x4 - -// Sign returns the product of the first operand with -1, 0, or 1, -// whichever constant is nearest to the value of the second operand. -// -// Asm: VPSIGND, CPU Feature: AVX -func (x Int32x4) Sign(y Int32x4) Int32x4 - -// Sub subtracts corresponding elements of two vectors. -// -// Asm: VPSUBD, CPU Feature: AVX -func (x Int32x4) Sub(y Int32x4) Int32x4 - -// Xor performs a bitwise XOR operation between two vectors. -// -// Asm: VPXOR, CPU Feature: AVX -func (x Int32x4) Xor(y Int32x4) Int32x4 - -// Add adds corresponding elements of two vectors. -// -// Asm: VPADDD, CPU Feature: AVX2 -func (x Int32x8) Add(y Int32x8) Int32x8 - -// And performs a bitwise AND operation between two vectors. -// -// Asm: VPAND, CPU Feature: AVX2 -func (x Int32x8) And(y Int32x8) Int32x8 - -// AndNot performs a bitwise AND NOT operation between two vectors. -// -// Asm: VPANDN, CPU Feature: AVX2 -func (x Int32x8) AndNot(y Int32x8) Int32x8 - -// Equal compares for equality. -// Const Immediate = 0. -// -// Asm: VPCMPEQD, CPU Feature: AVX2 -func (x Int32x8) Equal(y Int32x8) Mask32x8 - -// Greater compares for greater than. -// Const Immediate = 6. -// -// Asm: VPCMPGTD, CPU Feature: AVX2 -func (x Int32x8) Greater(y Int32x8) Mask32x8 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VPCMPD, CPU Feature: AVX512EVEX -func (x Int32x8) GreaterEqual(y Int32x8) Mask32x8 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VPCMPD, CPU Feature: AVX512EVEX -func (x Int32x8) Less(y Int32x8) Mask32x8 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VPCMPD, CPU Feature: AVX512EVEX -func (x Int32x8) LessEqual(y Int32x8) Mask32x8 - -// Absolute computes the absolute value of each element. -// -// Asm: VPABSD, CPU Feature: AVX512EVEX -func (x Int32x8) MaskedAbsolute(y Mask32x8) Int32x8 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTD, CPU Feature: AVX512EVEX -func (x Int32x8) MaskedPopCount(y Mask32x8) Int32x8 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXSD, CPU Feature: AVX2 -func (x Int32x8) Max(y Int32x8) Int32x8 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINSD, CPU Feature: AVX2 -func (x Int32x8) Min(y Int32x8) Int32x8 - -// MulEvenWiden multiplies even-indexed elements, widening the result. -// Result[i] = v1.Even[i] * v2.Even[i]. -// -// Asm: VPMULDQ, CPU Feature: AVX2 -func (x Int32x8) MulEvenWiden(y Int32x8) Int64x4 - -// MulLow multiplies elements and stores the low part of the result. -// -// Asm: VPMULLD, CPU Feature: AVX2 -func (x Int32x8) MulLow(y Int32x8) Int32x8 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VPCMPD, CPU Feature: AVX512EVEX -func (x Int32x8) NotEqual(y Int32x8) Mask32x8 - -// Or performs a bitwise OR operation between two vectors. -// -// Asm: VPOR, CPU Feature: AVX2 -func (x Int32x8) Or(y Int32x8) Int32x8 - -// PairwiseAdd horizontally adds adjacent pairs of elements. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. -// -// Asm: VPHADDD, CPU Feature: AVX2 -func (x Int32x8) PairwiseAdd(y Int32x8) Int32x8 - -// PairwiseSub horizontally subtracts adjacent pairs of elements. 
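// The Sign description ("the product of the first operand with -1, 0, or 1,
// whichever constant is nearest to the value of the second operand") is a
// roundabout way of saying: negate, zero, or keep each lane of x according
// to the sign of the matching lane of y. One lane in scalar Go (helper name
// is illustrative):
//
//	// signLane mirrors VPSIGND on a single int32 lane.
//	func signLane(x, y int32) int32 {
//		switch {
//		case y < 0:
//			return -x // wraps for math.MinInt32, as the instruction does
//		case y > 0:
//			return x
//		}
//		return 0
//	}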
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. -// -// Asm: VPHSUBD, CPU Feature: AVX2 -func (x Int32x8) PairwiseSub(y Int32x8) Int32x8 - -// Sign returns the product of the first operand with -1, 0, or 1, -// whichever constant is nearest to the value of the second operand. -// -// Asm: VPSIGND, CPU Feature: AVX2 -func (x Int32x8) Sign(y Int32x8) Int32x8 - -// Sub subtracts corresponding elements of two vectors. -// -// Asm: VPSUBD, CPU Feature: AVX2 -func (x Int32x8) Sub(y Int32x8) Int32x8 - -// Xor performs a bitwise XOR operation between two vectors. -// -// Asm: VPXOR, CPU Feature: AVX2 -func (x Int32x8) Xor(y Int32x8) Int32x8 - -// Add adds corresponding elements of two vectors. -// -// Asm: VPADDQ, CPU Feature: AVX -func (x Int64x2) Add(y Int64x2) Int64x2 - -// And performs a bitwise AND operation between two vectors. -// -// Asm: VPAND, CPU Feature: AVX -func (x Int64x2) And(y Int64x2) Int64x2 - -// AndNot performs a bitwise AND NOT operation between two vectors. -// -// Asm: VPANDN, CPU Feature: AVX -func (x Int64x2) AndNot(y Int64x2) Int64x2 - -// Equal compares for equality. -// Const Immediate = 0. -// -// Asm: VPCMPEQQ, CPU Feature: AVX -func (x Int64x2) Equal(y Int64x2) Mask64x2 - -// Greater compares for greater than. -// Const Immediate = 6. -// -// Asm: VPCMPGTQ, CPU Feature: AVX512EVEX -func (x Int64x2) Greater(y Int64x2) Mask64x2 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VPCMPQ, CPU Feature: AVX512EVEX -func (x Int64x2) GreaterEqual(y Int64x2) Mask64x2 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VPCMPQ, CPU Feature: AVX512EVEX -func (x Int64x2) Less(y Int64x2) Mask64x2 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VPCMPQ, CPU Feature: AVX512EVEX -func (x Int64x2) LessEqual(y Int64x2) Mask64x2 - -// Absolute computes the absolute value of each element. -// -// Asm: VPABSQ, CPU Feature: AVX512EVEX -func (x Int64x2) MaskedAbsolute(y Mask64x2) Int64x2 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX -func (x Int64x2) MaskedPopCount(y Mask64x2) Int64x2 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXSQ, CPU Feature: AVX512EVEX -func (x Int64x2) Max(y Int64x2) Int64x2 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINSQ, CPU Feature: AVX512EVEX -func (x Int64x2) Min(y Int64x2) Int64x2 - -// MulEvenWiden multiplies even-indexed elements, widening the result, masked. -// Result[i] = v1.Even[i] * v2.Even[i]. -// -// Asm: VPMULDQ, CPU Feature: AVX512EVEX -func (x Int64x2) MulEvenWiden(y Int64x2) Int64x2 - -// MulLow multiplies elements and stores the low part of the result, masked. -// -// Asm: VPMULLQ, CPU Feature: AVX512EVEX -func (x Int64x2) MulLow(y Int64x2) Int64x2 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VPCMPQ, CPU Feature: AVX512EVEX -func (x Int64x2) NotEqual(y Int64x2) Mask64x2 - -// Or performs a bitwise OR operation between two vectors. -// -// Asm: VPOR, CPU Feature: AVX -func (x Int64x2) Or(y Int64x2) Int64x2 - -// Sub subtracts corresponding elements of two vectors. -// -// Asm: VPSUBQ, CPU Feature: AVX -func (x Int64x2) Sub(y Int64x2) Int64x2 - -// Xor performs a bitwise XOR operation between two vectors. 
-// -// Asm: VPXOR, CPU Feature: AVX -func (x Int64x2) Xor(y Int64x2) Int64x2 - -// Add adds corresponding elements of two vectors. -// -// Asm: VPADDQ, CPU Feature: AVX2 -func (x Int64x4) Add(y Int64x4) Int64x4 - -// And performs a bitwise AND operation between two vectors. -// -// Asm: VPAND, CPU Feature: AVX2 -func (x Int64x4) And(y Int64x4) Int64x4 - -// AndNot performs a bitwise AND NOT operation between two vectors. -// -// Asm: VPANDN, CPU Feature: AVX2 -func (x Int64x4) AndNot(y Int64x4) Int64x4 - -// Equal compares for equality. -// Const Immediate = 0. -// -// Asm: VPCMPEQQ, CPU Feature: AVX2 -func (x Int64x4) Equal(y Int64x4) Mask64x4 - -// Greater compares for greater than. -// Const Immediate = 6. -// -// Asm: VPCMPGTQ, CPU Feature: AVX2 -func (x Int64x4) Greater(y Int64x4) Mask64x4 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VPCMPQ, CPU Feature: AVX512EVEX -func (x Int64x4) GreaterEqual(y Int64x4) Mask64x4 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VPCMPQ, CPU Feature: AVX512EVEX -func (x Int64x4) Less(y Int64x4) Mask64x4 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VPCMPQ, CPU Feature: AVX512EVEX -func (x Int64x4) LessEqual(y Int64x4) Mask64x4 - -// Absolute computes the absolute value of each element. -// -// Asm: VPABSQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedAbsolute(y Mask64x4) Int64x4 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedPopCount(y Mask64x4) Int64x4 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXSQ, CPU Feature: AVX512EVEX -func (x Int64x4) Max(y Int64x4) Int64x4 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINSQ, CPU Feature: AVX512EVEX -func (x Int64x4) Min(y Int64x4) Int64x4 - -// MulEvenWiden multiplies even-indexed elements, widening the result, masked. -// Result[i] = v1.Even[i] * v2.Even[i]. -// -// Asm: VPMULDQ, CPU Feature: AVX512EVEX -func (x Int64x4) MulEvenWiden(y Int64x4) Int64x4 - -// MulLow multiplies elements and stores the low part of the result, masked. -// -// Asm: VPMULLQ, CPU Feature: AVX512EVEX -func (x Int64x4) MulLow(y Int64x4) Int64x4 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VPCMPQ, CPU Feature: AVX512EVEX -func (x Int64x4) NotEqual(y Int64x4) Mask64x4 - -// Or performs a bitwise OR operation between two vectors. -// -// Asm: VPOR, CPU Feature: AVX2 -func (x Int64x4) Or(y Int64x4) Int64x4 - -// Sub subtracts corresponding elements of two vectors. -// -// Asm: VPSUBQ, CPU Feature: AVX2 -func (x Int64x4) Sub(y Int64x4) Int64x4 - -// Xor performs a bitwise XOR operation between two vectors. -// -// Asm: VPXOR, CPU Feature: AVX2 -func (x Int64x4) Xor(y Int64x4) Int64x4 - -// Add adds corresponding elements of two vectors. -// -// Asm: VPADDQ, CPU Feature: AVX512EVEX -func (x Int64x8) Add(y Int64x8) Int64x8 - -// And performs a masked bitwise AND operation between two vectors. -// -// Asm: VPANDQ, CPU Feature: AVX512EVEX -func (x Int64x8) And(y Int64x8) Int64x8 - -// AndNot performs a masked bitwise AND NOT operation between two vectors. -// -// Asm: VPANDNQ, CPU Feature: AVX512EVEX -func (x Int64x8) AndNot(y Int64x8) Int64x8 - -// Equal compares for equality, masked. -// Const Immediate = 0. -// -// Asm: VPCMPEQQ, CPU Feature: AVX512EVEX -func (x Int64x8) Equal(y Int64x8) Mask64x8 - -// Greater compares for greater than. 
-// Const Immediate = 6. -// -// Asm: VPCMPGTQ, CPU Feature: AVX512EVEX -func (x Int64x8) Greater(y Int64x8) Mask64x8 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VPCMPQ, CPU Feature: AVX512EVEX -func (x Int64x8) GreaterEqual(y Int64x8) Mask64x8 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VPCMPQ, CPU Feature: AVX512EVEX -func (x Int64x8) Less(y Int64x8) Mask64x8 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VPCMPQ, CPU Feature: AVX512EVEX -func (x Int64x8) LessEqual(y Int64x8) Mask64x8 - -// Absolute computes the absolute value of each element. -// -// Asm: VPABSQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedAbsolute(y Mask64x8) Int64x8 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedPopCount(y Mask64x8) Int64x8 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXSQ, CPU Feature: AVX512EVEX -func (x Int64x8) Max(y Int64x8) Int64x8 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINSQ, CPU Feature: AVX512EVEX -func (x Int64x8) Min(y Int64x8) Int64x8 - -// MulEvenWiden multiplies even-indexed elements, widening the result, masked. -// Result[i] = v1.Even[i] * v2.Even[i]. -// -// Asm: VPMULDQ, CPU Feature: AVX512EVEX -func (x Int64x8) MulEvenWiden(y Int64x8) Int64x8 - -// MulLow multiplies elements and stores the low part of the result, masked. -// -// Asm: VPMULLQ, CPU Feature: AVX512EVEX -func (x Int64x8) MulLow(y Int64x8) Int64x8 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VPCMPQ, CPU Feature: AVX512EVEX -func (x Int64x8) NotEqual(y Int64x8) Mask64x8 - -// Or performs a masked bitwise OR operation between two vectors. -// -// Asm: VPORQ, CPU Feature: AVX512EVEX -func (x Int64x8) Or(y Int64x8) Int64x8 - -// Sub subtracts corresponding elements of two vectors. -// -// Asm: VPSUBQ, CPU Feature: AVX512EVEX -func (x Int64x8) Sub(y Int64x8) Int64x8 - -// Xor performs a masked bitwise XOR operation between two vectors. -// -// Asm: VPXORQ, CPU Feature: AVX512EVEX -func (x Int64x8) Xor(y Int64x8) Int64x8 - -// Add adds corresponding elements of two vectors. -// -// Asm: VPADDB, CPU Feature: AVX -func (x Int8x16) Add(y Int8x16) Int8x16 - -// And performs a bitwise AND operation between two vectors. -// -// Asm: VPAND, CPU Feature: AVX -func (x Int8x16) And(y Int8x16) Int8x16 - -// AndNot performs a bitwise AND NOT operation between two vectors. -// -// Asm: VPANDN, CPU Feature: AVX -func (x Int8x16) AndNot(y Int8x16) Int8x16 - -// Equal compares for equality. -// Const Immediate = 0. -// -// Asm: VPCMPEQB, CPU Feature: AVX -func (x Int8x16) Equal(y Int8x16) Mask8x16 - -// Greater compares for greater than. -// Const Immediate = 6. -// -// Asm: VPCMPGTB, CPU Feature: AVX -func (x Int8x16) Greater(y Int8x16) Mask8x16 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VPCMPB, CPU Feature: AVX512EVEX -func (x Int8x16) GreaterEqual(y Int8x16) Mask8x16 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VPCMPB, CPU Feature: AVX512EVEX -func (x Int8x16) Less(y Int8x16) Mask8x16 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VPCMPB, CPU Feature: AVX512EVEX -func (x Int8x16) LessEqual(y Int8x16) Mask8x16 - -// Absolute computes the absolute value of each element. 
-// -// Asm: VPABSB, CPU Feature: AVX512EVEX -func (x Int8x16) MaskedAbsolute(y Mask8x16) Int8x16 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTB, CPU Feature: AVX512EVEX -func (x Int8x16) MaskedPopCount(y Mask8x16) Int8x16 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXSB, CPU Feature: AVX -func (x Int8x16) Max(y Int8x16) Int8x16 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINSB, CPU Feature: AVX -func (x Int8x16) Min(y Int8x16) Int8x16 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VPCMPB, CPU Feature: AVX512EVEX -func (x Int8x16) NotEqual(y Int8x16) Mask8x16 - -// Or performs a bitwise OR operation between two vectors. -// -// Asm: VPOR, CPU Feature: AVX -func (x Int8x16) Or(y Int8x16) Int8x16 - -// SaturatedAdd adds corresponding elements of two vectors with saturation. -// -// Asm: VPADDSB, CPU Feature: AVX -func (x Int8x16) SaturatedAdd(y Int8x16) Int8x16 - -// SaturatedSub subtracts corresponding elements of two vectors with saturation. -// -// Asm: VPSUBSB, CPU Feature: AVX -func (x Int8x16) SaturatedSub(y Int8x16) Int8x16 - -// Sign returns the product of the first operand with -1, 0, or 1, -// whichever constant is nearest to the value of the second operand. -// -// Asm: VPSIGNB, CPU Feature: AVX -func (x Int8x16) Sign(y Int8x16) Int8x16 - -// Sub subtracts corresponding elements of two vectors. -// -// Asm: VPSUBB, CPU Feature: AVX -func (x Int8x16) Sub(y Int8x16) Int8x16 - -// Xor performs a bitwise XOR operation between two vectors. -// -// Asm: VPXOR, CPU Feature: AVX -func (x Int8x16) Xor(y Int8x16) Int8x16 - -// Add adds corresponding elements of two vectors. -// -// Asm: VPADDB, CPU Feature: AVX2 -func (x Int8x32) Add(y Int8x32) Int8x32 - -// And performs a bitwise AND operation between two vectors. -// -// Asm: VPAND, CPU Feature: AVX2 -func (x Int8x32) And(y Int8x32) Int8x32 - -// AndNot performs a bitwise AND NOT operation between two vectors. -// -// Asm: VPANDN, CPU Feature: AVX2 -func (x Int8x32) AndNot(y Int8x32) Int8x32 - -// Equal compares for equality. -// Const Immediate = 0. -// -// Asm: VPCMPEQB, CPU Feature: AVX2 -func (x Int8x32) Equal(y Int8x32) Mask8x32 - -// Greater compares for greater than. -// Const Immediate = 6. -// -// Asm: VPCMPGTB, CPU Feature: AVX2 -func (x Int8x32) Greater(y Int8x32) Mask8x32 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VPCMPB, CPU Feature: AVX512EVEX -func (x Int8x32) GreaterEqual(y Int8x32) Mask8x32 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VPCMPB, CPU Feature: AVX512EVEX -func (x Int8x32) Less(y Int8x32) Mask8x32 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VPCMPB, CPU Feature: AVX512EVEX -func (x Int8x32) LessEqual(y Int8x32) Mask8x32 - -// Absolute computes the absolute value of each element. -// -// Asm: VPABSB, CPU Feature: AVX512EVEX -func (x Int8x32) MaskedAbsolute(y Mask8x32) Int8x32 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTB, CPU Feature: AVX512EVEX -func (x Int8x32) MaskedPopCount(y Mask8x32) Int8x32 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXSB, CPU Feature: AVX2 -func (x Int8x32) Max(y Int8x32) Int8x32 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINSB, CPU Feature: AVX2 -func (x Int8x32) Min(y Int8x32) Int8x32 - -// NotEqual compares for inequality. 
-// Const Immediate = 4. -// -// Asm: VPCMPB, CPU Feature: AVX512EVEX -func (x Int8x32) NotEqual(y Int8x32) Mask8x32 - -// Or performs a bitwise OR operation between two vectors. -// -// Asm: VPOR, CPU Feature: AVX2 -func (x Int8x32) Or(y Int8x32) Int8x32 - -// SaturatedAdd adds corresponding elements of two vectors with saturation. -// -// Asm: VPADDSB, CPU Feature: AVX2 -func (x Int8x32) SaturatedAdd(y Int8x32) Int8x32 - -// SaturatedSub subtracts corresponding elements of two vectors with saturation. -// -// Asm: VPSUBSB, CPU Feature: AVX2 -func (x Int8x32) SaturatedSub(y Int8x32) Int8x32 - -// Sign returns the product of the first operand with -1, 0, or 1, -// whichever constant is nearest to the value of the second operand. -// -// Asm: VPSIGNB, CPU Feature: AVX2 -func (x Int8x32) Sign(y Int8x32) Int8x32 - -// Sub subtracts corresponding elements of two vectors. -// -// Asm: VPSUBB, CPU Feature: AVX2 -func (x Int8x32) Sub(y Int8x32) Int8x32 - -// Xor performs a bitwise XOR operation between two vectors. -// -// Asm: VPXOR, CPU Feature: AVX2 -func (x Int8x32) Xor(y Int8x32) Int8x32 - -// Add adds corresponding elements of two vectors. -// -// Asm: VPADDB, CPU Feature: AVX512EVEX -func (x Int8x64) Add(y Int8x64) Int8x64 - -// Equal compares for equality, masked. -// Const Immediate = 0. -// -// Asm: VPCMPEQB, CPU Feature: AVX512EVEX -func (x Int8x64) Equal(y Int8x64) Mask8x64 - -// Greater compares for greater than. -// Const Immediate = 6. -// -// Asm: VPCMPGTB, CPU Feature: AVX512EVEX -func (x Int8x64) Greater(y Int8x64) Mask8x64 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VPCMPB, CPU Feature: AVX512EVEX -func (x Int8x64) GreaterEqual(y Int8x64) Mask8x64 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VPCMPB, CPU Feature: AVX512EVEX -func (x Int8x64) Less(y Int8x64) Mask8x64 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VPCMPB, CPU Feature: AVX512EVEX -func (x Int8x64) LessEqual(y Int8x64) Mask8x64 - -// Absolute computes the absolute value of each element. -// -// Asm: VPABSB, CPU Feature: AVX512EVEX -func (x Int8x64) MaskedAbsolute(y Mask8x64) Int8x64 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTB, CPU Feature: AVX512EVEX -func (x Int8x64) MaskedPopCount(y Mask8x64) Int8x64 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXSB, CPU Feature: AVX512EVEX -func (x Int8x64) Max(y Int8x64) Int8x64 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINSB, CPU Feature: AVX512EVEX -func (x Int8x64) Min(y Int8x64) Int8x64 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VPCMPB, CPU Feature: AVX512EVEX -func (x Int8x64) NotEqual(y Int8x64) Mask8x64 - -// SaturatedAdd adds corresponding elements of two vectors with saturation. -// -// Asm: VPADDSB, CPU Feature: AVX512EVEX -func (x Int8x64) SaturatedAdd(y Int8x64) Int8x64 - -// SaturatedSub subtracts corresponding elements of two vectors with saturation. -// -// Asm: VPSUBSB, CPU Feature: AVX512EVEX -func (x Int8x64) SaturatedSub(y Int8x64) Int8x64 - -// Sub subtracts corresponding elements of two vectors. -// -// Asm: VPSUBB, CPU Feature: AVX512EVEX -func (x Int8x64) Sub(y Int8x64) Int8x64 - -// Add adds corresponding elements of two vectors. -// -// Asm: VPADDW, CPU Feature: AVX2 -func (x Uint16x16) Add(y Uint16x16) Uint16x16 - -// And performs a bitwise AND operation between two vectors. 
-// -// Asm: VPAND, CPU Feature: AVX2 -func (x Uint16x16) And(y Uint16x16) Uint16x16 - -// AndNot performs a bitwise AND NOT operation between two vectors. -// -// Asm: VPANDN, CPU Feature: AVX2 -func (x Uint16x16) AndNot(y Uint16x16) Uint16x16 - -// Average computes the rounded average of corresponding elements. -// -// Asm: VPAVGW, CPU Feature: AVX2 -func (x Uint16x16) Average(y Uint16x16) Uint16x16 - -// Equal compares for equality, masked. -// Const Immediate = 0. -// -// Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x16) Equal(y Uint16x16) Mask16x16 - -// Greater compares for greater than. -// Const Immediate = 6. -// -// Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x16) Greater(y Uint16x16) Mask16x16 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x16) GreaterEqual(y Uint16x16) Mask16x16 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x16) Less(y Uint16x16) Mask16x16 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x16) LessEqual(y Uint16x16) Mask16x16 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTW, CPU Feature: AVX512EVEX -func (x Uint16x16) MaskedPopCount(y Mask16x16) Uint16x16 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXUW, CPU Feature: AVX2 -func (x Uint16x16) Max(y Uint16x16) Uint16x16 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINUW, CPU Feature: AVX2 -func (x Uint16x16) Min(y Uint16x16) Uint16x16 - -// MulHigh multiplies elements and stores the high part of the result. -// -// Asm: VPMULHUW, CPU Feature: AVX2 -func (x Uint16x16) MulHigh(y Uint16x16) Uint16x16 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x16) NotEqual(y Uint16x16) Mask16x16 - -// Or performs a bitwise OR operation between two vectors. -// -// Asm: VPOR, CPU Feature: AVX2 -func (x Uint16x16) Or(y Uint16x16) Uint16x16 - -// PairwiseAdd horizontally adds adjacent pairs of elements. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. -// -// Asm: VPHADDW, CPU Feature: AVX2 -func (x Uint16x16) PairwiseAdd(y Uint16x16) Uint16x16 - -// PairwiseSub horizontally subtracts adjacent pairs of elements. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. -// -// Asm: VPHSUBW, CPU Feature: AVX2 -func (x Uint16x16) PairwiseSub(y Uint16x16) Uint16x16 - -// SaturatedAdd adds corresponding elements of two vectors with saturation. -// -// Asm: VPADDSW, CPU Feature: AVX2 -func (x Uint16x16) SaturatedAdd(y Uint16x16) Uint16x16 - -// SaturatedSub subtracts corresponding elements of two vectors with saturation. -// -// Asm: VPSUBSW, CPU Feature: AVX2 -func (x Uint16x16) SaturatedSub(y Uint16x16) Uint16x16 - -// SaturatedPairDotProd multiplies the elements and add the pairs together with saturation, -// yielding a vector of half as many elements with twice the input element size. -// -// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX -func (x Uint16x16) SaturatedUnsignedSignedPairDotProd(y Int16x16) Int16x16 - -// Sub subtracts corresponding elements of two vectors. 
-// -// Asm: VPSUBW, CPU Feature: AVX2 -func (x Uint16x16) Sub(y Uint16x16) Uint16x16 - -// Xor performs a bitwise XOR operation between two vectors. -// -// Asm: VPXOR, CPU Feature: AVX2 -func (x Uint16x16) Xor(y Uint16x16) Uint16x16 - -// Add adds corresponding elements of two vectors. -// -// Asm: VPADDW, CPU Feature: AVX512EVEX -func (x Uint16x32) Add(y Uint16x32) Uint16x32 - -// Average computes the rounded average of corresponding elements. -// -// Asm: VPAVGW, CPU Feature: AVX512EVEX -func (x Uint16x32) Average(y Uint16x32) Uint16x32 - -// Equal compares for equality, masked. -// Const Immediate = 0. -// -// Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x32) Equal(y Uint16x32) Mask16x32 - -// Greater compares for greater than. -// Const Immediate = 6. -// -// Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x32) Greater(y Uint16x32) Mask16x32 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x32) GreaterEqual(y Uint16x32) Mask16x32 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x32) Less(y Uint16x32) Mask16x32 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x32) LessEqual(y Uint16x32) Mask16x32 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTW, CPU Feature: AVX512EVEX -func (x Uint16x32) MaskedPopCount(y Mask16x32) Uint16x32 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXUW, CPU Feature: AVX512EVEX -func (x Uint16x32) Max(y Uint16x32) Uint16x32 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINUW, CPU Feature: AVX512EVEX -func (x Uint16x32) Min(y Uint16x32) Uint16x32 - -// MulHigh multiplies elements and stores the high part of the result, masked. -// -// Asm: VPMULHUW, CPU Feature: AVX512EVEX -func (x Uint16x32) MulHigh(y Uint16x32) Uint16x32 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x32) NotEqual(y Uint16x32) Mask16x32 - -// SaturatedAdd adds corresponding elements of two vectors with saturation. -// -// Asm: VPADDSW, CPU Feature: AVX512EVEX -func (x Uint16x32) SaturatedAdd(y Uint16x32) Uint16x32 - -// SaturatedSub subtracts corresponding elements of two vectors with saturation. -// -// Asm: VPSUBSW, CPU Feature: AVX512EVEX -func (x Uint16x32) SaturatedSub(y Uint16x32) Uint16x32 - -// SaturatedPairDotProd multiplies the elements and add the pairs together with saturation, -// yielding a vector of half as many elements with twice the input element size. -// -// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX -func (x Uint16x32) SaturatedUnsignedSignedPairDotProd(y Int16x32) Int16x32 - -// Sub subtracts corresponding elements of two vectors. -// -// Asm: VPSUBW, CPU Feature: AVX512EVEX -func (x Uint16x32) Sub(y Uint16x32) Uint16x32 - -// Add adds corresponding elements of two vectors. -// -// Asm: VPADDW, CPU Feature: AVX -func (x Uint16x8) Add(y Uint16x8) Uint16x8 - -// And performs a bitwise AND operation between two vectors. -// -// Asm: VPAND, CPU Feature: AVX -func (x Uint16x8) And(y Uint16x8) Uint16x8 - -// AndNot performs a bitwise AND NOT operation between two vectors. -// -// Asm: VPANDN, CPU Feature: AVX -func (x Uint16x8) AndNot(y Uint16x8) Uint16x8 - -// Average computes the rounded average of corresponding elements. 
-// -// Asm: VPAVGW, CPU Feature: AVX -func (x Uint16x8) Average(y Uint16x8) Uint16x8 - -// Equal compares for equality, masked. -// Const Immediate = 0. -// -// Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x8) Equal(y Uint16x8) Mask16x8 - -// Greater compares for greater than. -// Const Immediate = 6. -// -// Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x8) Greater(y Uint16x8) Mask16x8 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x8) GreaterEqual(y Uint16x8) Mask16x8 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x8) Less(y Uint16x8) Mask16x8 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x8) LessEqual(y Uint16x8) Mask16x8 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTW, CPU Feature: AVX512EVEX -func (x Uint16x8) MaskedPopCount(y Mask16x8) Uint16x8 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXUW, CPU Feature: AVX -func (x Uint16x8) Max(y Uint16x8) Uint16x8 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINUW, CPU Feature: AVX -func (x Uint16x8) Min(y Uint16x8) Uint16x8 - -// MulHigh multiplies elements and stores the high part of the result. -// -// Asm: VPMULHUW, CPU Feature: AVX -func (x Uint16x8) MulHigh(y Uint16x8) Uint16x8 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x8) NotEqual(y Uint16x8) Mask16x8 - -// Or performs a bitwise OR operation between two vectors. -// -// Asm: VPOR, CPU Feature: AVX -func (x Uint16x8) Or(y Uint16x8) Uint16x8 - -// PairwiseAdd horizontally adds adjacent pairs of elements. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. -// -// Asm: VPHADDW, CPU Feature: AVX -func (x Uint16x8) PairwiseAdd(y Uint16x8) Uint16x8 - -// PairwiseSub horizontally subtracts adjacent pairs of elements. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. -// -// Asm: VPHSUBW, CPU Feature: AVX -func (x Uint16x8) PairwiseSub(y Uint16x8) Uint16x8 - -// SaturatedAdd adds corresponding elements of two vectors with saturation. -// -// Asm: VPADDSW, CPU Feature: AVX -func (x Uint16x8) SaturatedAdd(y Uint16x8) Uint16x8 - -// SaturatedSub subtracts corresponding elements of two vectors with saturation. -// -// Asm: VPSUBSW, CPU Feature: AVX -func (x Uint16x8) SaturatedSub(y Uint16x8) Uint16x8 - -// SaturatedPairDotProd multiplies the elements and add the pairs together with saturation, -// yielding a vector of half as many elements with twice the input element size. -// -// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX -func (x Uint16x8) SaturatedUnsignedSignedPairDotProd(y Int16x8) Int16x8 - -// Sub subtracts corresponding elements of two vectors. -// -// Asm: VPSUBW, CPU Feature: AVX -func (x Uint16x8) Sub(y Uint16x8) Uint16x8 - -// Xor performs a bitwise XOR operation between two vectors. -// -// Asm: VPXOR, CPU Feature: AVX -func (x Uint16x8) Xor(y Uint16x8) Uint16x8 - -// Add adds corresponding elements of two vectors. -// -// Asm: VPADDD, CPU Feature: AVX512EVEX -func (x Uint32x16) Add(y Uint32x16) Uint32x16 - -// And performs a masked bitwise AND operation between two vectors. 
-// -// Asm: VPANDD, CPU Feature: AVX512EVEX -func (x Uint32x16) And(y Uint32x16) Uint32x16 - -// AndNot performs a masked bitwise AND NOT operation between two vectors. -// -// Asm: VPANDND, CPU Feature: AVX512EVEX -func (x Uint32x16) AndNot(y Uint32x16) Uint32x16 - -// Equal compares for equality, masked. -// Const Immediate = 0. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x16) Equal(y Uint32x16) Mask32x16 - -// Greater compares for greater than. -// Const Immediate = 6. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x16) Greater(y Uint32x16) Mask32x16 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x16) GreaterEqual(y Uint32x16) Mask32x16 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x16) Less(y Uint32x16) Mask32x16 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x16) LessEqual(y Uint32x16) Mask32x16 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTD, CPU Feature: AVX512EVEX -func (x Uint32x16) MaskedPopCount(y Mask32x16) Uint32x16 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXUD, CPU Feature: AVX512EVEX -func (x Uint32x16) Max(y Uint32x16) Uint32x16 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINUD, CPU Feature: AVX512EVEX -func (x Uint32x16) Min(y Uint32x16) Uint32x16 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x16) NotEqual(y Uint32x16) Mask32x16 - -// Or performs a masked bitwise OR operation between two vectors. -// -// Asm: VPORD, CPU Feature: AVX512EVEX -func (x Uint32x16) Or(y Uint32x16) Uint32x16 - -// Sub subtracts corresponding elements of two vectors. -// -// Asm: VPSUBD, CPU Feature: AVX512EVEX -func (x Uint32x16) Sub(y Uint32x16) Uint32x16 - -// Xor performs a masked bitwise XOR operation between two vectors. -// -// Asm: VPXORD, CPU Feature: AVX512EVEX -func (x Uint32x16) Xor(y Uint32x16) Uint32x16 - -// Add adds corresponding elements of two vectors. -// -// Asm: VPADDD, CPU Feature: AVX -func (x Uint32x4) Add(y Uint32x4) Uint32x4 - -// And performs a bitwise AND operation between two vectors. -// -// Asm: VPAND, CPU Feature: AVX -func (x Uint32x4) And(y Uint32x4) Uint32x4 - -// AndNot performs a bitwise AND NOT operation between two vectors. -// -// Asm: VPANDN, CPU Feature: AVX -func (x Uint32x4) AndNot(y Uint32x4) Uint32x4 - -// Equal compares for equality, masked. -// Const Immediate = 0. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x4) Equal(y Uint32x4) Mask32x4 - -// Greater compares for greater than. -// Const Immediate = 6. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x4) Greater(y Uint32x4) Mask32x4 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x4) GreaterEqual(y Uint32x4) Mask32x4 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x4) Less(y Uint32x4) Mask32x4 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x4) LessEqual(y Uint32x4) Mask32x4 - -// PopCount counts the number of set bits in each element. 
-// -// Asm: VPOPCNTD, CPU Feature: AVX512EVEX -func (x Uint32x4) MaskedPopCount(y Mask32x4) Uint32x4 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXUD, CPU Feature: AVX -func (x Uint32x4) Max(y Uint32x4) Uint32x4 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINUD, CPU Feature: AVX -func (x Uint32x4) Min(y Uint32x4) Uint32x4 - -// MulEvenWiden multiplies even-indexed elements, widening the result. -// Result[i] = v1.Even[i] * v2.Even[i]. -// -// Asm: VPMULUDQ, CPU Feature: AVX -func (x Uint32x4) MulEvenWiden(y Uint32x4) Uint64x2 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x4) NotEqual(y Uint32x4) Mask32x4 - -// Or performs a bitwise OR operation between two vectors. -// -// Asm: VPOR, CPU Feature: AVX -func (x Uint32x4) Or(y Uint32x4) Uint32x4 - -// PairwiseAdd horizontally adds adjacent pairs of elements. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. -// -// Asm: VPHADDD, CPU Feature: AVX -func (x Uint32x4) PairwiseAdd(y Uint32x4) Uint32x4 - -// PairwiseSub horizontally subtracts adjacent pairs of elements. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. -// -// Asm: VPHSUBD, CPU Feature: AVX -func (x Uint32x4) PairwiseSub(y Uint32x4) Uint32x4 - -// Sub subtracts corresponding elements of two vectors. -// -// Asm: VPSUBD, CPU Feature: AVX -func (x Uint32x4) Sub(y Uint32x4) Uint32x4 - -// Xor performs a bitwise XOR operation between two vectors. -// -// Asm: VPXOR, CPU Feature: AVX -func (x Uint32x4) Xor(y Uint32x4) Uint32x4 - -// Add adds corresponding elements of two vectors. -// -// Asm: VPADDD, CPU Feature: AVX2 -func (x Uint32x8) Add(y Uint32x8) Uint32x8 - -// And performs a bitwise AND operation between two vectors. -// -// Asm: VPAND, CPU Feature: AVX2 -func (x Uint32x8) And(y Uint32x8) Uint32x8 - -// AndNot performs a bitwise AND NOT operation between two vectors. -// -// Asm: VPANDN, CPU Feature: AVX2 -func (x Uint32x8) AndNot(y Uint32x8) Uint32x8 - -// Equal compares for equality, masked. -// Const Immediate = 0. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x8) Equal(y Uint32x8) Mask32x8 - -// Greater compares for greater than. -// Const Immediate = 6. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x8) Greater(y Uint32x8) Mask32x8 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x8) GreaterEqual(y Uint32x8) Mask32x8 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x8) Less(y Uint32x8) Mask32x8 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x8) LessEqual(y Uint32x8) Mask32x8 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTD, CPU Feature: AVX512EVEX -func (x Uint32x8) MaskedPopCount(y Mask32x8) Uint32x8 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXUD, CPU Feature: AVX2 -func (x Uint32x8) Max(y Uint32x8) Uint32x8 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINUD, CPU Feature: AVX2 -func (x Uint32x8) Min(y Uint32x8) Uint32x8 - -// MulEvenWiden multiplies even-indexed elements, widening the result. -// Result[i] = v1.Even[i] * v2.Even[i]. 
-// -// Asm: VPMULUDQ, CPU Feature: AVX2 -func (x Uint32x8) MulEvenWiden(y Uint32x8) Uint64x4 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x8) NotEqual(y Uint32x8) Mask32x8 - -// Or performs a bitwise OR operation between two vectors. -// -// Asm: VPOR, CPU Feature: AVX2 -func (x Uint32x8) Or(y Uint32x8) Uint32x8 - -// PairwiseAdd horizontally adds adjacent pairs of elements. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. -// -// Asm: VPHADDD, CPU Feature: AVX2 -func (x Uint32x8) PairwiseAdd(y Uint32x8) Uint32x8 - -// PairwiseSub horizontally subtracts adjacent pairs of elements. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. -// -// Asm: VPHSUBD, CPU Feature: AVX2 -func (x Uint32x8) PairwiseSub(y Uint32x8) Uint32x8 - -// Sub subtracts corresponding elements of two vectors. -// -// Asm: VPSUBD, CPU Feature: AVX2 -func (x Uint32x8) Sub(y Uint32x8) Uint32x8 - -// Xor performs a bitwise XOR operation between two vectors. -// -// Asm: VPXOR, CPU Feature: AVX2 -func (x Uint32x8) Xor(y Uint32x8) Uint32x8 - -// Add adds corresponding elements of two vectors. -// -// Asm: VPADDQ, CPU Feature: AVX -func (x Uint64x2) Add(y Uint64x2) Uint64x2 - -// And performs a bitwise AND operation between two vectors. -// -// Asm: VPAND, CPU Feature: AVX -func (x Uint64x2) And(y Uint64x2) Uint64x2 - -// AndNot performs a bitwise AND NOT operation between two vectors. -// -// Asm: VPANDN, CPU Feature: AVX -func (x Uint64x2) AndNot(y Uint64x2) Uint64x2 - -// Equal compares for equality, masked. -// Const Immediate = 0. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x2) Equal(y Uint64x2) Mask64x2 - -// Greater compares for greater than. -// Const Immediate = 6. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x2) Greater(y Uint64x2) Mask64x2 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x2) GreaterEqual(y Uint64x2) Mask64x2 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x2) Less(y Uint64x2) Mask64x2 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x2) LessEqual(y Uint64x2) Mask64x2 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX -func (x Uint64x2) MaskedPopCount(y Mask64x2) Uint64x2 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXUQ, CPU Feature: AVX512EVEX -func (x Uint64x2) Max(y Uint64x2) Uint64x2 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINUQ, CPU Feature: AVX512EVEX -func (x Uint64x2) Min(y Uint64x2) Uint64x2 - -// MulEvenWiden multiplies even-indexed elements, widening the result, masked. -// Result[i] = v1.Even[i] * v2.Even[i]. -// -// Asm: VPMULUDQ, CPU Feature: AVX512EVEX -func (x Uint64x2) MulEvenWiden(y Uint64x2) Uint64x2 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x2) NotEqual(y Uint64x2) Mask64x2 - -// Or performs a bitwise OR operation between two vectors. -// -// Asm: VPOR, CPU Feature: AVX -func (x Uint64x2) Or(y Uint64x2) Uint64x2 - -// Sub subtracts corresponding elements of two vectors. 
-// -// Asm: VPSUBQ, CPU Feature: AVX -func (x Uint64x2) Sub(y Uint64x2) Uint64x2 - -// Xor performs a bitwise XOR operation between two vectors. -// -// Asm: VPXOR, CPU Feature: AVX -func (x Uint64x2) Xor(y Uint64x2) Uint64x2 - -// Add adds corresponding elements of two vectors. -// -// Asm: VPADDQ, CPU Feature: AVX2 -func (x Uint64x4) Add(y Uint64x4) Uint64x4 - -// And performs a bitwise AND operation between two vectors. -// -// Asm: VPAND, CPU Feature: AVX2 -func (x Uint64x4) And(y Uint64x4) Uint64x4 - -// AndNot performs a bitwise AND NOT operation between two vectors. -// -// Asm: VPANDN, CPU Feature: AVX2 -func (x Uint64x4) AndNot(y Uint64x4) Uint64x4 - -// Equal compares for equality, masked. -// Const Immediate = 0. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x4) Equal(y Uint64x4) Mask64x4 - -// Greater compares for greater than. -// Const Immediate = 6. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x4) Greater(y Uint64x4) Mask64x4 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x4) GreaterEqual(y Uint64x4) Mask64x4 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x4) Less(y Uint64x4) Mask64x4 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x4) LessEqual(y Uint64x4) Mask64x4 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX -func (x Uint64x4) MaskedPopCount(y Mask64x4) Uint64x4 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXUQ, CPU Feature: AVX512EVEX -func (x Uint64x4) Max(y Uint64x4) Uint64x4 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINUQ, CPU Feature: AVX512EVEX -func (x Uint64x4) Min(y Uint64x4) Uint64x4 - -// MulEvenWiden multiplies even-indexed elements, widening the result, masked. -// Result[i] = v1.Even[i] * v2.Even[i]. -// -// Asm: VPMULUDQ, CPU Feature: AVX512EVEX -func (x Uint64x4) MulEvenWiden(y Uint64x4) Uint64x4 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x4) NotEqual(y Uint64x4) Mask64x4 - -// Or performs a bitwise OR operation between two vectors. -// -// Asm: VPOR, CPU Feature: AVX2 -func (x Uint64x4) Or(y Uint64x4) Uint64x4 - -// Sub subtracts corresponding elements of two vectors. -// -// Asm: VPSUBQ, CPU Feature: AVX2 -func (x Uint64x4) Sub(y Uint64x4) Uint64x4 - -// Xor performs a bitwise XOR operation between two vectors. -// -// Asm: VPXOR, CPU Feature: AVX2 -func (x Uint64x4) Xor(y Uint64x4) Uint64x4 - -// Add adds corresponding elements of two vectors. -// -// Asm: VPADDQ, CPU Feature: AVX512EVEX -func (x Uint64x8) Add(y Uint64x8) Uint64x8 - -// And performs a masked bitwise AND operation between two vectors. -// -// Asm: VPANDQ, CPU Feature: AVX512EVEX -func (x Uint64x8) And(y Uint64x8) Uint64x8 - -// AndNot performs a masked bitwise AND NOT operation between two vectors. -// -// Asm: VPANDNQ, CPU Feature: AVX512EVEX -func (x Uint64x8) AndNot(y Uint64x8) Uint64x8 - -// Equal compares for equality, masked. -// Const Immediate = 0. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x8) Equal(y Uint64x8) Mask64x8 - -// Greater compares for greater than. -// Const Immediate = 6. 
-// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x8) Greater(y Uint64x8) Mask64x8 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x8) GreaterEqual(y Uint64x8) Mask64x8 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x8) Less(y Uint64x8) Mask64x8 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x8) LessEqual(y Uint64x8) Mask64x8 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX -func (x Uint64x8) MaskedPopCount(y Mask64x8) Uint64x8 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXUQ, CPU Feature: AVX512EVEX -func (x Uint64x8) Max(y Uint64x8) Uint64x8 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINUQ, CPU Feature: AVX512EVEX -func (x Uint64x8) Min(y Uint64x8) Uint64x8 - -// MulEvenWiden multiplies even-indexed elements, widening the result, masked. -// Result[i] = v1.Even[i] * v2.Even[i]. -// -// Asm: VPMULUDQ, CPU Feature: AVX512EVEX -func (x Uint64x8) MulEvenWiden(y Uint64x8) Uint64x8 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x8) NotEqual(y Uint64x8) Mask64x8 - -// Or performs a masked bitwise OR operation between two vectors. -// -// Asm: VPORQ, CPU Feature: AVX512EVEX -func (x Uint64x8) Or(y Uint64x8) Uint64x8 - -// Sub subtracts corresponding elements of two vectors. -// -// Asm: VPSUBQ, CPU Feature: AVX512EVEX -func (x Uint64x8) Sub(y Uint64x8) Uint64x8 - -// Xor performs a masked bitwise XOR operation between two vectors. -// -// Asm: VPXORQ, CPU Feature: AVX512EVEX -func (x Uint64x8) Xor(y Uint64x8) Uint64x8 - -// Add adds corresponding elements of two vectors. -// -// Asm: VPADDB, CPU Feature: AVX -func (x Uint8x16) Add(y Uint8x16) Uint8x16 - -// And performs a bitwise AND operation between two vectors. -// -// Asm: VPAND, CPU Feature: AVX -func (x Uint8x16) And(y Uint8x16) Uint8x16 - -// AndNot performs a bitwise AND NOT operation between two vectors. -// -// Asm: VPANDN, CPU Feature: AVX -func (x Uint8x16) AndNot(y Uint8x16) Uint8x16 - -// Average computes the rounded average of corresponding elements. -// -// Asm: VPAVGB, CPU Feature: AVX -func (x Uint8x16) Average(y Uint8x16) Uint8x16 - -// Equal compares for equality, masked. -// Const Immediate = 0. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x16) Equal(y Uint8x16) Mask8x16 - -// Greater compares for greater than. -// Const Immediate = 6. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x16) Greater(y Uint8x16) Mask8x16 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x16) GreaterEqual(y Uint8x16) Mask8x16 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x16) Less(y Uint8x16) Mask8x16 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x16) LessEqual(y Uint8x16) Mask8x16 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTB, CPU Feature: AVX512EVEX -func (x Uint8x16) MaskedPopCount(y Mask8x16) Uint8x16 - -// Max computes the maximum of corresponding elements. 
-// -// Asm: VPMAXUB, CPU Feature: AVX -func (x Uint8x16) Max(y Uint8x16) Uint8x16 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINUB, CPU Feature: AVX -func (x Uint8x16) Min(y Uint8x16) Uint8x16 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x16) NotEqual(y Uint8x16) Mask8x16 - -// Or performs a bitwise OR operation between two vectors. -// -// Asm: VPOR, CPU Feature: AVX -func (x Uint8x16) Or(y Uint8x16) Uint8x16 - -// SaturatedAdd adds corresponding elements of two vectors with saturation. -// -// Asm: VPADDSB, CPU Feature: AVX -func (x Uint8x16) SaturatedAdd(y Uint8x16) Uint8x16 - -// SaturatedSub subtracts corresponding elements of two vectors with saturation. -// -// Asm: VPSUBSB, CPU Feature: AVX -func (x Uint8x16) SaturatedSub(y Uint8x16) Uint8x16 - -// SaturatedPairDotProd multiplies the elements and add the pairs together with saturation, -// yielding a vector of half as many elements with twice the input element size. -// -// Asm: VPMADDUBSW, CPU Feature: AVX -func (x Uint8x16) SaturatedUnsignedSignedPairDotProd(y Int8x16) Int16x8 - -// Sub subtracts corresponding elements of two vectors. -// -// Asm: VPSUBB, CPU Feature: AVX -func (x Uint8x16) Sub(y Uint8x16) Uint8x16 - -// Xor performs a bitwise XOR operation between two vectors. -// -// Asm: VPXOR, CPU Feature: AVX -func (x Uint8x16) Xor(y Uint8x16) Uint8x16 - -// Add adds corresponding elements of two vectors. -// -// Asm: VPADDB, CPU Feature: AVX2 -func (x Uint8x32) Add(y Uint8x32) Uint8x32 - -// And performs a bitwise AND operation between two vectors. -// -// Asm: VPAND, CPU Feature: AVX2 -func (x Uint8x32) And(y Uint8x32) Uint8x32 - -// AndNot performs a bitwise AND NOT operation between two vectors. -// -// Asm: VPANDN, CPU Feature: AVX2 -func (x Uint8x32) AndNot(y Uint8x32) Uint8x32 - -// Average computes the rounded average of corresponding elements. -// -// Asm: VPAVGB, CPU Feature: AVX2 -func (x Uint8x32) Average(y Uint8x32) Uint8x32 - -// Equal compares for equality, masked. -// Const Immediate = 0. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x32) Equal(y Uint8x32) Mask8x32 - -// Greater compares for greater than. -// Const Immediate = 6. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x32) Greater(y Uint8x32) Mask8x32 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x32) GreaterEqual(y Uint8x32) Mask8x32 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x32) Less(y Uint8x32) Mask8x32 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x32) LessEqual(y Uint8x32) Mask8x32 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTB, CPU Feature: AVX512EVEX -func (x Uint8x32) MaskedPopCount(y Mask8x32) Uint8x32 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXUB, CPU Feature: AVX2 -func (x Uint8x32) Max(y Uint8x32) Uint8x32 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINUB, CPU Feature: AVX2 -func (x Uint8x32) Min(y Uint8x32) Uint8x32 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x32) NotEqual(y Uint8x32) Mask8x32 - -// Or performs a bitwise OR operation between two vectors. 
-// -// Asm: VPOR, CPU Feature: AVX2 -func (x Uint8x32) Or(y Uint8x32) Uint8x32 - -// SaturatedAdd adds corresponding elements of two vectors with saturation. -// -// Asm: VPADDSB, CPU Feature: AVX2 -func (x Uint8x32) SaturatedAdd(y Uint8x32) Uint8x32 - -// SaturatedSub subtracts corresponding elements of two vectors with saturation. -// -// Asm: VPSUBSB, CPU Feature: AVX2 -func (x Uint8x32) SaturatedSub(y Uint8x32) Uint8x32 - -// SaturatedPairDotProd multiplies the elements and add the pairs together with saturation, -// yielding a vector of half as many elements with twice the input element size. -// -// Asm: VPMADDUBSW, CPU Feature: AVX2 -func (x Uint8x32) SaturatedUnsignedSignedPairDotProd(y Int8x32) Int16x16 - -// Sub subtracts corresponding elements of two vectors. -// -// Asm: VPSUBB, CPU Feature: AVX2 -func (x Uint8x32) Sub(y Uint8x32) Uint8x32 - -// Xor performs a bitwise XOR operation between two vectors. -// -// Asm: VPXOR, CPU Feature: AVX2 -func (x Uint8x32) Xor(y Uint8x32) Uint8x32 - -// Add adds corresponding elements of two vectors. -// -// Asm: VPADDB, CPU Feature: AVX512EVEX -func (x Uint8x64) Add(y Uint8x64) Uint8x64 - -// Average computes the rounded average of corresponding elements. -// -// Asm: VPAVGB, CPU Feature: AVX512EVEX -func (x Uint8x64) Average(y Uint8x64) Uint8x64 - -// Equal compares for equality, masked. -// Const Immediate = 0. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x64) Equal(y Uint8x64) Mask8x64 - -// Greater compares for greater than. -// Const Immediate = 6. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x64) Greater(y Uint8x64) Mask8x64 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x64) GreaterEqual(y Uint8x64) Mask8x64 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x64) Less(y Uint8x64) Mask8x64 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x64) LessEqual(y Uint8x64) Mask8x64 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTB, CPU Feature: AVX512EVEX -func (x Uint8x64) MaskedPopCount(y Mask8x64) Uint8x64 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXUB, CPU Feature: AVX512EVEX -func (x Uint8x64) Max(y Uint8x64) Uint8x64 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINUB, CPU Feature: AVX512EVEX -func (x Uint8x64) Min(y Uint8x64) Uint8x64 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x64) NotEqual(y Uint8x64) Mask8x64 - -// SaturatedAdd adds corresponding elements of two vectors with saturation. -// -// Asm: VPADDSB, CPU Feature: AVX512EVEX -func (x Uint8x64) SaturatedAdd(y Uint8x64) Uint8x64 - -// SaturatedSub subtracts corresponding elements of two vectors with saturation. -// -// Asm: VPSUBSB, CPU Feature: AVX512EVEX -func (x Uint8x64) SaturatedSub(y Uint8x64) Uint8x64 - -// Sub subtracts corresponding elements of two vectors. -// -// Asm: VPSUBB, CPU Feature: AVX512EVEX -func (x Uint8x64) Sub(y Uint8x64) Uint8x64 - -// FusedMultiplyAdd132 performs `(v1 * v3) + v2`. -// -// Asm: VFMADD132PS, CPU Feature: AVX512EVEX -func (x Float32x16) FusedMultiplyAdd132(y Float32x16, z Float32x16) Float32x16 - -// FusedMultiplyAdd213 performs `(v2 * v1) + v3`. 
-// -// Asm: VFMADD213PS, CPU Feature: AVX512EVEX -func (x Float32x16) FusedMultiplyAdd213(y Float32x16, z Float32x16) Float32x16 - -// FusedMultiplyAdd231 performs `(v2 * v3) + v1`. -// -// Asm: VFMADD231PS, CPU Feature: AVX512EVEX -func (x Float32x16) FusedMultiplyAdd231(y Float32x16, z Float32x16) Float32x16 - -// FusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements. -// -// Asm: VFMADDSUB132PS, CPU Feature: AVX512EVEX -func (x Float32x16) FusedMultiplyAddSub132(y Float32x16, z Float32x16) Float32x16 - -// FusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements. -// -// Asm: VFMADDSUB213PS, CPU Feature: AVX512EVEX -func (x Float32x16) FusedMultiplyAddSub213(y Float32x16, z Float32x16) Float32x16 - -// FusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements. -// -// Asm: VFMADDSUB231PS, CPU Feature: AVX512EVEX -func (x Float32x16) FusedMultiplyAddSub231(y Float32x16, z Float32x16) Float32x16 - -// FusedMultiplySub132 performs `(v1 * v3) - v2`. -// -// Asm: VFMSUB132PS, CPU Feature: AVX512EVEX -func (x Float32x16) FusedMultiplySub132(y Float32x16, z Float32x16) Float32x16 - -// FusedMultiplySub213 performs `(v2 * v1) - v3`. -// -// Asm: VFMSUB213PS, CPU Feature: AVX512EVEX -func (x Float32x16) FusedMultiplySub213(y Float32x16, z Float32x16) Float32x16 - -// FusedMultiplySub231 performs `(v2 * v3) - v1`. -// -// Asm: VFMSUB231PS, CPU Feature: AVX512EVEX -func (x Float32x16) FusedMultiplySub231(y Float32x16, z Float32x16) Float32x16 - -// FusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements. -// -// Asm: VFMSUBADD132PS, CPU Feature: AVX512EVEX -func (x Float32x16) FusedMultiplySubAdd132(y Float32x16, z Float32x16) Float32x16 - -// FusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements. -// -// Asm: VFMSUBADD213PS, CPU Feature: AVX512EVEX -func (x Float32x16) FusedMultiplySubAdd213(y Float32x16, z Float32x16) Float32x16 - -// FusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements. -// -// Asm: VFMSUBADD231PS, CPU Feature: AVX512EVEX -func (x Float32x16) FusedMultiplySubAdd231(y Float32x16, z Float32x16) Float32x16 - -// FusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`. -// -// Asm: VFNMADD132PS, CPU Feature: AVX512EVEX -func (x Float32x16) FusedNegativeMultiplyAdd132(y Float32x16, z Float32x16) Float32x16 - -// FusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`. -// -// Asm: VFNMADD213PS, CPU Feature: AVX512EVEX -func (x Float32x16) FusedNegativeMultiplyAdd213(y Float32x16, z Float32x16) Float32x16 - -// FusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`. -// -// Asm: VFNMADD231PS, CPU Feature: AVX512EVEX -func (x Float32x16) FusedNegativeMultiplyAdd231(y Float32x16, z Float32x16) Float32x16 - -// FusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`. -// -// Asm: VFNMSUB132PS, CPU Feature: AVX512EVEX -func (x Float32x16) FusedNegativeMultiplySub132(y Float32x16, z Float32x16) Float32x16 - -// FusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`. -// -// Asm: VFNMSUB213PS, CPU Feature: AVX512EVEX -func (x Float32x16) FusedNegativeMultiplySub213(y Float32x16, z Float32x16) Float32x16 - -// FusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`. 
-// -// Asm: VFNMSUB231PS, CPU Feature: AVX512EVEX -func (x Float32x16) FusedNegativeMultiplySub231(y Float32x16, z Float32x16) Float32x16 - -// Add adds corresponding elements of two vectors. -// -// Asm: VADDPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedAdd(y Float32x16, z Mask32x16) Float32x16 - -// And performs a masked bitwise AND operation between two vectors. -// -// Asm: VANDPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedAnd(y Float32x16, z Mask32x16) Float32x16 - -// AndNot performs a masked bitwise AND NOT operation between two vectors. -// -// Asm: VANDNPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedAndNot(y Float32x16, z Mask32x16) Float32x16 - -// Div divides elements of two vectors. -// -// Asm: VDIVPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedDiv(y Float32x16, z Mask32x16) Float32x16 - -// Equal compares for equality, masked. -// Const Immediate = 0. -// -// Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedEqual(y Float32x16, z Mask32x16) Mask32x16 - -// Greater compares for greater than. -// Const Immediate = 6. -// -// Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedGreater(y Float32x16, z Mask32x16) Mask32x16 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedGreaterEqual(y Float32x16, z Mask32x16) Mask32x16 - -// IsNan checks if elements are NaN. Use as x.IsNan(x). -// Const Immediate = 3. -// -// Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedIsNan(y Float32x16, z Mask32x16) Mask32x16 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedLess(y Float32x16, z Mask32x16) Mask32x16 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedLessEqual(y Float32x16, z Mask32x16) Mask32x16 - -// Max computes the maximum of corresponding elements. -// -// Asm: VMAXPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedMax(y Float32x16, z Mask32x16) Float32x16 - -// Min computes the minimum of corresponding elements. -// -// Asm: VMINPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedMin(y Float32x16, z Mask32x16) Float32x16 - -// Mul multiplies corresponding elements of two vectors, masked. -// -// Asm: VMULPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedMul(y Float32x16, z Mask32x16) Float32x16 - -// MulByPowOf2 multiplies elements by a power of 2. -// -// Asm: VSCALEFPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedMulByPowOf2(y Float32x16, z Mask32x16) Float32x16 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedNotEqual(y Float32x16, z Mask32x16) Mask32x16 - -// Or performs a masked bitwise OR operation between two vectors. -// -// Asm: VORPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedOr(y Float32x16, z Mask32x16) Float32x16 - -// Sub subtracts corresponding elements of two vectors. -// -// Asm: VADDPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedSub(y Float32x16, z Mask32x16) Float32x16 - -// Xor performs a masked bitwise XOR operation between two vectors. -// -// Asm: VXORPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedXor(y Float32x16, z Mask32x16) Float32x16 - -// FusedMultiplyAdd132 performs `(v1 * v3) + v2`. 
-// -// Asm: VFMADD132PS, CPU Feature: AVX512EVEX -func (x Float32x4) FusedMultiplyAdd132(y Float32x4, z Float32x4) Float32x4 - -// FusedMultiplyAdd213 performs `(v2 * v1) + v3`. -// -// Asm: VFMADD213PS, CPU Feature: AVX512EVEX -func (x Float32x4) FusedMultiplyAdd213(y Float32x4, z Float32x4) Float32x4 - -// FusedMultiplyAdd231 performs `(v2 * v3) + v1`. -// -// Asm: VFMADD231PS, CPU Feature: AVX512EVEX -func (x Float32x4) FusedMultiplyAdd231(y Float32x4, z Float32x4) Float32x4 - -// FusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements. -// -// Asm: VFMADDSUB132PS, CPU Feature: AVX512EVEX -func (x Float32x4) FusedMultiplyAddSub132(y Float32x4, z Float32x4) Float32x4 - -// FusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements. -// -// Asm: VFMADDSUB213PS, CPU Feature: AVX512EVEX -func (x Float32x4) FusedMultiplyAddSub213(y Float32x4, z Float32x4) Float32x4 - -// FusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements. -// -// Asm: VFMADDSUB231PS, CPU Feature: AVX512EVEX -func (x Float32x4) FusedMultiplyAddSub231(y Float32x4, z Float32x4) Float32x4 - -// FusedMultiplySub132 performs `(v1 * v3) - v2`. -// -// Asm: VFMSUB132PS, CPU Feature: AVX512EVEX -func (x Float32x4) FusedMultiplySub132(y Float32x4, z Float32x4) Float32x4 - -// FusedMultiplySub213 performs `(v2 * v1) - v3`. -// -// Asm: VFMSUB213PS, CPU Feature: AVX512EVEX -func (x Float32x4) FusedMultiplySub213(y Float32x4, z Float32x4) Float32x4 - -// FusedMultiplySub231 performs `(v2 * v3) - v1`. -// -// Asm: VFMSUB231PS, CPU Feature: AVX512EVEX -func (x Float32x4) FusedMultiplySub231(y Float32x4, z Float32x4) Float32x4 - -// FusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements. -// -// Asm: VFMSUBADD132PS, CPU Feature: AVX512EVEX -func (x Float32x4) FusedMultiplySubAdd132(y Float32x4, z Float32x4) Float32x4 - -// FusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements. -// -// Asm: VFMSUBADD213PS, CPU Feature: AVX512EVEX -func (x Float32x4) FusedMultiplySubAdd213(y Float32x4, z Float32x4) Float32x4 - -// FusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements. -// -// Asm: VFMSUBADD231PS, CPU Feature: AVX512EVEX -func (x Float32x4) FusedMultiplySubAdd231(y Float32x4, z Float32x4) Float32x4 - -// FusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`. -// -// Asm: VFNMADD132PS, CPU Feature: AVX512EVEX -func (x Float32x4) FusedNegativeMultiplyAdd132(y Float32x4, z Float32x4) Float32x4 - -// FusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`. -// -// Asm: VFNMADD213PS, CPU Feature: AVX512EVEX -func (x Float32x4) FusedNegativeMultiplyAdd213(y Float32x4, z Float32x4) Float32x4 - -// FusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`. -// -// Asm: VFNMADD231PS, CPU Feature: AVX512EVEX -func (x Float32x4) FusedNegativeMultiplyAdd231(y Float32x4, z Float32x4) Float32x4 - -// FusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`. -// -// Asm: VFNMSUB132PS, CPU Feature: AVX512EVEX -func (x Float32x4) FusedNegativeMultiplySub132(y Float32x4, z Float32x4) Float32x4 - -// FusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`. 
-// -// Asm: VFNMSUB213PS, CPU Feature: AVX512EVEX -func (x Float32x4) FusedNegativeMultiplySub213(y Float32x4, z Float32x4) Float32x4 - -// FusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`. -// -// Asm: VFNMSUB231PS, CPU Feature: AVX512EVEX -func (x Float32x4) FusedNegativeMultiplySub231(y Float32x4, z Float32x4) Float32x4 - -// Add adds corresponding elements of two vectors. -// -// Asm: VADDPS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedAdd(y Float32x4, z Mask32x4) Float32x4 - -// And performs a masked bitwise AND operation between two vectors. -// -// Asm: VANDPS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedAnd(y Float32x4, z Mask32x4) Float32x4 - -// AndNot performs a masked bitwise AND NOT operation between two vectors. -// -// Asm: VANDNPS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedAndNot(y Float32x4, z Mask32x4) Float32x4 - -// Div divides elements of two vectors. -// -// Asm: VDIVPS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedDiv(y Float32x4, z Mask32x4) Float32x4 - -// Equal compares for equality, masked. -// Const Immediate = 0. -// -// Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedEqual(y Float32x4, z Mask32x4) Mask32x4 - -// Greater compares for greater than. -// Const Immediate = 6. -// -// Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedGreater(y Float32x4, z Mask32x4) Mask32x4 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedGreaterEqual(y Float32x4, z Mask32x4) Mask32x4 - -// IsNan checks if elements are NaN. Use as x.IsNan(x). -// Const Immediate = 3. -// -// Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedIsNan(y Float32x4, z Mask32x4) Mask32x4 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedLess(y Float32x4, z Mask32x4) Mask32x4 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedLessEqual(y Float32x4, z Mask32x4) Mask32x4 - -// Max computes the maximum of corresponding elements. -// -// Asm: VMAXPS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedMax(y Float32x4, z Mask32x4) Float32x4 - -// Min computes the minimum of corresponding elements. -// -// Asm: VMINPS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedMin(y Float32x4, z Mask32x4) Float32x4 - -// Mul multiplies corresponding elements of two vectors, masked. -// -// Asm: VMULPS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedMul(y Float32x4, z Mask32x4) Float32x4 - -// MulByPowOf2 multiplies elements by a power of 2. -// -// Asm: VSCALEFPS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedMulByPowOf2(y Float32x4, z Mask32x4) Float32x4 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedNotEqual(y Float32x4, z Mask32x4) Mask32x4 - -// Or performs a masked bitwise OR operation between two vectors. -// -// Asm: VORPS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedOr(y Float32x4, z Mask32x4) Float32x4 - -// Sub subtracts corresponding elements of two vectors. -// -// Asm: VADDPS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedSub(y Float32x4, z Mask32x4) Float32x4 - -// Xor performs a masked bitwise XOR operation between two vectors. 
-// -// Asm: VXORPS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedXor(y Float32x4, z Mask32x4) Float32x4 - -// FusedMultiplyAdd132 performs `(v1 * v3) + v2`. -// -// Asm: VFMADD132PS, CPU Feature: AVX512EVEX -func (x Float32x8) FusedMultiplyAdd132(y Float32x8, z Float32x8) Float32x8 - -// FusedMultiplyAdd213 performs `(v2 * v1) + v3`. -// -// Asm: VFMADD213PS, CPU Feature: AVX512EVEX -func (x Float32x8) FusedMultiplyAdd213(y Float32x8, z Float32x8) Float32x8 - -// FusedMultiplyAdd231 performs `(v2 * v3) + v1`. -// -// Asm: VFMADD231PS, CPU Feature: AVX512EVEX -func (x Float32x8) FusedMultiplyAdd231(y Float32x8, z Float32x8) Float32x8 - -// FusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements. -// -// Asm: VFMADDSUB132PS, CPU Feature: AVX512EVEX -func (x Float32x8) FusedMultiplyAddSub132(y Float32x8, z Float32x8) Float32x8 - -// FusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements. -// -// Asm: VFMADDSUB213PS, CPU Feature: AVX512EVEX -func (x Float32x8) FusedMultiplyAddSub213(y Float32x8, z Float32x8) Float32x8 - -// FusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements. -// -// Asm: VFMADDSUB231PS, CPU Feature: AVX512EVEX -func (x Float32x8) FusedMultiplyAddSub231(y Float32x8, z Float32x8) Float32x8 - -// FusedMultiplySub132 performs `(v1 * v3) - v2`. -// -// Asm: VFMSUB132PS, CPU Feature: AVX512EVEX -func (x Float32x8) FusedMultiplySub132(y Float32x8, z Float32x8) Float32x8 - -// FusedMultiplySub213 performs `(v2 * v1) - v3`. -// -// Asm: VFMSUB213PS, CPU Feature: AVX512EVEX -func (x Float32x8) FusedMultiplySub213(y Float32x8, z Float32x8) Float32x8 - -// FusedMultiplySub231 performs `(v2 * v3) - v1`. -// -// Asm: VFMSUB231PS, CPU Feature: AVX512EVEX -func (x Float32x8) FusedMultiplySub231(y Float32x8, z Float32x8) Float32x8 - -// FusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements. -// -// Asm: VFMSUBADD132PS, CPU Feature: AVX512EVEX -func (x Float32x8) FusedMultiplySubAdd132(y Float32x8, z Float32x8) Float32x8 - -// FusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements. -// -// Asm: VFMSUBADD213PS, CPU Feature: AVX512EVEX -func (x Float32x8) FusedMultiplySubAdd213(y Float32x8, z Float32x8) Float32x8 - -// FusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements. -// -// Asm: VFMSUBADD231PS, CPU Feature: AVX512EVEX -func (x Float32x8) FusedMultiplySubAdd231(y Float32x8, z Float32x8) Float32x8 - -// FusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`. -// -// Asm: VFNMADD132PS, CPU Feature: AVX512EVEX -func (x Float32x8) FusedNegativeMultiplyAdd132(y Float32x8, z Float32x8) Float32x8 - -// FusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`. -// -// Asm: VFNMADD213PS, CPU Feature: AVX512EVEX -func (x Float32x8) FusedNegativeMultiplyAdd213(y Float32x8, z Float32x8) Float32x8 - -// FusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`. -// -// Asm: VFNMADD231PS, CPU Feature: AVX512EVEX -func (x Float32x8) FusedNegativeMultiplyAdd231(y Float32x8, z Float32x8) Float32x8 - -// FusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`. 
-// -// Asm: VFNMSUB132PS, CPU Feature: AVX512EVEX -func (x Float32x8) FusedNegativeMultiplySub132(y Float32x8, z Float32x8) Float32x8 - -// FusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`. -// -// Asm: VFNMSUB213PS, CPU Feature: AVX512EVEX -func (x Float32x8) FusedNegativeMultiplySub213(y Float32x8, z Float32x8) Float32x8 - -// FusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`. -// -// Asm: VFNMSUB231PS, CPU Feature: AVX512EVEX -func (x Float32x8) FusedNegativeMultiplySub231(y Float32x8, z Float32x8) Float32x8 - -// Add adds corresponding elements of two vectors. -// -// Asm: VADDPS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedAdd(y Float32x8, z Mask32x8) Float32x8 - -// And performs a masked bitwise AND operation between two vectors. -// -// Asm: VANDPS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedAnd(y Float32x8, z Mask32x8) Float32x8 - -// AndNot performs a masked bitwise AND NOT operation between two vectors. -// -// Asm: VANDNPS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedAndNot(y Float32x8, z Mask32x8) Float32x8 - -// Div divides elements of two vectors. -// -// Asm: VDIVPS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedDiv(y Float32x8, z Mask32x8) Float32x8 - -// Equal compares for equality, masked. -// Const Immediate = 0. -// -// Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedEqual(y Float32x8, z Mask32x8) Mask32x8 - -// Greater compares for greater than. -// Const Immediate = 6. -// -// Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedGreater(y Float32x8, z Mask32x8) Mask32x8 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedGreaterEqual(y Float32x8, z Mask32x8) Mask32x8 - -// IsNan checks if elements are NaN. Use as x.IsNan(x). -// Const Immediate = 3. -// -// Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedIsNan(y Float32x8, z Mask32x8) Mask32x8 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedLess(y Float32x8, z Mask32x8) Mask32x8 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedLessEqual(y Float32x8, z Mask32x8) Mask32x8 - -// Max computes the maximum of corresponding elements. -// -// Asm: VMAXPS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedMax(y Float32x8, z Mask32x8) Float32x8 - -// Min computes the minimum of corresponding elements. -// -// Asm: VMINPS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedMin(y Float32x8, z Mask32x8) Float32x8 - -// Mul multiplies corresponding elements of two vectors, masked. -// -// Asm: VMULPS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedMul(y Float32x8, z Mask32x8) Float32x8 - -// MulByPowOf2 multiplies elements by a power of 2. -// -// Asm: VSCALEFPS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedMulByPowOf2(y Float32x8, z Mask32x8) Float32x8 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedNotEqual(y Float32x8, z Mask32x8) Mask32x8 - -// Or performs a masked bitwise OR operation between two vectors. -// -// Asm: VORPS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedOr(y Float32x8, z Mask32x8) Float32x8 - -// Sub subtracts corresponding elements of two vectors. 
-//
-// Asm: VSUBPS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedSub(y Float32x8, z Mask32x8) Float32x8
-
-// Xor performs a masked bitwise XOR operation between two vectors.
-//
-// Asm: VXORPS, CPU Feature: AVX512EVEX
-func (x Float32x8) MaskedXor(y Float32x8, z Mask32x8) Float32x8
-
-// FusedMultiplyAdd132 performs `(v1 * v3) + v2`.
-//
-// Asm: VFMADD132PD, CPU Feature: AVX512EVEX
-func (x Float64x2) FusedMultiplyAdd132(y Float64x2, z Float64x2) Float64x2
-
-// FusedMultiplyAdd213 performs `(v2 * v1) + v3`.
-//
-// Asm: VFMADD213PD, CPU Feature: AVX512EVEX
-func (x Float64x2) FusedMultiplyAdd213(y Float64x2, z Float64x2) Float64x2
-
-// FusedMultiplyAdd231 performs `(v2 * v3) + v1`.
-//
-// Asm: VFMADD231PD, CPU Feature: AVX512EVEX
-func (x Float64x2) FusedMultiplyAdd231(y Float64x2, z Float64x2) Float64x2
-
-// FusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements.
-//
-// Asm: VFMADDSUB132PD, CPU Feature: AVX512EVEX
-func (x Float64x2) FusedMultiplyAddSub132(y Float64x2, z Float64x2) Float64x2
-
-// FusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements.
-//
-// Asm: VFMADDSUB213PD, CPU Feature: AVX512EVEX
-func (x Float64x2) FusedMultiplyAddSub213(y Float64x2, z Float64x2) Float64x2
-
-// FusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements.
-//
-// Asm: VFMADDSUB231PD, CPU Feature: AVX512EVEX
-func (x Float64x2) FusedMultiplyAddSub231(y Float64x2, z Float64x2) Float64x2
-
-// FusedMultiplySub132 performs `(v1 * v3) - v2`.
-//
-// Asm: VFMSUB132PD, CPU Feature: AVX512EVEX
-func (x Float64x2) FusedMultiplySub132(y Float64x2, z Float64x2) Float64x2
-
-// FusedMultiplySub213 performs `(v2 * v1) - v3`.
-//
-// Asm: VFMSUB213PD, CPU Feature: AVX512EVEX
-func (x Float64x2) FusedMultiplySub213(y Float64x2, z Float64x2) Float64x2
-
-// FusedMultiplySub231 performs `(v2 * v3) - v1`.
-//
-// Asm: VFMSUB231PD, CPU Feature: AVX512EVEX
-func (x Float64x2) FusedMultiplySub231(y Float64x2, z Float64x2) Float64x2
-
-// FusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements.
-//
-// Asm: VFMSUBADD132PD, CPU Feature: AVX512EVEX
-func (x Float64x2) FusedMultiplySubAdd132(y Float64x2, z Float64x2) Float64x2
-
-// FusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements.
-//
-// Asm: VFMSUBADD213PD, CPU Feature: AVX512EVEX
-func (x Float64x2) FusedMultiplySubAdd213(y Float64x2, z Float64x2) Float64x2
-
-// FusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements.
-//
-// Asm: VFMSUBADD231PD, CPU Feature: AVX512EVEX
-func (x Float64x2) FusedMultiplySubAdd231(y Float64x2, z Float64x2) Float64x2
-
-// FusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`.
-//
-// Asm: VFNMADD132PD, CPU Feature: AVX512EVEX
-func (x Float64x2) FusedNegativeMultiplyAdd132(y Float64x2, z Float64x2) Float64x2
-
-// FusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`.
-//
-// Asm: VFNMADD213PD, CPU Feature: AVX512EVEX
-func (x Float64x2) FusedNegativeMultiplyAdd213(y Float64x2, z Float64x2) Float64x2
-
-// FusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`.
-// -// Asm: VFNMADD231PD, CPU Feature: AVX512EVEX -func (x Float64x2) FusedNegativeMultiplyAdd231(y Float64x2, z Float64x2) Float64x2 - -// FusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`. -// -// Asm: VFNMSUB132PD, CPU Feature: AVX512EVEX -func (x Float64x2) FusedNegativeMultiplySub132(y Float64x2, z Float64x2) Float64x2 - -// FusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`. -// -// Asm: VFNMSUB213PD, CPU Feature: AVX512EVEX -func (x Float64x2) FusedNegativeMultiplySub213(y Float64x2, z Float64x2) Float64x2 - -// FusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`. -// -// Asm: VFNMSUB231PD, CPU Feature: AVX512EVEX -func (x Float64x2) FusedNegativeMultiplySub231(y Float64x2, z Float64x2) Float64x2 - -// Add adds corresponding elements of two vectors. -// -// Asm: VADDPD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedAdd(y Float64x2, z Mask64x2) Float64x2 - -// And performs a masked bitwise AND operation between two vectors. -// -// Asm: VANDPD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedAnd(y Float64x2, z Mask64x2) Float64x2 - -// AndNot performs a masked bitwise AND NOT operation between two vectors. -// -// Asm: VANDNPD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedAndNot(y Float64x2, z Mask64x2) Float64x2 - -// Div divides elements of two vectors. -// -// Asm: VDIVPD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedDiv(y Float64x2, z Mask64x2) Float64x2 - -// Equal compares for equality, masked. -// Const Immediate = 0. -// -// Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedEqual(y Float64x2, z Mask64x2) Mask64x2 - -// Greater compares for greater than. -// Const Immediate = 6. -// -// Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedGreater(y Float64x2, z Mask64x2) Mask64x2 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedGreaterEqual(y Float64x2, z Mask64x2) Mask64x2 - -// IsNan checks if elements are NaN. Use as x.IsNan(x). -// Const Immediate = 3. -// -// Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedIsNan(y Float64x2, z Mask64x2) Mask64x2 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedLess(y Float64x2, z Mask64x2) Mask64x2 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedLessEqual(y Float64x2, z Mask64x2) Mask64x2 - -// Max computes the maximum of corresponding elements. -// -// Asm: VMAXPD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedMax(y Float64x2, z Mask64x2) Float64x2 - -// Min computes the minimum of corresponding elements. -// -// Asm: VMINPD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedMin(y Float64x2, z Mask64x2) Float64x2 - -// Mul multiplies corresponding elements of two vectors, masked. -// -// Asm: VMULPD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedMul(y Float64x2, z Mask64x2) Float64x2 - -// MulByPowOf2 multiplies elements by a power of 2. -// -// Asm: VSCALEFPD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedMulByPowOf2(y Float64x2, z Mask64x2) Float64x2 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedNotEqual(y Float64x2, z Mask64x2) Mask64x2 - -// Or performs a masked bitwise OR operation between two vectors. 
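
Editorial note: the `Const Immediate` lines on the comparison stubs are the predicate byte passed to VCMPPS/VCMPPD. Collected from the documentation in this file, with a one-lane scalar model (hypothetical names; the ordered/unordered NaN subtleties of the real predicates are glossed over):

package cmppred

import "math"

// Predicate immediates as documented on the VCMPPS/VCMPPD stubs.
const (
	cmpEqual        = 0 // Equal
	cmpLess         = 1 // Less
	cmpLessEqual    = 2 // LessEqual
	cmpUnordered    = 3 // IsNan
	cmpNotEqual     = 4 // NotEqual
	cmpGreaterEqual = 5 // GreaterEqual
	cmpGreater      = 6 // Greater
)

// evalPredicate models one float64 lane of the compare for the immediates above.
func evalPredicate(imm int, a, b float64) bool {
	switch imm {
	case cmpEqual:
		return a == b
	case cmpLess:
		return a < b
	case cmpLessEqual:
		return a <= b
	case cmpUnordered:
		return math.IsNaN(a) || math.IsNaN(b)
	case cmpNotEqual:
		return a != b
	case cmpGreaterEqual:
		return a >= b
	case cmpGreater:
		return a > b
	}
	return false
}
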
-//
-// Asm: VORPD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedOr(y Float64x2, z Mask64x2) Float64x2
-
-// Sub subtracts corresponding elements of two vectors.
-//
-// Asm: VSUBPD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedSub(y Float64x2, z Mask64x2) Float64x2
-
-// Xor performs a masked bitwise XOR operation between two vectors.
-//
-// Asm: VXORPD, CPU Feature: AVX512EVEX
-func (x Float64x2) MaskedXor(y Float64x2, z Mask64x2) Float64x2
-
-// FusedMultiplyAdd132 performs `(v1 * v3) + v2`.
-//
-// Asm: VFMADD132PD, CPU Feature: AVX512EVEX
-func (x Float64x4) FusedMultiplyAdd132(y Float64x4, z Float64x4) Float64x4
-
-// FusedMultiplyAdd213 performs `(v2 * v1) + v3`.
-//
-// Asm: VFMADD213PD, CPU Feature: AVX512EVEX
-func (x Float64x4) FusedMultiplyAdd213(y Float64x4, z Float64x4) Float64x4
-
-// FusedMultiplyAdd231 performs `(v2 * v3) + v1`.
-//
-// Asm: VFMADD231PD, CPU Feature: AVX512EVEX
-func (x Float64x4) FusedMultiplyAdd231(y Float64x4, z Float64x4) Float64x4
-
-// FusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements.
-//
-// Asm: VFMADDSUB132PD, CPU Feature: AVX512EVEX
-func (x Float64x4) FusedMultiplyAddSub132(y Float64x4, z Float64x4) Float64x4
-
-// FusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements.
-//
-// Asm: VFMADDSUB213PD, CPU Feature: AVX512EVEX
-func (x Float64x4) FusedMultiplyAddSub213(y Float64x4, z Float64x4) Float64x4
-
-// FusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements.
-//
-// Asm: VFMADDSUB231PD, CPU Feature: AVX512EVEX
-func (x Float64x4) FusedMultiplyAddSub231(y Float64x4, z Float64x4) Float64x4
-
-// FusedMultiplySub132 performs `(v1 * v3) - v2`.
-//
-// Asm: VFMSUB132PD, CPU Feature: AVX512EVEX
-func (x Float64x4) FusedMultiplySub132(y Float64x4, z Float64x4) Float64x4
-
-// FusedMultiplySub213 performs `(v2 * v1) - v3`.
-//
-// Asm: VFMSUB213PD, CPU Feature: AVX512EVEX
-func (x Float64x4) FusedMultiplySub213(y Float64x4, z Float64x4) Float64x4
-
-// FusedMultiplySub231 performs `(v2 * v3) - v1`.
-//
-// Asm: VFMSUB231PD, CPU Feature: AVX512EVEX
-func (x Float64x4) FusedMultiplySub231(y Float64x4, z Float64x4) Float64x4
-
-// FusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements.
-//
-// Asm: VFMSUBADD132PD, CPU Feature: AVX512EVEX
-func (x Float64x4) FusedMultiplySubAdd132(y Float64x4, z Float64x4) Float64x4
-
-// FusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements.
-//
-// Asm: VFMSUBADD213PD, CPU Feature: AVX512EVEX
-func (x Float64x4) FusedMultiplySubAdd213(y Float64x4, z Float64x4) Float64x4
-
-// FusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements.
-//
-// Asm: VFMSUBADD231PD, CPU Feature: AVX512EVEX
-func (x Float64x4) FusedMultiplySubAdd231(y Float64x4, z Float64x4) Float64x4
-
-// FusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`.
-//
-// Asm: VFNMADD132PD, CPU Feature: AVX512EVEX
-func (x Float64x4) FusedNegativeMultiplyAdd132(y Float64x4, z Float64x4) Float64x4
-
-// FusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`.
-//
-// Asm: VFNMADD213PD, CPU Feature: AVX512EVEX
-func (x Float64x4) FusedNegativeMultiplyAdd213(y Float64x4, z Float64x4) Float64x4
-
-// FusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`.
-// -// Asm: VFNMADD231PD, CPU Feature: AVX512EVEX -func (x Float64x4) FusedNegativeMultiplyAdd231(y Float64x4, z Float64x4) Float64x4 - -// FusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`. -// -// Asm: VFNMSUB132PD, CPU Feature: AVX512EVEX -func (x Float64x4) FusedNegativeMultiplySub132(y Float64x4, z Float64x4) Float64x4 - -// FusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`. -// -// Asm: VFNMSUB213PD, CPU Feature: AVX512EVEX -func (x Float64x4) FusedNegativeMultiplySub213(y Float64x4, z Float64x4) Float64x4 - -// FusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`. -// -// Asm: VFNMSUB231PD, CPU Feature: AVX512EVEX -func (x Float64x4) FusedNegativeMultiplySub231(y Float64x4, z Float64x4) Float64x4 - -// Add adds corresponding elements of two vectors. -// -// Asm: VADDPD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedAdd(y Float64x4, z Mask64x4) Float64x4 - -// And performs a masked bitwise AND operation between two vectors. -// -// Asm: VANDPD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedAnd(y Float64x4, z Mask64x4) Float64x4 - -// AndNot performs a masked bitwise AND NOT operation between two vectors. -// -// Asm: VANDNPD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedAndNot(y Float64x4, z Mask64x4) Float64x4 - -// Div divides elements of two vectors. -// -// Asm: VDIVPD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedDiv(y Float64x4, z Mask64x4) Float64x4 - -// Equal compares for equality, masked. -// Const Immediate = 0. -// -// Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedEqual(y Float64x4, z Mask64x4) Mask64x4 - -// Greater compares for greater than. -// Const Immediate = 6. -// -// Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedGreater(y Float64x4, z Mask64x4) Mask64x4 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedGreaterEqual(y Float64x4, z Mask64x4) Mask64x4 - -// IsNan checks if elements are NaN. Use as x.IsNan(x). -// Const Immediate = 3. -// -// Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedIsNan(y Float64x4, z Mask64x4) Mask64x4 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedLess(y Float64x4, z Mask64x4) Mask64x4 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedLessEqual(y Float64x4, z Mask64x4) Mask64x4 - -// Max computes the maximum of corresponding elements. -// -// Asm: VMAXPD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedMax(y Float64x4, z Mask64x4) Float64x4 - -// Min computes the minimum of corresponding elements. -// -// Asm: VMINPD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedMin(y Float64x4, z Mask64x4) Float64x4 - -// Mul multiplies corresponding elements of two vectors, masked. -// -// Asm: VMULPD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedMul(y Float64x4, z Mask64x4) Float64x4 - -// MulByPowOf2 multiplies elements by a power of 2. -// -// Asm: VSCALEFPD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedMulByPowOf2(y Float64x4, z Mask64x4) Float64x4 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedNotEqual(y Float64x4, z Mask64x4) Mask64x4 - -// Or performs a masked bitwise OR operation between two vectors. 
-//
-// Asm: VORPD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedOr(y Float64x4, z Mask64x4) Float64x4
-
-// Sub subtracts corresponding elements of two vectors.
-//
-// Asm: VSUBPD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedSub(y Float64x4, z Mask64x4) Float64x4
-
-// Xor performs a masked bitwise XOR operation between two vectors.
-//
-// Asm: VXORPD, CPU Feature: AVX512EVEX
-func (x Float64x4) MaskedXor(y Float64x4, z Mask64x4) Float64x4
-
-// FusedMultiplyAdd132 performs `(v1 * v3) + v2`.
-//
-// Asm: VFMADD132PD, CPU Feature: AVX512EVEX
-func (x Float64x8) FusedMultiplyAdd132(y Float64x8, z Float64x8) Float64x8
-
-// FusedMultiplyAdd213 performs `(v2 * v1) + v3`.
-//
-// Asm: VFMADD213PD, CPU Feature: AVX512EVEX
-func (x Float64x8) FusedMultiplyAdd213(y Float64x8, z Float64x8) Float64x8
-
-// FusedMultiplyAdd231 performs `(v2 * v3) + v1`.
-//
-// Asm: VFMADD231PD, CPU Feature: AVX512EVEX
-func (x Float64x8) FusedMultiplyAdd231(y Float64x8, z Float64x8) Float64x8
-
-// FusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements.
-//
-// Asm: VFMADDSUB132PD, CPU Feature: AVX512EVEX
-func (x Float64x8) FusedMultiplyAddSub132(y Float64x8, z Float64x8) Float64x8
-
-// FusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements.
-//
-// Asm: VFMADDSUB213PD, CPU Feature: AVX512EVEX
-func (x Float64x8) FusedMultiplyAddSub213(y Float64x8, z Float64x8) Float64x8
-
-// FusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements.
-//
-// Asm: VFMADDSUB231PD, CPU Feature: AVX512EVEX
-func (x Float64x8) FusedMultiplyAddSub231(y Float64x8, z Float64x8) Float64x8
-
-// FusedMultiplySub132 performs `(v1 * v3) - v2`.
-//
-// Asm: VFMSUB132PD, CPU Feature: AVX512EVEX
-func (x Float64x8) FusedMultiplySub132(y Float64x8, z Float64x8) Float64x8
-
-// FusedMultiplySub213 performs `(v2 * v1) - v3`.
-//
-// Asm: VFMSUB213PD, CPU Feature: AVX512EVEX
-func (x Float64x8) FusedMultiplySub213(y Float64x8, z Float64x8) Float64x8
-
-// FusedMultiplySub231 performs `(v2 * v3) - v1`.
-//
-// Asm: VFMSUB231PD, CPU Feature: AVX512EVEX
-func (x Float64x8) FusedMultiplySub231(y Float64x8, z Float64x8) Float64x8
-
-// FusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements.
-//
-// Asm: VFMSUBADD132PD, CPU Feature: AVX512EVEX
-func (x Float64x8) FusedMultiplySubAdd132(y Float64x8, z Float64x8) Float64x8
-
-// FusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements.
-//
-// Asm: VFMSUBADD213PD, CPU Feature: AVX512EVEX
-func (x Float64x8) FusedMultiplySubAdd213(y Float64x8, z Float64x8) Float64x8
-
-// FusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements.
-//
-// Asm: VFMSUBADD231PD, CPU Feature: AVX512EVEX
-func (x Float64x8) FusedMultiplySubAdd231(y Float64x8, z Float64x8) Float64x8
-
-// FusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`.
-//
-// Asm: VFNMADD132PD, CPU Feature: AVX512EVEX
-func (x Float64x8) FusedNegativeMultiplyAdd132(y Float64x8, z Float64x8) Float64x8
-
-// FusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`.
-//
-// Asm: VFNMADD213PD, CPU Feature: AVX512EVEX
-func (x Float64x8) FusedNegativeMultiplyAdd213(y Float64x8, z Float64x8) Float64x8
-
-// FusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`.
-// -// Asm: VFNMADD231PD, CPU Feature: AVX512EVEX -func (x Float64x8) FusedNegativeMultiplyAdd231(y Float64x8, z Float64x8) Float64x8 - -// FusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`. -// -// Asm: VFNMSUB132PD, CPU Feature: AVX512EVEX -func (x Float64x8) FusedNegativeMultiplySub132(y Float64x8, z Float64x8) Float64x8 - -// FusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`. -// -// Asm: VFNMSUB213PD, CPU Feature: AVX512EVEX -func (x Float64x8) FusedNegativeMultiplySub213(y Float64x8, z Float64x8) Float64x8 - -// FusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`. -// -// Asm: VFNMSUB231PD, CPU Feature: AVX512EVEX -func (x Float64x8) FusedNegativeMultiplySub231(y Float64x8, z Float64x8) Float64x8 - -// Add adds corresponding elements of two vectors. -// -// Asm: VADDPD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedAdd(y Float64x8, z Mask64x8) Float64x8 - -// And performs a masked bitwise AND operation between two vectors. -// -// Asm: VANDPD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedAnd(y Float64x8, z Mask64x8) Float64x8 - -// AndNot performs a masked bitwise AND NOT operation between two vectors. -// -// Asm: VANDNPD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedAndNot(y Float64x8, z Mask64x8) Float64x8 - -// Div divides elements of two vectors. -// -// Asm: VDIVPD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedDiv(y Float64x8, z Mask64x8) Float64x8 - -// Equal compares for equality, masked. -// Const Immediate = 0. -// -// Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedEqual(y Float64x8, z Mask64x8) Mask64x8 - -// Greater compares for greater than. -// Const Immediate = 6. -// -// Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedGreater(y Float64x8, z Mask64x8) Mask64x8 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedGreaterEqual(y Float64x8, z Mask64x8) Mask64x8 - -// IsNan checks if elements are NaN. Use as x.IsNan(x). -// Const Immediate = 3. -// -// Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedIsNan(y Float64x8, z Mask64x8) Mask64x8 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedLess(y Float64x8, z Mask64x8) Mask64x8 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedLessEqual(y Float64x8, z Mask64x8) Mask64x8 - -// Max computes the maximum of corresponding elements. -// -// Asm: VMAXPD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedMax(y Float64x8, z Mask64x8) Float64x8 - -// Min computes the minimum of corresponding elements. -// -// Asm: VMINPD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedMin(y Float64x8, z Mask64x8) Float64x8 - -// Mul multiplies corresponding elements of two vectors, masked. -// -// Asm: VMULPD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedMul(y Float64x8, z Mask64x8) Float64x8 - -// MulByPowOf2 multiplies elements by a power of 2. -// -// Asm: VSCALEFPD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedMulByPowOf2(y Float64x8, z Mask64x8) Float64x8 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedNotEqual(y Float64x8, z Mask64x8) Mask64x8 - -// Or performs a masked bitwise OR operation between two vectors. 
-//
-// Asm: VORPD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedOr(y Float64x8, z Mask64x8) Float64x8
-
-// Sub subtracts corresponding elements of two vectors.
-//
-// Asm: VSUBPD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedSub(y Float64x8, z Mask64x8) Float64x8
-
-// Xor performs a masked bitwise XOR operation between two vectors.
-//
-// Asm: VXORPD, CPU Feature: AVX512EVEX
-func (x Float64x8) MaskedXor(y Float64x8, z Mask64x8) Float64x8
-
-// Add adds corresponding elements of two vectors.
-//
-// Asm: VPADDW, CPU Feature: AVX512EVEX
-func (x Int16x16) MaskedAdd(y Int16x16, z Mask16x16) Int16x16
-
-// Equal compares for equality, masked.
-// Const Immediate = 0.
-//
-// Asm: VPCMPEQW, CPU Feature: AVX512EVEX
-func (x Int16x16) MaskedEqual(y Int16x16, z Mask16x16) Mask16x16
-
-// Greater compares for greater than.
-// Const Immediate = 6.
-//
-// Asm: VPCMPGTW, CPU Feature: AVX512EVEX
-func (x Int16x16) MaskedGreater(y Int16x16, z Mask16x16) Mask16x16
-
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
-//
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
-func (x Int16x16) MaskedGreaterEqual(y Int16x16, z Mask16x16) Mask16x16
-
-// Less compares for less than.
-// Const Immediate = 1.
-//
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
-func (x Int16x16) MaskedLess(y Int16x16, z Mask16x16) Mask16x16
-
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
-//
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
-func (x Int16x16) MaskedLessEqual(y Int16x16, z Mask16x16) Mask16x16
-
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXSW, CPU Feature: AVX512EVEX
-func (x Int16x16) MaskedMax(y Int16x16, z Mask16x16) Int16x16
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINSW, CPU Feature: AVX512EVEX
-func (x Int16x16) MaskedMin(y Int16x16, z Mask16x16) Int16x16
-
-// MulHigh multiplies elements and stores the high part of the result, masked.
-//
-// Asm: VPMULHW, CPU Feature: AVX512EVEX
-func (x Int16x16) MaskedMulHigh(y Int16x16, z Mask16x16) Int16x16
-
-// MulLow multiplies elements and stores the low part of the result, masked.
-//
-// Asm: VPMULLW, CPU Feature: AVX512EVEX
-func (x Int16x16) MaskedMulLow(y Int16x16, z Mask16x16) Int16x16
-
-// NotEqual compares for inequality.
-// Const Immediate = 4.
-//
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
-func (x Int16x16) MaskedNotEqual(y Int16x16, z Mask16x16) Mask16x16
-
-// PairDotProd multiplies the elements and adds the pairs together,
-// yielding a vector of half as many elements with twice the input element size.
-//
-// Asm: VPMADDWD, CPU Feature: AVX512EVEX
-func (x Int16x16) MaskedPairDotProd(y Int16x16, z Mask16x16) Int32x8
-
-// SaturatedAdd adds corresponding elements of two vectors with saturation.
-//
-// Asm: VPADDSW, CPU Feature: AVX512EVEX
-func (x Int16x16) MaskedSaturatedAdd(y Int16x16, z Mask16x16) Int16x16
-
-// SaturatedSub subtracts corresponding elements of two vectors with saturation.
-//
-// Asm: VPSUBSW, CPU Feature: AVX512EVEX
-func (x Int16x16) MaskedSaturatedSub(y Int16x16, z Mask16x16) Int16x16
-
-// Sub subtracts corresponding elements of two vectors.
-//
-// Asm: VPSUBW, CPU Feature: AVX512EVEX
-func (x Int16x16) MaskedSub(y Int16x16, z Mask16x16) Int16x16
-
-// Add adds corresponding elements of two vectors.
-//
-// Asm: VPADDW, CPU Feature: AVX512EVEX
-func (x Int16x32) MaskedAdd(y Int16x32, z Mask16x32) Int16x32
-
-// Equal compares for equality, masked.
-// Const Immediate = 0.
-//
-// Asm: VPCMPEQW, CPU Feature: AVX512EVEX
-func (x Int16x32) MaskedEqual(y Int16x32, z Mask16x32) Mask16x32
-
-// Greater compares for greater than.
-// Const Immediate = 6.
-//
-// Asm: VPCMPGTW, CPU Feature: AVX512EVEX
-func (x Int16x32) MaskedGreater(y Int16x32, z Mask16x32) Mask16x32
-
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
-//
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
-func (x Int16x32) MaskedGreaterEqual(y Int16x32, z Mask16x32) Mask16x32
-
-// Less compares for less than.
-// Const Immediate = 1.
-//
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
-func (x Int16x32) MaskedLess(y Int16x32, z Mask16x32) Mask16x32
-
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
-//
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
-func (x Int16x32) MaskedLessEqual(y Int16x32, z Mask16x32) Mask16x32
-
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXSW, CPU Feature: AVX512EVEX
-func (x Int16x32) MaskedMax(y Int16x32, z Mask16x32) Int16x32
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINSW, CPU Feature: AVX512EVEX
-func (x Int16x32) MaskedMin(y Int16x32, z Mask16x32) Int16x32
-
-// MulHigh multiplies elements and stores the high part of the result, masked.
-//
-// Asm: VPMULHW, CPU Feature: AVX512EVEX
-func (x Int16x32) MaskedMulHigh(y Int16x32, z Mask16x32) Int16x32
-
-// MulLow multiplies elements and stores the low part of the result, masked.
-//
-// Asm: VPMULLW, CPU Feature: AVX512EVEX
-func (x Int16x32) MaskedMulLow(y Int16x32, z Mask16x32) Int16x32
-
-// NotEqual compares for inequality.
-// Const Immediate = 4.
-//
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
-func (x Int16x32) MaskedNotEqual(y Int16x32, z Mask16x32) Mask16x32
-
-// PairDotProd multiplies the elements and adds the pairs together,
-// yielding a vector of half as many elements with twice the input element size.
-//
-// Asm: VPMADDWD, CPU Feature: AVX512EVEX
-func (x Int16x32) MaskedPairDotProd(y Int16x32, z Mask16x32) Int32x16
-
-// SaturatedAdd adds corresponding elements of two vectors with saturation.
-//
-// Asm: VPADDSW, CPU Feature: AVX512EVEX
-func (x Int16x32) MaskedSaturatedAdd(y Int16x32, z Mask16x32) Int16x32
-
-// SaturatedSub subtracts corresponding elements of two vectors with saturation.
-//
-// Asm: VPSUBSW, CPU Feature: AVX512EVEX
-func (x Int16x32) MaskedSaturatedSub(y Int16x32, z Mask16x32) Int16x32
-
-// Sub subtracts corresponding elements of two vectors.
-//
-// Asm: VPSUBW, CPU Feature: AVX512EVEX
-func (x Int16x32) MaskedSub(y Int16x32, z Mask16x32) Int16x32
-
-// Add adds corresponding elements of two vectors.
-//
-// Asm: VPADDW, CPU Feature: AVX512EVEX
-func (x Int16x8) MaskedAdd(y Int16x8, z Mask16x8) Int16x8
-
-// Equal compares for equality, masked.
-// Const Immediate = 0.
-//
-// Asm: VPCMPEQW, CPU Feature: AVX512EVEX
-func (x Int16x8) MaskedEqual(y Int16x8, z Mask16x8) Mask16x8
-
-// Greater compares for greater than.
-// Const Immediate = 6.
-//
-// Asm: VPCMPGTW, CPU Feature: AVX512EVEX
-func (x Int16x8) MaskedGreater(y Int16x8, z Mask16x8) Mask16x8
-
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
-//
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
-func (x Int16x8) MaskedGreaterEqual(y Int16x8, z Mask16x8) Mask16x8
-
-// Less compares for less than.
-// Const Immediate = 1.
-//
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
-func (x Int16x8) MaskedLess(y Int16x8, z Mask16x8) Mask16x8
-
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
-//
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
-func (x Int16x8) MaskedLessEqual(y Int16x8, z Mask16x8) Mask16x8
-
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXSW, CPU Feature: AVX512EVEX
-func (x Int16x8) MaskedMax(y Int16x8, z Mask16x8) Int16x8
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINSW, CPU Feature: AVX512EVEX
-func (x Int16x8) MaskedMin(y Int16x8, z Mask16x8) Int16x8
-
-// MulHigh multiplies elements and stores the high part of the result, masked.
-//
-// Asm: VPMULHW, CPU Feature: AVX512EVEX
-func (x Int16x8) MaskedMulHigh(y Int16x8, z Mask16x8) Int16x8
-
-// MulLow multiplies elements and stores the low part of the result, masked.
-//
-// Asm: VPMULLW, CPU Feature: AVX512EVEX
-func (x Int16x8) MaskedMulLow(y Int16x8, z Mask16x8) Int16x8
-
-// NotEqual compares for inequality.
-// Const Immediate = 4.
-//
-// Asm: VPCMPW, CPU Feature: AVX512EVEX
-func (x Int16x8) MaskedNotEqual(y Int16x8, z Mask16x8) Mask16x8
-
-// PairDotProd multiplies the elements and adds the pairs together,
-// yielding a vector of half as many elements with twice the input element size.
-//
-// Asm: VPMADDWD, CPU Feature: AVX512EVEX
-func (x Int16x8) MaskedPairDotProd(y Int16x8, z Mask16x8) Int32x4
-
-// SaturatedAdd adds corresponding elements of two vectors with saturation.
-//
-// Asm: VPADDSW, CPU Feature: AVX512EVEX
-func (x Int16x8) MaskedSaturatedAdd(y Int16x8, z Mask16x8) Int16x8
-
-// SaturatedSub subtracts corresponding elements of two vectors with saturation.
-//
-// Asm: VPSUBSW, CPU Feature: AVX512EVEX
-func (x Int16x8) MaskedSaturatedSub(y Int16x8, z Mask16x8) Int16x8
-
-// Sub subtracts corresponding elements of two vectors.
-//
-// Asm: VPSUBW, CPU Feature: AVX512EVEX
-func (x Int16x8) MaskedSub(y Int16x8, z Mask16x8) Int16x8
-
-// Add adds corresponding elements of two vectors.
-//
-// Asm: VPADDD, CPU Feature: AVX512EVEX
-func (x Int32x16) MaskedAdd(y Int32x16, z Mask32x16) Int32x16
-
-// And performs a masked bitwise AND operation between two vectors.
-//
-// Asm: VPANDD, CPU Feature: AVX512EVEX
-func (x Int32x16) MaskedAnd(y Int32x16, z Mask32x16) Int32x16
-
-// AndNot performs a masked bitwise AND NOT operation between two vectors.
-//
-// Asm: VPANDND, CPU Feature: AVX512EVEX
-func (x Int32x16) MaskedAndNot(y Int32x16, z Mask32x16) Int32x16
-
-// Equal compares for equality, masked.
-// Const Immediate = 0.
-//
-// Asm: VPCMPEQD, CPU Feature: AVX512EVEX
-func (x Int32x16) MaskedEqual(y Int32x16, z Mask32x16) Mask32x16
-
-// Greater compares for greater than.
-// Const Immediate = 6.
-//
-// Asm: VPCMPGTD, CPU Feature: AVX512EVEX
-func (x Int32x16) MaskedGreater(y Int32x16, z Mask32x16) Mask32x16
-
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
-//
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
-func (x Int32x16) MaskedGreaterEqual(y Int32x16, z Mask32x16) Mask32x16
-
-// Less compares for less than.
-// Const Immediate = 1.
-//
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
-func (x Int32x16) MaskedLess(y Int32x16, z Mask32x16) Mask32x16
-
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
-//
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
-func (x Int32x16) MaskedLessEqual(y Int32x16, z Mask32x16) Mask32x16
-
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXSD, CPU Feature: AVX512EVEX
-func (x Int32x16) MaskedMax(y Int32x16, z Mask32x16) Int32x16
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINSD, CPU Feature: AVX512EVEX
-func (x Int32x16) MaskedMin(y Int32x16, z Mask32x16) Int32x16
-
-// MulLow multiplies elements and stores the low part of the result, masked.
-//
-// Asm: VPMULLD, CPU Feature: AVX512EVEX
-func (x Int32x16) MaskedMulLow(y Int32x16, z Mask32x16) Int32x16
-
-// NotEqual compares for inequality.
-// Const Immediate = 4.
-//
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
-func (x Int32x16) MaskedNotEqual(y Int32x16, z Mask32x16) Mask32x16
-
-// Or performs a masked bitwise OR operation between two vectors.
-//
-// Asm: VPORD, CPU Feature: AVX512EVEX
-func (x Int32x16) MaskedOr(y Int32x16, z Mask32x16) Int32x16
-
-// Sub subtracts corresponding elements of two vectors.
-//
-// Asm: VPSUBD, CPU Feature: AVX512EVEX
-func (x Int32x16) MaskedSub(y Int32x16, z Mask32x16) Int32x16
-
-// Xor performs a masked bitwise XOR operation between two vectors.
-//
-// Asm: VPXORD, CPU Feature: AVX512EVEX
-func (x Int32x16) MaskedXor(y Int32x16, z Mask32x16) Int32x16
-
-// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
-//
-// Asm: VPDPWSSD, CPU Feature: AVX512EVEX
-func (x Int32x16) PairDotProdAccumulate(y Int16x32, z Int32x16) Int32x16
-
-// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
-//
-// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX
-func (x Int32x16) SaturatedPairDotProdAccumulate(y Int16x32, z Int32x16) Int32x16
-
-// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
-//
-// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
-func (x Int32x16) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16) Int32x16
-
-// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
-//
-// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
-func (x Int32x16) UnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16) Int32x16
-
-// Add adds corresponding elements of two vectors.
-//
-// Asm: VPADDD, CPU Feature: AVX512EVEX
-func (x Int32x4) MaskedAdd(y Int32x4, z Mask32x4) Int32x4
-
-// And performs a masked bitwise AND operation between two vectors.
-//
-// Asm: VPANDD, CPU Feature: AVX512EVEX
-func (x Int32x4) MaskedAnd(y Int32x4, z Mask32x4) Int32x4
-
-// AndNot performs a masked bitwise AND NOT operation between two vectors.
-//
-// Asm: VPANDND, CPU Feature: AVX512EVEX
-func (x Int32x4) MaskedAndNot(y Int32x4, z Mask32x4) Int32x4
-
-// Equal compares for equality, masked.
-// Const Immediate = 0.
-//
-// Asm: VPCMPEQD, CPU Feature: AVX512EVEX
-func (x Int32x4) MaskedEqual(y Int32x4, z Mask32x4) Mask32x4
-
-// Greater compares for greater than.
-// Const Immediate = 6.
-//
-// Asm: VPCMPGTD, CPU Feature: AVX512EVEX
-func (x Int32x4) MaskedGreater(y Int32x4, z Mask32x4) Mask32x4
-
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
-//
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
-func (x Int32x4) MaskedGreaterEqual(y Int32x4, z Mask32x4) Mask32x4
-
-// Less compares for less than.
-// Const Immediate = 1.
-//
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
-func (x Int32x4) MaskedLess(y Int32x4, z Mask32x4) Mask32x4
-
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
-//
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
-func (x Int32x4) MaskedLessEqual(y Int32x4, z Mask32x4) Mask32x4
-
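Editorial note: in the QuadDotProdAccumulate stubs above, each int32 lane of the receiver accumulates a 4-wide dot product over a group of bytes. A one-lane scalar model, assuming the usual VPDPBUSD convention that one operand supplies unsigned bytes and the other signed bytes (the stub signatures leave the byte grouping implicit):

package quaddot

// dpbusd models one int32 lane of UnsignedSignedQuadDotProdAccumulate
// (VPDPBUSD): four unsigned bytes times four signed bytes, summed into acc.
func dpbusd(acc int32, u [4]uint8, s [4]int8) int32 {
	for i := 0; i < 4; i++ {
		acc += int32(u[i]) * int32(s[i])
	}
	return acc
}

The Saturated form (VPDPBUSDS) differs only in clamping the accumulation to the int32 range instead of wrapping.
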
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXSD, CPU Feature: AVX512EVEX
-func (x Int32x4) MaskedMax(y Int32x4, z Mask32x4) Int32x4
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINSD, CPU Feature: AVX512EVEX
-func (x Int32x4) MaskedMin(y Int32x4, z Mask32x4) Int32x4
-
-// MulLow multiplies elements and stores the low part of the result, masked.
-//
-// Asm: VPMULLD, CPU Feature: AVX512EVEX
-func (x Int32x4) MaskedMulLow(y Int32x4, z Mask32x4) Int32x4
-
-// NotEqual compares for inequality.
-// Const Immediate = 4.
-//
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
-func (x Int32x4) MaskedNotEqual(y Int32x4, z Mask32x4) Mask32x4
-
-// Or performs a masked bitwise OR operation between two vectors.
-//
-// Asm: VPORD, CPU Feature: AVX512EVEX
-func (x Int32x4) MaskedOr(y Int32x4, z Mask32x4) Int32x4
-
-// Sub subtracts corresponding elements of two vectors.
-//
-// Asm: VPSUBD, CPU Feature: AVX512EVEX
-func (x Int32x4) MaskedSub(y Int32x4, z Mask32x4) Int32x4
-
-// Xor performs a masked bitwise XOR operation between two vectors.
-//
-// Asm: VPXORD, CPU Feature: AVX512EVEX
-func (x Int32x4) MaskedXor(y Int32x4, z Mask32x4) Int32x4
-
-// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
-//
-// Asm: VPDPWSSD, CPU Feature: AVX_VNNI
-func (x Int32x4) PairDotProdAccumulate(y Int32x4, z Int32x4) Int32x4
-
-// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
-//
-// Asm: VPDPWSSDS, CPU Feature: AVX_VNNI
-func (x Int32x4) SaturatedPairDotProdAccumulate(y Int32x4, z Int32x4) Int32x4
-
-// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
-//
-// Asm: VPDPBUSDS, CPU Feature: AVX_VNNI
-func (x Int32x4) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint32x4, z Int32x4) Int32x4
-
-// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
-//
-// Asm: VPDPBUSD, CPU Feature: AVX_VNNI
-func (x Int32x4) UnsignedSignedQuadDotProdAccumulate(y Uint32x4, z Int32x4) Int32x4
-
-// Add adds corresponding elements of two vectors.
-//
-// Asm: VPADDD, CPU Feature: AVX512EVEX
-func (x Int32x8) MaskedAdd(y Int32x8, z Mask32x8) Int32x8
-
-// And performs a masked bitwise AND operation between two vectors.
-//
-// Asm: VPANDD, CPU Feature: AVX512EVEX
-func (x Int32x8) MaskedAnd(y Int32x8, z Mask32x8) Int32x8
-
-// AndNot performs a masked bitwise AND NOT operation between two vectors.
-//
-// Asm: VPANDND, CPU Feature: AVX512EVEX
-func (x Int32x8) MaskedAndNot(y Int32x8, z Mask32x8) Int32x8
-
-// Equal compares for equality, masked.
-// Const Immediate = 0.
-//
-// Asm: VPCMPEQD, CPU Feature: AVX512EVEX
-func (x Int32x8) MaskedEqual(y Int32x8, z Mask32x8) Mask32x8
-
-// Greater compares for greater than.
-// Const Immediate = 6.
-//
-// Asm: VPCMPGTD, CPU Feature: AVX512EVEX
-func (x Int32x8) MaskedGreater(y Int32x8, z Mask32x8) Mask32x8
-
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
-//
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
-func (x Int32x8) MaskedGreaterEqual(y Int32x8, z Mask32x8) Mask32x8
-
-// Less compares for less than.
-// Const Immediate = 1.
-//
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
-func (x Int32x8) MaskedLess(y Int32x8, z Mask32x8) Mask32x8
-
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
-//
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
-func (x Int32x8) MaskedLessEqual(y Int32x8, z Mask32x8) Mask32x8
-
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXSD, CPU Feature: AVX512EVEX
-func (x Int32x8) MaskedMax(y Int32x8, z Mask32x8) Int32x8
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINSD, CPU Feature: AVX512EVEX
-func (x Int32x8) MaskedMin(y Int32x8, z Mask32x8) Int32x8
-
-// MulLow multiplies elements and stores the low part of the result, masked.
-//
-// Asm: VPMULLD, CPU Feature: AVX512EVEX
-func (x Int32x8) MaskedMulLow(y Int32x8, z Mask32x8) Int32x8
-
-// NotEqual compares for inequality.
-// Const Immediate = 4.
-//
-// Asm: VPCMPD, CPU Feature: AVX512EVEX
-func (x Int32x8) MaskedNotEqual(y Int32x8, z Mask32x8) Mask32x8
-
-// Or performs a masked bitwise OR operation between two vectors.
-//
-// Asm: VPORD, CPU Feature: AVX512EVEX
-func (x Int32x8) MaskedOr(y Int32x8, z Mask32x8) Int32x8
-
-// Sub subtracts corresponding elements of two vectors.
-//
-// Asm: VPSUBD, CPU Feature: AVX512EVEX
-func (x Int32x8) MaskedSub(y Int32x8, z Mask32x8) Int32x8
-
-// Xor performs a masked bitwise XOR operation between two vectors.
-//
-// Asm: VPXORD, CPU Feature: AVX512EVEX
-func (x Int32x8) MaskedXor(y Int32x8, z Mask32x8) Int32x8
-
-// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
-//
-// Asm: VPDPWSSD, CPU Feature: AVX_VNNI
-func (x Int32x8) PairDotProdAccumulate(y Int32x8, z Int32x8) Int32x8
-
-// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
-//
-// Asm: VPDPWSSDS, CPU Feature: AVX_VNNI
-func (x Int32x8) SaturatedPairDotProdAccumulate(y Int32x8, z Int32x8) Int32x8
-
-// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
-//
-// Asm: VPDPBUSDS, CPU Feature: AVX_VNNI
-func (x Int32x8) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint32x8, z Int32x8) Int32x8
-
-// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
-//
-// Asm: VPDPBUSD, CPU Feature: AVX_VNNI
-func (x Int32x8) UnsignedSignedQuadDotProdAccumulate(y Uint32x8, z Int32x8) Int32x8
-
-// Add adds corresponding elements of two vectors.
-//
-// Asm: VPADDQ, CPU Feature: AVX512EVEX
-func (x Int64x2) MaskedAdd(y Int64x2, z Mask64x2) Int64x2
-
-// And performs a masked bitwise AND operation between two vectors.
-//
-// Asm: VPANDQ, CPU Feature: AVX512EVEX
-func (x Int64x2) MaskedAnd(y Int64x2, z Mask64x2) Int64x2
-
-// AndNot performs a masked bitwise AND NOT operation between two vectors.
-//
-// Asm: VPANDNQ, CPU Feature: AVX512EVEX
-func (x Int64x2) MaskedAndNot(y Int64x2, z Mask64x2) Int64x2
-
-// Equal compares for equality, masked.
-// Const Immediate = 0.
-//
-// Asm: VPCMPEQQ, CPU Feature: AVX512EVEX
-func (x Int64x2) MaskedEqual(y Int64x2, z Mask64x2) Mask64x2
-
-// Greater compares for greater than.
-// Const Immediate = 6.
-//
-// Asm: VPCMPGTQ, CPU Feature: AVX512EVEX
-func (x Int64x2) MaskedGreater(y Int64x2, z Mask64x2) Mask64x2
-
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
-//
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
-func (x Int64x2) MaskedGreaterEqual(y Int64x2, z Mask64x2) Mask64x2
-
-// Less compares for less than.
-// Const Immediate = 1.
-// -// Asm: VPCMPQ, CPU Feature: AVX512EVEX -func (x Int64x2) MaskedLess(y Int64x2, z Mask64x2) Mask64x2 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VPCMPQ, CPU Feature: AVX512EVEX -func (x Int64x2) MaskedLessEqual(y Int64x2, z Mask64x2) Mask64x2 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXSQ, CPU Feature: AVX512EVEX -func (x Int64x2) MaskedMax(y Int64x2, z Mask64x2) Int64x2 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINSQ, CPU Feature: AVX512EVEX -func (x Int64x2) MaskedMin(y Int64x2, z Mask64x2) Int64x2 - -// MulEvenWiden multiplies even-indexed elements, widening the result, masked. -// Result[i] = v1.Even[i] * v2.Even[i]. -// -// Asm: VPMULDQ, CPU Feature: AVX512EVEX -func (x Int64x2) MaskedMulEvenWiden(y Int64x2, z Mask64x2) Int64x2 - -// MulLow multiplies elements and stores the low part of the result, masked. -// -// Asm: VPMULLQ, CPU Feature: AVX512EVEX -func (x Int64x2) MaskedMulLow(y Int64x2, z Mask64x2) Int64x2 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VPCMPQ, CPU Feature: AVX512EVEX -func (x Int64x2) MaskedNotEqual(y Int64x2, z Mask64x2) Mask64x2 - -// Or performs a masked bitwise OR operation between two vectors. -// -// Asm: VPORQ, CPU Feature: AVX512EVEX -func (x Int64x2) MaskedOr(y Int64x2, z Mask64x2) Int64x2 - -// Sub subtracts corresponding elements of two vectors. -// -// Asm: VPSUBQ, CPU Feature: AVX512EVEX -func (x Int64x2) MaskedSub(y Int64x2, z Mask64x2) Int64x2 - -// Xor performs a masked bitwise XOR operation between two vectors. -// -// Asm: VPXORQ, CPU Feature: AVX512EVEX -func (x Int64x2) MaskedXor(y Int64x2, z Mask64x2) Int64x2 - -// Add adds corresponding elements of two vectors. -// -// Asm: VPADDQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedAdd(y Int64x4, z Mask64x4) Int64x4 - -// And performs a masked bitwise AND operation between two vectors. -// -// Asm: VPANDQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedAnd(y Int64x4, z Mask64x4) Int64x4 - -// AndNot performs a masked bitwise AND NOT operation between two vectors. -// -// Asm: VPANDNQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedAndNot(y Int64x4, z Mask64x4) Int64x4 - -// Equal compares for equality, masked. -// Const Immediate = 0. -// -// Asm: VPCMPEQQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedEqual(y Int64x4, z Mask64x4) Mask64x4 - -// Greater compares for greater than. -// Const Immediate = 6. -// -// Asm: VPCMPGTQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedGreater(y Int64x4, z Mask64x4) Mask64x4 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VPCMPQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedGreaterEqual(y Int64x4, z Mask64x4) Mask64x4 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VPCMPQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedLess(y Int64x4, z Mask64x4) Mask64x4 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VPCMPQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedLessEqual(y Int64x4, z Mask64x4) Mask64x4 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXSQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedMax(y Int64x4, z Mask64x4) Int64x4 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINSQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedMin(y Int64x4, z Mask64x4) Int64x4 - -// MulEvenWiden multiplies even-indexed elements, widening the result, masked. 
-// Result[i] = v1.Even[i] * v2.Even[i]. -// -// Asm: VPMULDQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedMulEvenWiden(y Int64x4, z Mask64x4) Int64x4 - -// MulLow multiplies elements and stores the low part of the result, masked. -// -// Asm: VPMULLQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedMulLow(y Int64x4, z Mask64x4) Int64x4 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VPCMPQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedNotEqual(y Int64x4, z Mask64x4) Mask64x4 - -// Or performs a masked bitwise OR operation between two vectors. -// -// Asm: VPORQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedOr(y Int64x4, z Mask64x4) Int64x4 - -// Sub subtracts corresponding elements of two vectors. -// -// Asm: VPSUBQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedSub(y Int64x4, z Mask64x4) Int64x4 - -// Xor performs a masked bitwise XOR operation between two vectors. -// -// Asm: VPXORQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedXor(y Int64x4, z Mask64x4) Int64x4 - -// Add adds corresponding elements of two vectors. -// -// Asm: VPADDQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedAdd(y Int64x8, z Mask64x8) Int64x8 - -// And performs a masked bitwise AND operation between two vectors. -// -// Asm: VPANDQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedAnd(y Int64x8, z Mask64x8) Int64x8 - -// AndNot performs a masked bitwise AND NOT operation between two vectors. -// -// Asm: VPANDNQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedAndNot(y Int64x8, z Mask64x8) Int64x8 - -// Equal compares for equality, masked. -// Const Immediate = 0. -// -// Asm: VPCMPEQQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedEqual(y Int64x8, z Mask64x8) Mask64x8 - -// Greater compares for greater than. -// Const Immediate = 6. -// -// Asm: VPCMPGTQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedGreater(y Int64x8, z Mask64x8) Mask64x8 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VPCMPQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedGreaterEqual(y Int64x8, z Mask64x8) Mask64x8 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VPCMPQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedLess(y Int64x8, z Mask64x8) Mask64x8 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VPCMPQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedLessEqual(y Int64x8, z Mask64x8) Mask64x8 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXSQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedMax(y Int64x8, z Mask64x8) Int64x8 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINSQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedMin(y Int64x8, z Mask64x8) Int64x8 - -// MulEvenWiden multiplies even-indexed elements, widening the result, masked. -// Result[i] = v1.Even[i] * v2.Even[i]. -// -// Asm: VPMULDQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedMulEvenWiden(y Int64x8, z Mask64x8) Int64x8 - -// MulLow multiplies elements and stores the low part of the result, masked. -// -// Asm: VPMULLQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedMulLow(y Int64x8, z Mask64x8) Int64x8 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VPCMPQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedNotEqual(y Int64x8, z Mask64x8) Mask64x8 - -// Or performs a masked bitwise OR operation between two vectors. 
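
Editorial note: `Result[i] = v1.Even[i] * v2.Even[i]` in the MulEvenWiden stubs means only the even-indexed 32-bit halves of each 64-bit lane (the low halves, on little-endian) are multiplied, sign-extended to a full 64-bit product. A scalar sketch over the vectors viewed as int32 lanes, hypothetical name:

package mulwiden

// mulEvenWiden models the documented MulEvenWiden (VPMULDQ): each even-indexed
// pair of int32 lanes produces one widened int64 product.
func mulEvenWiden(a, b []int32) []int64 {
	out := make([]int64, len(a)/2)
	for i := range out {
		out[i] = int64(a[2*i]) * int64(b[2*i])
	}
	return out
}
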
-// -// Asm: VPORQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedOr(y Int64x8, z Mask64x8) Int64x8 - -// Sub subtracts corresponding elements of two vectors. -// -// Asm: VPSUBQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedSub(y Int64x8, z Mask64x8) Int64x8 - -// Xor performs a masked bitwise XOR operation between two vectors. -// -// Asm: VPXORQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedXor(y Int64x8, z Mask64x8) Int64x8 - -// Add adds corresponding elements of two vectors. -// -// Asm: VPADDB, CPU Feature: AVX512EVEX -func (x Int8x16) MaskedAdd(y Int8x16, z Mask8x16) Int8x16 - -// Equal compares for equality, masked. -// Const Immediate = 0. -// -// Asm: VPCMPEQB, CPU Feature: AVX512EVEX -func (x Int8x16) MaskedEqual(y Int8x16, z Mask8x16) Mask8x16 - -// Greater compares for greater than. -// Const Immediate = 6. -// -// Asm: VPCMPGTB, CPU Feature: AVX512EVEX -func (x Int8x16) MaskedGreater(y Int8x16, z Mask8x16) Mask8x16 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VPCMPB, CPU Feature: AVX512EVEX -func (x Int8x16) MaskedGreaterEqual(y Int8x16, z Mask8x16) Mask8x16 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VPCMPB, CPU Feature: AVX512EVEX -func (x Int8x16) MaskedLess(y Int8x16, z Mask8x16) Mask8x16 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VPCMPB, CPU Feature: AVX512EVEX -func (x Int8x16) MaskedLessEqual(y Int8x16, z Mask8x16) Mask8x16 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXSB, CPU Feature: AVX512EVEX -func (x Int8x16) MaskedMax(y Int8x16, z Mask8x16) Int8x16 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINSB, CPU Feature: AVX512EVEX -func (x Int8x16) MaskedMin(y Int8x16, z Mask8x16) Int8x16 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VPCMPB, CPU Feature: AVX512EVEX -func (x Int8x16) MaskedNotEqual(y Int8x16, z Mask8x16) Mask8x16 - -// SaturatedAdd adds corresponding elements of two vectors with saturation. -// -// Asm: VPADDSB, CPU Feature: AVX512EVEX -func (x Int8x16) MaskedSaturatedAdd(y Int8x16, z Mask8x16) Int8x16 - -// SaturatedSub subtracts corresponding elements of two vectors with saturation. -// -// Asm: VPSUBSB, CPU Feature: AVX512EVEX -func (x Int8x16) MaskedSaturatedSub(y Int8x16, z Mask8x16) Int8x16 - -// Sub subtracts corresponding elements of two vectors. -// -// Asm: VPSUBB, CPU Feature: AVX512EVEX -func (x Int8x16) MaskedSub(y Int8x16, z Mask8x16) Int8x16 - -// Add adds corresponding elements of two vectors. -// -// Asm: VPADDB, CPU Feature: AVX512EVEX -func (x Int8x32) MaskedAdd(y Int8x32, z Mask8x32) Int8x32 - -// Equal compares for equality, masked. -// Const Immediate = 0. -// -// Asm: VPCMPEQB, CPU Feature: AVX512EVEX -func (x Int8x32) MaskedEqual(y Int8x32, z Mask8x32) Mask8x32 - -// Greater compares for greater than. -// Const Immediate = 6. -// -// Asm: VPCMPGTB, CPU Feature: AVX512EVEX -func (x Int8x32) MaskedGreater(y Int8x32, z Mask8x32) Mask8x32 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VPCMPB, CPU Feature: AVX512EVEX -func (x Int8x32) MaskedGreaterEqual(y Int8x32, z Mask8x32) Mask8x32 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VPCMPB, CPU Feature: AVX512EVEX -func (x Int8x32) MaskedLess(y Int8x32, z Mask8x32) Mask8x32 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. 
-// -// Asm: VPCMPB, CPU Feature: AVX512EVEX -func (x Int8x32) MaskedLessEqual(y Int8x32, z Mask8x32) Mask8x32 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXSB, CPU Feature: AVX512EVEX -func (x Int8x32) MaskedMax(y Int8x32, z Mask8x32) Int8x32 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINSB, CPU Feature: AVX512EVEX -func (x Int8x32) MaskedMin(y Int8x32, z Mask8x32) Int8x32 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VPCMPB, CPU Feature: AVX512EVEX -func (x Int8x32) MaskedNotEqual(y Int8x32, z Mask8x32) Mask8x32 - -// SaturatedAdd adds corresponding elements of two vectors with saturation. -// -// Asm: VPADDSB, CPU Feature: AVX512EVEX -func (x Int8x32) MaskedSaturatedAdd(y Int8x32, z Mask8x32) Int8x32 - -// SaturatedSub subtracts corresponding elements of two vectors with saturation. -// -// Asm: VPSUBSB, CPU Feature: AVX512EVEX -func (x Int8x32) MaskedSaturatedSub(y Int8x32, z Mask8x32) Int8x32 - -// Sub subtracts corresponding elements of two vectors. -// -// Asm: VPSUBB, CPU Feature: AVX512EVEX -func (x Int8x32) MaskedSub(y Int8x32, z Mask8x32) Int8x32 - -// Add adds corresponding elements of two vectors. -// -// Asm: VPADDB, CPU Feature: AVX512EVEX -func (x Int8x64) MaskedAdd(y Int8x64, z Mask8x64) Int8x64 - -// Equal compares for equality, masked. -// Const Immediate = 0. -// -// Asm: VPCMPEQB, CPU Feature: AVX512EVEX -func (x Int8x64) MaskedEqual(y Int8x64, z Mask8x64) Mask8x64 - -// Greater compares for greater than. -// Const Immediate = 6. -// -// Asm: VPCMPGTB, CPU Feature: AVX512EVEX -func (x Int8x64) MaskedGreater(y Int8x64, z Mask8x64) Mask8x64 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VPCMPB, CPU Feature: AVX512EVEX -func (x Int8x64) MaskedGreaterEqual(y Int8x64, z Mask8x64) Mask8x64 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VPCMPB, CPU Feature: AVX512EVEX -func (x Int8x64) MaskedLess(y Int8x64, z Mask8x64) Mask8x64 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VPCMPB, CPU Feature: AVX512EVEX -func (x Int8x64) MaskedLessEqual(y Int8x64, z Mask8x64) Mask8x64 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXSB, CPU Feature: AVX512EVEX -func (x Int8x64) MaskedMax(y Int8x64, z Mask8x64) Int8x64 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINSB, CPU Feature: AVX512EVEX -func (x Int8x64) MaskedMin(y Int8x64, z Mask8x64) Int8x64 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VPCMPB, CPU Feature: AVX512EVEX -func (x Int8x64) MaskedNotEqual(y Int8x64, z Mask8x64) Mask8x64 - -// SaturatedAdd adds corresponding elements of two vectors with saturation. -// -// Asm: VPADDSB, CPU Feature: AVX512EVEX -func (x Int8x64) MaskedSaturatedAdd(y Int8x64, z Mask8x64) Int8x64 - -// SaturatedSub subtracts corresponding elements of two vectors with saturation. -// -// Asm: VPSUBSB, CPU Feature: AVX512EVEX -func (x Int8x64) MaskedSaturatedSub(y Int8x64, z Mask8x64) Int8x64 - -// Sub subtracts corresponding elements of two vectors. -// -// Asm: VPSUBB, CPU Feature: AVX512EVEX -func (x Int8x64) MaskedSub(y Int8x64, z Mask8x64) Int8x64 - -// Add adds corresponding elements of two vectors. -// -// Asm: VPADDW, CPU Feature: AVX512EVEX -func (x Uint16x16) MaskedAdd(y Uint16x16, z Mask16x16) Uint16x16 - -// Average computes the rounded average of corresponding elements. 
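
Editorial note: "with saturation" in the SaturatedAdd/SaturatedSub stubs means the exact result is clamped to the element type's range instead of wrapping around. One int8 lane as a scalar sketch (hypothetical name):

package satadd

// satAddInt8 models one lane of the documented SaturatedAdd (VPADDSB):
// the exact sum is clamped to [-128, 127] rather than wrapped.
func satAddInt8(a, b int8) int8 {
	s := int16(a) + int16(b)
	switch {
	case s > 127:
		return 127
	case s < -128:
		return -128
	}
	return int8(s)
}
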
-//
-// Asm: VPAVGW, CPU Feature: AVX512EVEX
-func (x Uint16x16) MaskedAverage(y Uint16x16, z Mask16x16) Uint16x16
-
-// Equal compares for equality, masked.
-// Const Immediate = 0.
-//
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
-func (x Uint16x16) MaskedEqual(y Uint16x16, z Mask16x16) Mask16x16
-
-// Greater compares for greater than.
-// Const Immediate = 6.
-//
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
-func (x Uint16x16) MaskedGreater(y Uint16x16, z Mask16x16) Mask16x16
-
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
-//
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
-func (x Uint16x16) MaskedGreaterEqual(y Uint16x16, z Mask16x16) Mask16x16
-
-// Less compares for less than.
-// Const Immediate = 1.
-//
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
-func (x Uint16x16) MaskedLess(y Uint16x16, z Mask16x16) Mask16x16
-
-// LessEqual compares for less than or equal.
-// Const Immediate = 2.
-//
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
-func (x Uint16x16) MaskedLessEqual(y Uint16x16, z Mask16x16) Mask16x16
-
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXUW, CPU Feature: AVX512EVEX
-func (x Uint16x16) MaskedMax(y Uint16x16, z Mask16x16) Uint16x16
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINUW, CPU Feature: AVX512EVEX
-func (x Uint16x16) MaskedMin(y Uint16x16, z Mask16x16) Uint16x16
-
-// MulHigh multiplies elements and stores the high part of the result, masked.
-//
-// Asm: VPMULHUW, CPU Feature: AVX512EVEX
-func (x Uint16x16) MaskedMulHigh(y Uint16x16, z Mask16x16) Uint16x16
-
-// NotEqual compares for inequality.
-// Const Immediate = 4.
-//
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
-func (x Uint16x16) MaskedNotEqual(y Uint16x16, z Mask16x16) Mask16x16
-
-// SaturatedAdd adds corresponding elements of two vectors with saturation.
-//
-// Asm: VPADDSW, CPU Feature: AVX512EVEX
-func (x Uint16x16) MaskedSaturatedAdd(y Uint16x16, z Mask16x16) Uint16x16
-
-// SaturatedSub subtracts corresponding elements of two vectors with saturation.
-//
-// Asm: VPSUBSW, CPU Feature: AVX512EVEX
-func (x Uint16x16) MaskedSaturatedSub(y Uint16x16, z Mask16x16) Uint16x16
-
-// SaturatedUnsignedSignedPairDotProd multiplies the elements and adds the pairs together with saturation,
-// yielding a vector of half as many elements with twice the input element size.
-//
-// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
-func (x Uint16x16) MaskedSaturatedUnsignedSignedPairDotProd(y Int16x16, z Mask16x16) Int16x16
-
-// Sub subtracts corresponding elements of two vectors.
-//
-// Asm: VPSUBW, CPU Feature: AVX512EVEX
-func (x Uint16x16) MaskedSub(y Uint16x16, z Mask16x16) Uint16x16
-
-// Add adds corresponding elements of two vectors.
-//
-// Asm: VPADDW, CPU Feature: AVX512EVEX
-func (x Uint16x32) MaskedAdd(y Uint16x32, z Mask16x32) Uint16x32
-
-// Average computes the rounded average of corresponding elements.
-//
-// Asm: VPAVGW, CPU Feature: AVX512EVEX
-func (x Uint16x32) MaskedAverage(y Uint16x32, z Mask16x32) Uint16x32
-
-// Equal compares for equality, masked.
-// Const Immediate = 0.
-//
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
-func (x Uint16x32) MaskedEqual(y Uint16x32, z Mask16x32) Mask16x32
-
-// Greater compares for greater than.
-// Const Immediate = 6.
-//
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
-func (x Uint16x32) MaskedGreater(y Uint16x32, z Mask16x32) Mask16x32
-
-// GreaterEqual compares for greater than or equal.
-// Const Immediate = 5.
-// -// Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x32) MaskedGreaterEqual(y Uint16x32, z Mask16x32) Mask16x32 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x32) MaskedLess(y Uint16x32, z Mask16x32) Mask16x32 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x32) MaskedLessEqual(y Uint16x32, z Mask16x32) Mask16x32 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXUW, CPU Feature: AVX512EVEX -func (x Uint16x32) MaskedMax(y Uint16x32, z Mask16x32) Uint16x32 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINUW, CPU Feature: AVX512EVEX -func (x Uint16x32) MaskedMin(y Uint16x32, z Mask16x32) Uint16x32 - -// MulHigh multiplies elements and stores the high part of the result, masked. -// -// Asm: VPMULHUW, CPU Feature: AVX512EVEX -func (x Uint16x32) MaskedMulHigh(y Uint16x32, z Mask16x32) Uint16x32 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x32) MaskedNotEqual(y Uint16x32, z Mask16x32) Mask16x32 - -// SaturatedAdd adds corresponding elements of two vectors with saturation. -// -// Asm: VPADDSW, CPU Feature: AVX512EVEX -func (x Uint16x32) MaskedSaturatedAdd(y Uint16x32, z Mask16x32) Uint16x32 - -// SaturatedSub subtracts corresponding elements of two vectors with saturation. -// -// Asm: VPSUBSW, CPU Feature: AVX512EVEX -func (x Uint16x32) MaskedSaturatedSub(y Uint16x32, z Mask16x32) Uint16x32 - -// SaturatedPairDotProd multiplies the elements and add the pairs together with saturation, -// yielding a vector of half as many elements with twice the input element size. -// -// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX -func (x Uint16x32) MaskedSaturatedUnsignedSignedPairDotProd(y Int16x32, z Mask16x32) Int16x32 - -// Sub subtracts corresponding elements of two vectors. -// -// Asm: VPSUBW, CPU Feature: AVX512EVEX -func (x Uint16x32) MaskedSub(y Uint16x32, z Mask16x32) Uint16x32 - -// Add adds corresponding elements of two vectors. -// -// Asm: VPADDW, CPU Feature: AVX512EVEX -func (x Uint16x8) MaskedAdd(y Uint16x8, z Mask16x8) Uint16x8 - -// Average computes the rounded average of corresponding elements. -// -// Asm: VPAVGW, CPU Feature: AVX512EVEX -func (x Uint16x8) MaskedAverage(y Uint16x8, z Mask16x8) Uint16x8 - -// Equal compares for equality, masked. -// Const Immediate = 0. -// -// Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x8) MaskedEqual(y Uint16x8, z Mask16x8) Mask16x8 - -// Greater compares for greater than. -// Const Immediate = 6. -// -// Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x8) MaskedGreater(y Uint16x8, z Mask16x8) Mask16x8 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x8) MaskedGreaterEqual(y Uint16x8, z Mask16x8) Mask16x8 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x8) MaskedLess(y Uint16x8, z Mask16x8) Mask16x8 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x8) MaskedLessEqual(y Uint16x8, z Mask16x8) Mask16x8 - -// Max computes the maximum of corresponding elements. 
-// -// Asm: VPMAXUW, CPU Feature: AVX512EVEX -func (x Uint16x8) MaskedMax(y Uint16x8, z Mask16x8) Uint16x8 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINUW, CPU Feature: AVX512EVEX -func (x Uint16x8) MaskedMin(y Uint16x8, z Mask16x8) Uint16x8 - -// MulHigh multiplies elements and stores the high part of the result, masked. -// -// Asm: VPMULHUW, CPU Feature: AVX512EVEX -func (x Uint16x8) MaskedMulHigh(y Uint16x8, z Mask16x8) Uint16x8 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x8) MaskedNotEqual(y Uint16x8, z Mask16x8) Mask16x8 - -// SaturatedAdd adds corresponding elements of two vectors with saturation. -// -// Asm: VPADDSW, CPU Feature: AVX512EVEX -func (x Uint16x8) MaskedSaturatedAdd(y Uint16x8, z Mask16x8) Uint16x8 - -// SaturatedSub subtracts corresponding elements of two vectors with saturation. -// -// Asm: VPSUBSW, CPU Feature: AVX512EVEX -func (x Uint16x8) MaskedSaturatedSub(y Uint16x8, z Mask16x8) Uint16x8 - -// SaturatedPairDotProd multiplies the elements and add the pairs together with saturation, -// yielding a vector of half as many elements with twice the input element size. -// -// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX -func (x Uint16x8) MaskedSaturatedUnsignedSignedPairDotProd(y Int16x8, z Mask16x8) Int16x8 - -// Sub subtracts corresponding elements of two vectors. -// -// Asm: VPSUBW, CPU Feature: AVX512EVEX -func (x Uint16x8) MaskedSub(y Uint16x8, z Mask16x8) Uint16x8 - -// Add adds corresponding elements of two vectors. -// -// Asm: VPADDD, CPU Feature: AVX512EVEX -func (x Uint32x16) MaskedAdd(y Uint32x16, z Mask32x16) Uint32x16 - -// And performs a masked bitwise AND operation between two vectors. -// -// Asm: VPANDD, CPU Feature: AVX512EVEX -func (x Uint32x16) MaskedAnd(y Uint32x16, z Mask32x16) Uint32x16 - -// AndNot performs a masked bitwise AND NOT operation between two vectors. -// -// Asm: VPANDND, CPU Feature: AVX512EVEX -func (x Uint32x16) MaskedAndNot(y Uint32x16, z Mask32x16) Uint32x16 - -// Equal compares for equality, masked. -// Const Immediate = 0. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x16) MaskedEqual(y Uint32x16, z Mask32x16) Mask32x16 - -// Greater compares for greater than. -// Const Immediate = 6. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x16) MaskedGreater(y Uint32x16, z Mask32x16) Mask32x16 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x16) MaskedGreaterEqual(y Uint32x16, z Mask32x16) Mask32x16 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x16) MaskedLess(y Uint32x16, z Mask32x16) Mask32x16 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x16) MaskedLessEqual(y Uint32x16, z Mask32x16) Mask32x16 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXUD, CPU Feature: AVX512EVEX -func (x Uint32x16) MaskedMax(y Uint32x16, z Mask32x16) Uint32x16 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINUD, CPU Feature: AVX512EVEX -func (x Uint32x16) MaskedMin(y Uint32x16, z Mask32x16) Uint32x16 - -// NotEqual compares for inequality. -// Const Immediate = 4. 
-// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x16) MaskedNotEqual(y Uint32x16, z Mask32x16) Mask32x16 - -// Or performs a masked bitwise OR operation between two vectors. -// -// Asm: VPORD, CPU Feature: AVX512EVEX -func (x Uint32x16) MaskedOr(y Uint32x16, z Mask32x16) Uint32x16 - -// Sub subtracts corresponding elements of two vectors. -// -// Asm: VPSUBD, CPU Feature: AVX512EVEX -func (x Uint32x16) MaskedSub(y Uint32x16, z Mask32x16) Uint32x16 - -// Xor performs a masked bitwise XOR operation between two vectors. -// -// Asm: VPXORD, CPU Feature: AVX512EVEX -func (x Uint32x16) MaskedXor(y Uint32x16, z Mask32x16) Uint32x16 - -// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. -// -// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX -func (x Uint32x16) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16) Uint32x16 - -// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. -// -// Asm: VPDPBUSD, CPU Feature: AVX512EVEX -func (x Uint32x16) UnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16) Uint32x16 - -// Add adds corresponding elements of two vectors. -// -// Asm: VPADDD, CPU Feature: AVX512EVEX -func (x Uint32x4) MaskedAdd(y Uint32x4, z Mask32x4) Uint32x4 - -// And performs a masked bitwise AND operation between two vectors. -// -// Asm: VPANDD, CPU Feature: AVX512EVEX -func (x Uint32x4) MaskedAnd(y Uint32x4, z Mask32x4) Uint32x4 - -// AndNot performs a masked bitwise AND NOT operation between two vectors. -// -// Asm: VPANDND, CPU Feature: AVX512EVEX -func (x Uint32x4) MaskedAndNot(y Uint32x4, z Mask32x4) Uint32x4 - -// Equal compares for equality, masked. -// Const Immediate = 0. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x4) MaskedEqual(y Uint32x4, z Mask32x4) Mask32x4 - -// Greater compares for greater than. -// Const Immediate = 6. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x4) MaskedGreater(y Uint32x4, z Mask32x4) Mask32x4 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x4) MaskedGreaterEqual(y Uint32x4, z Mask32x4) Mask32x4 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x4) MaskedLess(y Uint32x4, z Mask32x4) Mask32x4 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x4) MaskedLessEqual(y Uint32x4, z Mask32x4) Mask32x4 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXUD, CPU Feature: AVX512EVEX -func (x Uint32x4) MaskedMax(y Uint32x4, z Mask32x4) Uint32x4 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINUD, CPU Feature: AVX512EVEX -func (x Uint32x4) MaskedMin(y Uint32x4, z Mask32x4) Uint32x4 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x4) MaskedNotEqual(y Uint32x4, z Mask32x4) Mask32x4 - -// Or performs a masked bitwise OR operation between two vectors. -// -// Asm: VPORD, CPU Feature: AVX512EVEX -func (x Uint32x4) MaskedOr(y Uint32x4, z Mask32x4) Uint32x4 - -// Sub subtracts corresponding elements of two vectors. 
-// -// Asm: VPSUBD, CPU Feature: AVX512EVEX -func (x Uint32x4) MaskedSub(y Uint32x4, z Mask32x4) Uint32x4 - -// Xor performs a masked bitwise XOR operation between two vectors. -// -// Asm: VPXORD, CPU Feature: AVX512EVEX -func (x Uint32x4) MaskedXor(y Uint32x4, z Mask32x4) Uint32x4 - -// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. -// -// Asm: VPDPBUSDS, CPU Feature: AVX_VNNI -func (x Uint32x4) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint32x4, z Int32x4) Uint32x4 - -// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. -// -// Asm: VPDPBUSD, CPU Feature: AVX_VNNI -func (x Uint32x4) UnsignedSignedQuadDotProdAccumulate(y Uint32x4, z Int32x4) Uint32x4 - -// Add adds corresponding elements of two vectors. -// -// Asm: VPADDD, CPU Feature: AVX512EVEX -func (x Uint32x8) MaskedAdd(y Uint32x8, z Mask32x8) Uint32x8 - -// And performs a masked bitwise AND operation between two vectors. -// -// Asm: VPANDD, CPU Feature: AVX512EVEX -func (x Uint32x8) MaskedAnd(y Uint32x8, z Mask32x8) Uint32x8 - -// AndNot performs a masked bitwise AND NOT operation between two vectors. -// -// Asm: VPANDND, CPU Feature: AVX512EVEX -func (x Uint32x8) MaskedAndNot(y Uint32x8, z Mask32x8) Uint32x8 - -// Equal compares for equality, masked. -// Const Immediate = 0. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x8) MaskedEqual(y Uint32x8, z Mask32x8) Mask32x8 - -// Greater compares for greater than. -// Const Immediate = 6. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x8) MaskedGreater(y Uint32x8, z Mask32x8) Mask32x8 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x8) MaskedGreaterEqual(y Uint32x8, z Mask32x8) Mask32x8 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x8) MaskedLess(y Uint32x8, z Mask32x8) Mask32x8 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x8) MaskedLessEqual(y Uint32x8, z Mask32x8) Mask32x8 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXUD, CPU Feature: AVX512EVEX -func (x Uint32x8) MaskedMax(y Uint32x8, z Mask32x8) Uint32x8 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINUD, CPU Feature: AVX512EVEX -func (x Uint32x8) MaskedMin(y Uint32x8, z Mask32x8) Uint32x8 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x8) MaskedNotEqual(y Uint32x8, z Mask32x8) Mask32x8 - -// Or performs a masked bitwise OR operation between two vectors. -// -// Asm: VPORD, CPU Feature: AVX512EVEX -func (x Uint32x8) MaskedOr(y Uint32x8, z Mask32x8) Uint32x8 - -// Sub subtracts corresponding elements of two vectors. -// -// Asm: VPSUBD, CPU Feature: AVX512EVEX -func (x Uint32x8) MaskedSub(y Uint32x8, z Mask32x8) Uint32x8 - -// Xor performs a masked bitwise XOR operation between two vectors. -// -// Asm: VPXORD, CPU Feature: AVX512EVEX -func (x Uint32x8) MaskedXor(y Uint32x8, z Mask32x8) Uint32x8 - -// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. 
-// -// Asm: VPDPBUSDS, CPU Feature: AVX_VNNI -func (x Uint32x8) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint32x8, z Int32x8) Uint32x8 - -// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. -// -// Asm: VPDPBUSD, CPU Feature: AVX_VNNI -func (x Uint32x8) UnsignedSignedQuadDotProdAccumulate(y Uint32x8, z Int32x8) Uint32x8 - -// Add adds corresponding elements of two vectors. -// -// Asm: VPADDQ, CPU Feature: AVX512EVEX -func (x Uint64x2) MaskedAdd(y Uint64x2, z Mask64x2) Uint64x2 - -// And performs a masked bitwise AND operation between two vectors. -// -// Asm: VPANDQ, CPU Feature: AVX512EVEX -func (x Uint64x2) MaskedAnd(y Uint64x2, z Mask64x2) Uint64x2 - -// AndNot performs a masked bitwise AND NOT operation between two vectors. -// -// Asm: VPANDNQ, CPU Feature: AVX512EVEX -func (x Uint64x2) MaskedAndNot(y Uint64x2, z Mask64x2) Uint64x2 - -// Equal compares for equality, masked. -// Const Immediate = 0. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x2) MaskedEqual(y Uint64x2, z Mask64x2) Mask64x2 - -// Greater compares for greater than. -// Const Immediate = 6. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x2) MaskedGreater(y Uint64x2, z Mask64x2) Mask64x2 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x2) MaskedGreaterEqual(y Uint64x2, z Mask64x2) Mask64x2 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x2) MaskedLess(y Uint64x2, z Mask64x2) Mask64x2 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x2) MaskedLessEqual(y Uint64x2, z Mask64x2) Mask64x2 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXUQ, CPU Feature: AVX512EVEX -func (x Uint64x2) MaskedMax(y Uint64x2, z Mask64x2) Uint64x2 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINUQ, CPU Feature: AVX512EVEX -func (x Uint64x2) MaskedMin(y Uint64x2, z Mask64x2) Uint64x2 - -// MulEvenWiden multiplies even-indexed elements, widening the result, masked. -// Result[i] = v1.Even[i] * v2.Even[i]. -// -// Asm: VPMULUDQ, CPU Feature: AVX512EVEX -func (x Uint64x2) MaskedMulEvenWiden(y Uint64x2, z Mask64x2) Uint64x2 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x2) MaskedNotEqual(y Uint64x2, z Mask64x2) Mask64x2 - -// Or performs a masked bitwise OR operation between two vectors. -// -// Asm: VPORQ, CPU Feature: AVX512EVEX -func (x Uint64x2) MaskedOr(y Uint64x2, z Mask64x2) Uint64x2 - -// Sub subtracts corresponding elements of two vectors. -// -// Asm: VPSUBQ, CPU Feature: AVX512EVEX -func (x Uint64x2) MaskedSub(y Uint64x2, z Mask64x2) Uint64x2 - -// Xor performs a masked bitwise XOR operation between two vectors. -// -// Asm: VPXORQ, CPU Feature: AVX512EVEX -func (x Uint64x2) MaskedXor(y Uint64x2, z Mask64x2) Uint64x2 - -// Add adds corresponding elements of two vectors. -// -// Asm: VPADDQ, CPU Feature: AVX512EVEX -func (x Uint64x4) MaskedAdd(y Uint64x4, z Mask64x4) Uint64x4 - -// And performs a masked bitwise AND operation between two vectors. -// -// Asm: VPANDQ, CPU Feature: AVX512EVEX -func (x Uint64x4) MaskedAnd(y Uint64x4, z Mask64x4) Uint64x4 - -// AndNot performs a masked bitwise AND NOT operation between two vectors. 
-// -// Asm: VPANDNQ, CPU Feature: AVX512EVEX -func (x Uint64x4) MaskedAndNot(y Uint64x4, z Mask64x4) Uint64x4 - -// Equal compares for equality, masked. -// Const Immediate = 0. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x4) MaskedEqual(y Uint64x4, z Mask64x4) Mask64x4 - -// Greater compares for greater than. -// Const Immediate = 6. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x4) MaskedGreater(y Uint64x4, z Mask64x4) Mask64x4 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x4) MaskedGreaterEqual(y Uint64x4, z Mask64x4) Mask64x4 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x4) MaskedLess(y Uint64x4, z Mask64x4) Mask64x4 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x4) MaskedLessEqual(y Uint64x4, z Mask64x4) Mask64x4 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXUQ, CPU Feature: AVX512EVEX -func (x Uint64x4) MaskedMax(y Uint64x4, z Mask64x4) Uint64x4 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINUQ, CPU Feature: AVX512EVEX -func (x Uint64x4) MaskedMin(y Uint64x4, z Mask64x4) Uint64x4 - -// MulEvenWiden multiplies even-indexed elements, widening the result, masked. -// Result[i] = v1.Even[i] * v2.Even[i]. -// -// Asm: VPMULUDQ, CPU Feature: AVX512EVEX -func (x Uint64x4) MaskedMulEvenWiden(y Uint64x4, z Mask64x4) Uint64x4 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x4) MaskedNotEqual(y Uint64x4, z Mask64x4) Mask64x4 - -// Or performs a masked bitwise OR operation between two vectors. -// -// Asm: VPORQ, CPU Feature: AVX512EVEX -func (x Uint64x4) MaskedOr(y Uint64x4, z Mask64x4) Uint64x4 - -// Sub subtracts corresponding elements of two vectors. -// -// Asm: VPSUBQ, CPU Feature: AVX512EVEX -func (x Uint64x4) MaskedSub(y Uint64x4, z Mask64x4) Uint64x4 - -// Xor performs a masked bitwise XOR operation between two vectors. -// -// Asm: VPXORQ, CPU Feature: AVX512EVEX -func (x Uint64x4) MaskedXor(y Uint64x4, z Mask64x4) Uint64x4 - -// Add adds corresponding elements of two vectors. -// -// Asm: VPADDQ, CPU Feature: AVX512EVEX -func (x Uint64x8) MaskedAdd(y Uint64x8, z Mask64x8) Uint64x8 - -// And performs a masked bitwise AND operation between two vectors. -// -// Asm: VPANDQ, CPU Feature: AVX512EVEX -func (x Uint64x8) MaskedAnd(y Uint64x8, z Mask64x8) Uint64x8 - -// AndNot performs a masked bitwise AND NOT operation between two vectors. -// -// Asm: VPANDNQ, CPU Feature: AVX512EVEX -func (x Uint64x8) MaskedAndNot(y Uint64x8, z Mask64x8) Uint64x8 - -// Equal compares for equality, masked. -// Const Immediate = 0. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x8) MaskedEqual(y Uint64x8, z Mask64x8) Mask64x8 - -// Greater compares for greater than. -// Const Immediate = 6. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x8) MaskedGreater(y Uint64x8, z Mask64x8) Mask64x8 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x8) MaskedGreaterEqual(y Uint64x8, z Mask64x8) Mask64x8 - -// Less compares for less than. -// Const Immediate = 1. 
-// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x8) MaskedLess(y Uint64x8, z Mask64x8) Mask64x8 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x8) MaskedLessEqual(y Uint64x8, z Mask64x8) Mask64x8 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXUQ, CPU Feature: AVX512EVEX -func (x Uint64x8) MaskedMax(y Uint64x8, z Mask64x8) Uint64x8 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINUQ, CPU Feature: AVX512EVEX -func (x Uint64x8) MaskedMin(y Uint64x8, z Mask64x8) Uint64x8 - -// MulEvenWiden multiplies even-indexed elements, widening the result, masked. -// Result[i] = v1.Even[i] * v2.Even[i]. -// -// Asm: VPMULUDQ, CPU Feature: AVX512EVEX -func (x Uint64x8) MaskedMulEvenWiden(y Uint64x8, z Mask64x8) Uint64x8 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x8) MaskedNotEqual(y Uint64x8, z Mask64x8) Mask64x8 - -// Or performs a masked bitwise OR operation between two vectors. -// -// Asm: VPORQ, CPU Feature: AVX512EVEX -func (x Uint64x8) MaskedOr(y Uint64x8, z Mask64x8) Uint64x8 - -// Sub subtracts corresponding elements of two vectors. -// -// Asm: VPSUBQ, CPU Feature: AVX512EVEX -func (x Uint64x8) MaskedSub(y Uint64x8, z Mask64x8) Uint64x8 - -// Xor performs a masked bitwise XOR operation between two vectors. -// -// Asm: VPXORQ, CPU Feature: AVX512EVEX -func (x Uint64x8) MaskedXor(y Uint64x8, z Mask64x8) Uint64x8 - -// Add adds corresponding elements of two vectors. -// -// Asm: VPADDB, CPU Feature: AVX512EVEX -func (x Uint8x16) MaskedAdd(y Uint8x16, z Mask8x16) Uint8x16 - -// Average computes the rounded average of corresponding elements. -// -// Asm: VPAVGB, CPU Feature: AVX512EVEX -func (x Uint8x16) MaskedAverage(y Uint8x16, z Mask8x16) Uint8x16 - -// Equal compares for equality, masked. -// Const Immediate = 0. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x16) MaskedEqual(y Uint8x16, z Mask8x16) Mask8x16 - -// Greater compares for greater than. -// Const Immediate = 6. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x16) MaskedGreater(y Uint8x16, z Mask8x16) Mask8x16 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x16) MaskedGreaterEqual(y Uint8x16, z Mask8x16) Mask8x16 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x16) MaskedLess(y Uint8x16, z Mask8x16) Mask8x16 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x16) MaskedLessEqual(y Uint8x16, z Mask8x16) Mask8x16 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXUB, CPU Feature: AVX512EVEX -func (x Uint8x16) MaskedMax(y Uint8x16, z Mask8x16) Uint8x16 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINUB, CPU Feature: AVX512EVEX -func (x Uint8x16) MaskedMin(y Uint8x16, z Mask8x16) Uint8x16 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x16) MaskedNotEqual(y Uint8x16, z Mask8x16) Mask8x16 - -// SaturatedAdd adds corresponding elements of two vectors with saturation. 
-// -// Asm: VPADDSB, CPU Feature: AVX512EVEX -func (x Uint8x16) MaskedSaturatedAdd(y Uint8x16, z Mask8x16) Uint8x16 - -// SaturatedSub subtracts corresponding elements of two vectors with saturation. -// -// Asm: VPSUBSB, CPU Feature: AVX512EVEX -func (x Uint8x16) MaskedSaturatedSub(y Uint8x16, z Mask8x16) Uint8x16 - -// Sub subtracts corresponding elements of two vectors. -// -// Asm: VPSUBB, CPU Feature: AVX512EVEX -func (x Uint8x16) MaskedSub(y Uint8x16, z Mask8x16) Uint8x16 - -// Add adds corresponding elements of two vectors. -// -// Asm: VPADDB, CPU Feature: AVX512EVEX -func (x Uint8x32) MaskedAdd(y Uint8x32, z Mask8x32) Uint8x32 - -// Average computes the rounded average of corresponding elements. -// -// Asm: VPAVGB, CPU Feature: AVX512EVEX -func (x Uint8x32) MaskedAverage(y Uint8x32, z Mask8x32) Uint8x32 - -// Equal compares for equality, masked. -// Const Immediate = 0. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x32) MaskedEqual(y Uint8x32, z Mask8x32) Mask8x32 - -// Greater compares for greater than. -// Const Immediate = 6. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x32) MaskedGreater(y Uint8x32, z Mask8x32) Mask8x32 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x32) MaskedGreaterEqual(y Uint8x32, z Mask8x32) Mask8x32 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x32) MaskedLess(y Uint8x32, z Mask8x32) Mask8x32 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x32) MaskedLessEqual(y Uint8x32, z Mask8x32) Mask8x32 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXUB, CPU Feature: AVX512EVEX -func (x Uint8x32) MaskedMax(y Uint8x32, z Mask8x32) Uint8x32 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINUB, CPU Feature: AVX512EVEX -func (x Uint8x32) MaskedMin(y Uint8x32, z Mask8x32) Uint8x32 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x32) MaskedNotEqual(y Uint8x32, z Mask8x32) Mask8x32 - -// SaturatedAdd adds corresponding elements of two vectors with saturation. -// -// Asm: VPADDSB, CPU Feature: AVX512EVEX -func (x Uint8x32) MaskedSaturatedAdd(y Uint8x32, z Mask8x32) Uint8x32 - -// SaturatedSub subtracts corresponding elements of two vectors with saturation. -// -// Asm: VPSUBSB, CPU Feature: AVX512EVEX -func (x Uint8x32) MaskedSaturatedSub(y Uint8x32, z Mask8x32) Uint8x32 - -// Sub subtracts corresponding elements of two vectors. -// -// Asm: VPSUBB, CPU Feature: AVX512EVEX -func (x Uint8x32) MaskedSub(y Uint8x32, z Mask8x32) Uint8x32 - -// Add adds corresponding elements of two vectors. -// -// Asm: VPADDB, CPU Feature: AVX512EVEX -func (x Uint8x64) MaskedAdd(y Uint8x64, z Mask8x64) Uint8x64 - -// Average computes the rounded average of corresponding elements. -// -// Asm: VPAVGB, CPU Feature: AVX512EVEX -func (x Uint8x64) MaskedAverage(y Uint8x64, z Mask8x64) Uint8x64 - -// Equal compares for equality, masked. -// Const Immediate = 0. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x64) MaskedEqual(y Uint8x64, z Mask8x64) Mask8x64 - -// Greater compares for greater than. -// Const Immediate = 6. 
-// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x64) MaskedGreater(y Uint8x64, z Mask8x64) Mask8x64 - -// GreaterEqual compares for greater than or equal. -// Const Immediate = 5. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x64) MaskedGreaterEqual(y Uint8x64, z Mask8x64) Mask8x64 - -// Less compares for less than. -// Const Immediate = 1. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x64) MaskedLess(y Uint8x64, z Mask8x64) Mask8x64 - -// LessEqual compares for less than or equal. -// Const Immediate = 2. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x64) MaskedLessEqual(y Uint8x64, z Mask8x64) Mask8x64 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXUB, CPU Feature: AVX512EVEX -func (x Uint8x64) MaskedMax(y Uint8x64, z Mask8x64) Uint8x64 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINUB, CPU Feature: AVX512EVEX -func (x Uint8x64) MaskedMin(y Uint8x64, z Mask8x64) Uint8x64 - -// NotEqual compares for inequality. -// Const Immediate = 4. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x64) MaskedNotEqual(y Uint8x64, z Mask8x64) Mask8x64 - -// SaturatedAdd adds corresponding elements of two vectors with saturation. -// -// Asm: VPADDSB, CPU Feature: AVX512EVEX -func (x Uint8x64) MaskedSaturatedAdd(y Uint8x64, z Mask8x64) Uint8x64 - -// SaturatedSub subtracts corresponding elements of two vectors with saturation. -// -// Asm: VPSUBSB, CPU Feature: AVX512EVEX -func (x Uint8x64) MaskedSaturatedSub(y Uint8x64, z Mask8x64) Uint8x64 - -// Sub subtracts corresponding elements of two vectors. -// -// Asm: VPSUBB, CPU Feature: AVX512EVEX -func (x Uint8x64) MaskedSub(y Uint8x64, z Mask8x64) Uint8x64 - -// FusedMultiplyAdd132 performs `(v1 * v3) + v2`. -// -// Asm: VFMADD132PS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedFusedMultiplyAdd132(y Float32x16, z Float32x16, u Mask32x16) Float32x16 - -// FusedMultiplyAdd213 performs `(v2 * v1) + v3`. -// -// Asm: VFMADD213PS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedFusedMultiplyAdd213(y Float32x16, z Float32x16, u Mask32x16) Float32x16 - -// FusedMultiplyAdd231 performs `(v2 * v3) + v1`. -// -// Asm: VFMADD231PS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedFusedMultiplyAdd231(y Float32x16, z Float32x16, u Mask32x16) Float32x16 - -// FusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements. -// -// Asm: VFMADDSUB132PS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedFusedMultiplyAddSub132(y Float32x16, z Float32x16, u Mask32x16) Float32x16 - -// FusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements. -// -// Asm: VFMADDSUB213PS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedFusedMultiplyAddSub213(y Float32x16, z Float32x16, u Mask32x16) Float32x16 - -// FusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements. -// -// Asm: VFMADDSUB231PS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedFusedMultiplyAddSub231(y Float32x16, z Float32x16, u Mask32x16) Float32x16 - -// FusedMultiplySub132 performs `(v1 * v3) - v2`. -// -// Asm: VFMSUB132PS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedFusedMultiplySub132(y Float32x16, z Float32x16, u Mask32x16) Float32x16 - -// FusedMultiplySub213 performs `(v2 * v1) - v3`. 
-// -// Asm: VFMSUB213PS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedFusedMultiplySub213(y Float32x16, z Float32x16, u Mask32x16) Float32x16 - -// FusedMultiplySub231 performs `(v2 * v3) - v1`. -// -// Asm: VFMSUB231PS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedFusedMultiplySub231(y Float32x16, z Float32x16, u Mask32x16) Float32x16 - -// FusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements. -// -// Asm: VFMSUBADD132PS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedFusedMultiplySubAdd132(y Float32x16, z Float32x16, u Mask32x16) Float32x16 - -// FusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements. -// -// Asm: VFMSUBADD213PS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedFusedMultiplySubAdd213(y Float32x16, z Float32x16, u Mask32x16) Float32x16 - -// FusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements. -// -// Asm: VFMSUBADD231PS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedFusedMultiplySubAdd231(y Float32x16, z Float32x16, u Mask32x16) Float32x16 - -// FusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`. -// -// Asm: VFNMADD132PS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedFusedNegativeMultiplyAdd132(y Float32x16, z Float32x16, u Mask32x16) Float32x16 - -// FusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`. -// -// Asm: VFNMADD213PS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedFusedNegativeMultiplyAdd213(y Float32x16, z Float32x16, u Mask32x16) Float32x16 - -// FusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`. -// -// Asm: VFNMADD231PS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedFusedNegativeMultiplyAdd231(y Float32x16, z Float32x16, u Mask32x16) Float32x16 - -// FusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`. -// -// Asm: VFNMSUB132PS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedFusedNegativeMultiplySub132(y Float32x16, z Float32x16, u Mask32x16) Float32x16 - -// FusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`. -// -// Asm: VFNMSUB213PS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedFusedNegativeMultiplySub213(y Float32x16, z Float32x16, u Mask32x16) Float32x16 - -// FusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`. -// -// Asm: VFNMSUB231PS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedFusedNegativeMultiplySub231(y Float32x16, z Float32x16, u Mask32x16) Float32x16 - -// FusedMultiplyAdd132 performs `(v1 * v3) + v2`. -// -// Asm: VFMADD132PS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedFusedMultiplyAdd132(y Float32x4, z Float32x4, u Mask32x4) Float32x4 - -// FusedMultiplyAdd213 performs `(v2 * v1) + v3`. -// -// Asm: VFMADD213PS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedFusedMultiplyAdd213(y Float32x4, z Float32x4, u Mask32x4) Float32x4 - -// FusedMultiplyAdd231 performs `(v2 * v3) + v1`. -// -// Asm: VFMADD231PS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedFusedMultiplyAdd231(y Float32x4, z Float32x4, u Mask32x4) Float32x4 - -// FusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements. -// -// Asm: VFMADDSUB132PS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedFusedMultiplyAddSub132(y Float32x4, z Float32x4, u Mask32x4) Float32x4 - -// FusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements. 
-// -// Asm: VFMADDSUB213PS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedFusedMultiplyAddSub213(y Float32x4, z Float32x4, u Mask32x4) Float32x4 - -// FusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements. -// -// Asm: VFMADDSUB231PS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedFusedMultiplyAddSub231(y Float32x4, z Float32x4, u Mask32x4) Float32x4 - -// FusedMultiplySub132 performs `(v1 * v3) - v2`. -// -// Asm: VFMSUB132PS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedFusedMultiplySub132(y Float32x4, z Float32x4, u Mask32x4) Float32x4 - -// FusedMultiplySub213 performs `(v2 * v1) - v3`. -// -// Asm: VFMSUB213PS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedFusedMultiplySub213(y Float32x4, z Float32x4, u Mask32x4) Float32x4 - -// FusedMultiplySub231 performs `(v2 * v3) - v1`. -// -// Asm: VFMSUB231PS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedFusedMultiplySub231(y Float32x4, z Float32x4, u Mask32x4) Float32x4 - -// FusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements. -// -// Asm: VFMSUBADD132PS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedFusedMultiplySubAdd132(y Float32x4, z Float32x4, u Mask32x4) Float32x4 - -// FusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements. -// -// Asm: VFMSUBADD213PS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedFusedMultiplySubAdd213(y Float32x4, z Float32x4, u Mask32x4) Float32x4 - -// FusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements. -// -// Asm: VFMSUBADD231PS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedFusedMultiplySubAdd231(y Float32x4, z Float32x4, u Mask32x4) Float32x4 - -// FusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`. -// -// Asm: VFNMADD132PS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedFusedNegativeMultiplyAdd132(y Float32x4, z Float32x4, u Mask32x4) Float32x4 - -// FusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`. -// -// Asm: VFNMADD213PS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedFusedNegativeMultiplyAdd213(y Float32x4, z Float32x4, u Mask32x4) Float32x4 - -// FusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`. -// -// Asm: VFNMADD231PS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedFusedNegativeMultiplyAdd231(y Float32x4, z Float32x4, u Mask32x4) Float32x4 - -// FusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`. -// -// Asm: VFNMSUB132PS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedFusedNegativeMultiplySub132(y Float32x4, z Float32x4, u Mask32x4) Float32x4 - -// FusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`. -// -// Asm: VFNMSUB213PS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedFusedNegativeMultiplySub213(y Float32x4, z Float32x4, u Mask32x4) Float32x4 - -// FusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`. -// -// Asm: VFNMSUB231PS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedFusedNegativeMultiplySub231(y Float32x4, z Float32x4, u Mask32x4) Float32x4 - -// FusedMultiplyAdd132 performs `(v1 * v3) + v2`. -// -// Asm: VFMADD132PS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedFusedMultiplyAdd132(y Float32x8, z Float32x8, u Mask32x8) Float32x8 - -// FusedMultiplyAdd213 performs `(v2 * v1) + v3`. 
-// -// Asm: VFMADD213PS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedFusedMultiplyAdd213(y Float32x8, z Float32x8, u Mask32x8) Float32x8 - -// FusedMultiplyAdd231 performs `(v2 * v3) + v1`. -// -// Asm: VFMADD231PS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedFusedMultiplyAdd231(y Float32x8, z Float32x8, u Mask32x8) Float32x8 - -// FusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements. -// -// Asm: VFMADDSUB132PS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedFusedMultiplyAddSub132(y Float32x8, z Float32x8, u Mask32x8) Float32x8 - -// FusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements. -// -// Asm: VFMADDSUB213PS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedFusedMultiplyAddSub213(y Float32x8, z Float32x8, u Mask32x8) Float32x8 - -// FusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements. -// -// Asm: VFMADDSUB231PS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedFusedMultiplyAddSub231(y Float32x8, z Float32x8, u Mask32x8) Float32x8 - -// FusedMultiplySub132 performs `(v1 * v3) - v2`. -// -// Asm: VFMSUB132PS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedFusedMultiplySub132(y Float32x8, z Float32x8, u Mask32x8) Float32x8 - -// FusedMultiplySub213 performs `(v2 * v1) - v3`. -// -// Asm: VFMSUB213PS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedFusedMultiplySub213(y Float32x8, z Float32x8, u Mask32x8) Float32x8 - -// FusedMultiplySub231 performs `(v2 * v3) - v1`. -// -// Asm: VFMSUB231PS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedFusedMultiplySub231(y Float32x8, z Float32x8, u Mask32x8) Float32x8 - -// FusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements. -// -// Asm: VFMSUBADD132PS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedFusedMultiplySubAdd132(y Float32x8, z Float32x8, u Mask32x8) Float32x8 - -// FusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements. -// -// Asm: VFMSUBADD213PS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedFusedMultiplySubAdd213(y Float32x8, z Float32x8, u Mask32x8) Float32x8 - -// FusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements. -// -// Asm: VFMSUBADD231PS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedFusedMultiplySubAdd231(y Float32x8, z Float32x8, u Mask32x8) Float32x8 - -// FusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`. -// -// Asm: VFNMADD132PS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedFusedNegativeMultiplyAdd132(y Float32x8, z Float32x8, u Mask32x8) Float32x8 - -// FusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`. -// -// Asm: VFNMADD213PS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedFusedNegativeMultiplyAdd213(y Float32x8, z Float32x8, u Mask32x8) Float32x8 - -// FusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`. -// -// Asm: VFNMADD231PS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedFusedNegativeMultiplyAdd231(y Float32x8, z Float32x8, u Mask32x8) Float32x8 - -// FusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`. -// -// Asm: VFNMSUB132PS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedFusedNegativeMultiplySub132(y Float32x8, z Float32x8, u Mask32x8) Float32x8 - -// FusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`. 
-// -// Asm: VFNMSUB213PS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedFusedNegativeMultiplySub213(y Float32x8, z Float32x8, u Mask32x8) Float32x8 - -// FusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`. -// -// Asm: VFNMSUB231PS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedFusedNegativeMultiplySub231(y Float32x8, z Float32x8, u Mask32x8) Float32x8 - -// FusedMultiplyAdd132 performs `(v1 * v3) + v2`. -// -// Asm: VFMADD132PD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedFusedMultiplyAdd132(y Float64x2, z Float64x2, u Mask64x2) Float64x2 - -// FusedMultiplyAdd213 performs `(v2 * v1) + v3`. -// -// Asm: VFMADD213PD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedFusedMultiplyAdd213(y Float64x2, z Float64x2, u Mask64x2) Float64x2 - -// FusedMultiplyAdd231 performs `(v2 * v3) + v1`. -// -// Asm: VFMADD231PD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedFusedMultiplyAdd231(y Float64x2, z Float64x2, u Mask64x2) Float64x2 - -// FusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements. -// -// Asm: VFMADDSUB132PD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedFusedMultiplyAddSub132(y Float64x2, z Float64x2, u Mask64x2) Float64x2 - -// FusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements. -// -// Asm: VFMADDSUB213PD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedFusedMultiplyAddSub213(y Float64x2, z Float64x2, u Mask64x2) Float64x2 - -// FusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements. -// -// Asm: VFMADDSUB231PD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedFusedMultiplyAddSub231(y Float64x2, z Float64x2, u Mask64x2) Float64x2 - -// FusedMultiplySub132 performs `(v1 * v3) - v2`. -// -// Asm: VFMSUB132PD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedFusedMultiplySub132(y Float64x2, z Float64x2, u Mask64x2) Float64x2 - -// FusedMultiplySub213 performs `(v2 * v1) - v3`. -// -// Asm: VFMSUB213PD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedFusedMultiplySub213(y Float64x2, z Float64x2, u Mask64x2) Float64x2 - -// FusedMultiplySub231 performs `(v2 * v3) - v1`. -// -// Asm: VFMSUB231PD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedFusedMultiplySub231(y Float64x2, z Float64x2, u Mask64x2) Float64x2 - -// FusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements. -// -// Asm: VFMSUBADD132PD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedFusedMultiplySubAdd132(y Float64x2, z Float64x2, u Mask64x2) Float64x2 - -// FusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements. -// -// Asm: VFMSUBADD213PD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedFusedMultiplySubAdd213(y Float64x2, z Float64x2, u Mask64x2) Float64x2 - -// FusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements. -// -// Asm: VFMSUBADD231PD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedFusedMultiplySubAdd231(y Float64x2, z Float64x2, u Mask64x2) Float64x2 - -// FusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`. -// -// Asm: VFNMADD132PD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedFusedNegativeMultiplyAdd132(y Float64x2, z Float64x2, u Mask64x2) Float64x2 - -// FusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`. 
-// -// Asm: VFNMADD213PD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedFusedNegativeMultiplyAdd213(y Float64x2, z Float64x2, u Mask64x2) Float64x2 - -// FusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`. -// -// Asm: VFNMADD231PD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedFusedNegativeMultiplyAdd231(y Float64x2, z Float64x2, u Mask64x2) Float64x2 - -// FusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`. -// -// Asm: VFNMSUB132PD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedFusedNegativeMultiplySub132(y Float64x2, z Float64x2, u Mask64x2) Float64x2 - -// FusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`. -// -// Asm: VFNMSUB213PD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedFusedNegativeMultiplySub213(y Float64x2, z Float64x2, u Mask64x2) Float64x2 - -// FusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`. -// -// Asm: VFNMSUB231PD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedFusedNegativeMultiplySub231(y Float64x2, z Float64x2, u Mask64x2) Float64x2 - -// FusedMultiplyAdd132 performs `(v1 * v3) + v2`. -// -// Asm: VFMADD132PD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedFusedMultiplyAdd132(y Float64x4, z Float64x4, u Mask64x4) Float64x4 - -// FusedMultiplyAdd213 performs `(v2 * v1) + v3`. -// -// Asm: VFMADD213PD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedFusedMultiplyAdd213(y Float64x4, z Float64x4, u Mask64x4) Float64x4 - -// FusedMultiplyAdd231 performs `(v2 * v3) + v1`. -// -// Asm: VFMADD231PD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedFusedMultiplyAdd231(y Float64x4, z Float64x4, u Mask64x4) Float64x4 - -// FusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements. -// -// Asm: VFMADDSUB132PD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedFusedMultiplyAddSub132(y Float64x4, z Float64x4, u Mask64x4) Float64x4 - -// FusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements. -// -// Asm: VFMADDSUB213PD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedFusedMultiplyAddSub213(y Float64x4, z Float64x4, u Mask64x4) Float64x4 - -// FusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements. -// -// Asm: VFMADDSUB231PD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedFusedMultiplyAddSub231(y Float64x4, z Float64x4, u Mask64x4) Float64x4 - -// FusedMultiplySub132 performs `(v1 * v3) - v2`. -// -// Asm: VFMSUB132PD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedFusedMultiplySub132(y Float64x4, z Float64x4, u Mask64x4) Float64x4 - -// FusedMultiplySub213 performs `(v2 * v1) - v3`. -// -// Asm: VFMSUB213PD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedFusedMultiplySub213(y Float64x4, z Float64x4, u Mask64x4) Float64x4 - -// FusedMultiplySub231 performs `(v2 * v3) - v1`. -// -// Asm: VFMSUB231PD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedFusedMultiplySub231(y Float64x4, z Float64x4, u Mask64x4) Float64x4 - -// FusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements. -// -// Asm: VFMSUBADD132PD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedFusedMultiplySubAdd132(y Float64x4, z Float64x4, u Mask64x4) Float64x4 - -// FusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements. 
-// -// Asm: VFMSUBADD213PD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedFusedMultiplySubAdd213(y Float64x4, z Float64x4, u Mask64x4) Float64x4 - -// FusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements. -// -// Asm: VFMSUBADD231PD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedFusedMultiplySubAdd231(y Float64x4, z Float64x4, u Mask64x4) Float64x4 - -// FusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`. -// -// Asm: VFNMADD132PD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedFusedNegativeMultiplyAdd132(y Float64x4, z Float64x4, u Mask64x4) Float64x4 - -// FusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`. -// -// Asm: VFNMADD213PD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedFusedNegativeMultiplyAdd213(y Float64x4, z Float64x4, u Mask64x4) Float64x4 - -// FusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`. -// -// Asm: VFNMADD231PD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedFusedNegativeMultiplyAdd231(y Float64x4, z Float64x4, u Mask64x4) Float64x4 - -// FusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`. -// -// Asm: VFNMSUB132PD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedFusedNegativeMultiplySub132(y Float64x4, z Float64x4, u Mask64x4) Float64x4 - -// FusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`. -// -// Asm: VFNMSUB213PD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedFusedNegativeMultiplySub213(y Float64x4, z Float64x4, u Mask64x4) Float64x4 - -// FusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`. -// -// Asm: VFNMSUB231PD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedFusedNegativeMultiplySub231(y Float64x4, z Float64x4, u Mask64x4) Float64x4 - -// FusedMultiplyAdd132 performs `(v1 * v3) + v2`. -// -// Asm: VFMADD132PD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedFusedMultiplyAdd132(y Float64x8, z Float64x8, u Mask64x8) Float64x8 - -// FusedMultiplyAdd213 performs `(v2 * v1) + v3`. -// -// Asm: VFMADD213PD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedFusedMultiplyAdd213(y Float64x8, z Float64x8, u Mask64x8) Float64x8 - -// FusedMultiplyAdd231 performs `(v2 * v3) + v1`. -// -// Asm: VFMADD231PD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedFusedMultiplyAdd231(y Float64x8, z Float64x8, u Mask64x8) Float64x8 - -// FusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements. -// -// Asm: VFMADDSUB132PD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedFusedMultiplyAddSub132(y Float64x8, z Float64x8, u Mask64x8) Float64x8 - -// FusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements. -// -// Asm: VFMADDSUB213PD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedFusedMultiplyAddSub213(y Float64x8, z Float64x8, u Mask64x8) Float64x8 - -// FusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements. -// -// Asm: VFMADDSUB231PD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedFusedMultiplyAddSub231(y Float64x8, z Float64x8, u Mask64x8) Float64x8 - -// FusedMultiplySub132 performs `(v1 * v3) - v2`. -// -// Asm: VFMSUB132PD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedFusedMultiplySub132(y Float64x8, z Float64x8, u Mask64x8) Float64x8 - -// FusedMultiplySub213 performs `(v2 * v1) - v3`. 
-// -// Asm: VFMSUB213PD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedFusedMultiplySub213(y Float64x8, z Float64x8, u Mask64x8) Float64x8 - -// FusedMultiplySub231 performs `(v2 * v3) - v1`. -// -// Asm: VFMSUB231PD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedFusedMultiplySub231(y Float64x8, z Float64x8, u Mask64x8) Float64x8 - -// FusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements. -// -// Asm: VFMSUBADD132PD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedFusedMultiplySubAdd132(y Float64x8, z Float64x8, u Mask64x8) Float64x8 - -// FusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements. -// -// Asm: VFMSUBADD213PD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedFusedMultiplySubAdd213(y Float64x8, z Float64x8, u Mask64x8) Float64x8 - -// FusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements. -// -// Asm: VFMSUBADD231PD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedFusedMultiplySubAdd231(y Float64x8, z Float64x8, u Mask64x8) Float64x8 - -// FusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`. -// -// Asm: VFNMADD132PD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedFusedNegativeMultiplyAdd132(y Float64x8, z Float64x8, u Mask64x8) Float64x8 - -// FusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`. -// -// Asm: VFNMADD213PD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedFusedNegativeMultiplyAdd213(y Float64x8, z Float64x8, u Mask64x8) Float64x8 - -// FusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`. -// -// Asm: VFNMADD231PD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedFusedNegativeMultiplyAdd231(y Float64x8, z Float64x8, u Mask64x8) Float64x8 - -// FusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`. -// -// Asm: VFNMSUB132PD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedFusedNegativeMultiplySub132(y Float64x8, z Float64x8, u Mask64x8) Float64x8 - -// FusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`. -// -// Asm: VFNMSUB213PD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedFusedNegativeMultiplySub213(y Float64x8, z Float64x8, u Mask64x8) Float64x8 - -// FusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`. -// -// Asm: VFNMSUB231PD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedFusedNegativeMultiplySub231(y Float64x8, z Float64x8, u Mask64x8) Float64x8 - -// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. -// -// Asm: VPDPWSSD, CPU Feature: AVX512EVEX -func (x Int32x16) MaskedPairDotProdAccumulate(y Int16x32, z Int32x16, u Mask32x16) Int32x16 - -// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. -// -// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX -func (x Int32x16) MaskedSaturatedPairDotProdAccumulate(y Int16x32, z Int32x16, u Mask32x16) Int32x16 - -// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. -// -// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX -func (x Int32x16) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16, u Mask32x16) Int32x16 - -// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. 
-// -// Asm: VPDPBUSD, CPU Feature: AVX512EVEX -func (x Int32x16) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16, u Mask32x16) Int32x16 - -// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. -// -// Asm: VPDPWSSD, CPU Feature: AVX512EVEX -func (x Int32x4) MaskedPairDotProdAccumulate(y Int16x8, z Int32x4, u Mask32x4) Int32x4 - -// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. -// -// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX -func (x Int32x4) MaskedSaturatedPairDotProdAccumulate(y Int16x8, z Int32x4, u Mask32x4) Int32x4 - -// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. -// -// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX -func (x Int32x4) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int32x4, u Mask32x4) Int32x4 - -// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. -// -// Asm: VPDPBUSD, CPU Feature: AVX512EVEX -func (x Int32x4) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int32x4, u Mask32x4) Int32x4 - -// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. -// -// Asm: VPDPWSSD, CPU Feature: AVX512EVEX -func (x Int32x8) MaskedPairDotProdAccumulate(y Int16x16, z Int32x8, u Mask32x8) Int32x8 - -// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. -// -// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX -func (x Int32x8) MaskedSaturatedPairDotProdAccumulate(y Int16x16, z Int32x8, u Mask32x8) Int32x8 - -// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. -// -// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX -func (x Int32x8) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int32x8, u Mask32x8) Int32x8 - -// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. -// -// Asm: VPDPBUSD, CPU Feature: AVX512EVEX -func (x Int32x8) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int32x8, u Mask32x8) Int32x8 - -// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. -// -// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX -func (x Uint32x16) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16, u Mask32x16) Uint32x16 - -// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. -// -// Asm: VPDPBUSD, CPU Feature: AVX512EVEX -func (x Uint32x16) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16, u Mask32x16) Uint32x16 - -// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. -// -// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX -func (x Uint32x4) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int32x4, u Mask32x4) Uint32x4 - -// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. 
-// -// Asm: VPDPBUSD, CPU Feature: AVX512EVEX -func (x Uint32x4) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int32x4, u Mask32x4) Uint32x4 - -// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. -// -// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX -func (x Uint32x8) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int32x8, u Mask32x8) Uint32x8 - -// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. -// -// Asm: VPDPBUSD, CPU Feature: AVX512EVEX -func (x Uint32x8) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int32x8, u Mask32x8) Uint32x8 - -// CeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions. -// Const Immediate = 10. -// -// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x16) CeilSuppressExceptionWithPrecision(imm8 uint8) Float32x16 +/* CeilSuppressExceptionWithPrecision */ // CeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions. // Const Immediate = 10. @@ -7270,6 +640,12 @@ func (x Float32x4) CeilSuppressExceptionWithPrecision(imm8 uint8) Float32x4 // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX func (x Float32x8) CeilSuppressExceptionWithPrecision(imm8 uint8) Float32x8 +// CeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions. +// Const Immediate = 10. +// +// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX +func (x Float32x16) CeilSuppressExceptionWithPrecision(imm8 uint8) Float32x16 + // CeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions. // Const Immediate = 10. // @@ -7288,11 +664,7 @@ func (x Float64x4) CeilSuppressExceptionWithPrecision(imm8 uint8) Float64x4 // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX func (x Float64x8) CeilSuppressExceptionWithPrecision(imm8 uint8) Float64x8 -// CeilWithPrecision rounds elements up with specified precision, masked. -// Const Immediate = 2. -// -// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x16) CeilWithPrecision(imm8 uint8) Float32x16 +/* CeilWithPrecision */ // CeilWithPrecision rounds elements up with specified precision, masked. // Const Immediate = 2. @@ -7306,6 +678,12 @@ func (x Float32x4) CeilWithPrecision(imm8 uint8) Float32x4 // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX func (x Float32x8) CeilWithPrecision(imm8 uint8) Float32x8 +// CeilWithPrecision rounds elements up with specified precision, masked. +// Const Immediate = 2. +// +// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX +func (x Float32x16) CeilWithPrecision(imm8 uint8) Float32x16 + // CeilWithPrecision rounds elements up with specified precision, masked. // Const Immediate = 2. // @@ -7324,11 +702,7 @@ func (x Float64x4) CeilWithPrecision(imm8 uint8) Float64x4 // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX func (x Float64x8) CeilWithPrecision(imm8 uint8) Float64x8 -// DiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions. -// Const Immediate = 10. -// -// Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x16) DiffWithCeilSuppressExceptionWithPrecision(imm8 uint8) Float32x16 +/* DiffWithCeilSuppressExceptionWithPrecision */ // DiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions. // Const Immediate = 10. 
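// The *WithPrecision variants round to a multiple of 2^-imm8 rather than to an
// integer: the fixed "Const Immediate" carries the rounding mode (plus the
// exception-suppression bit in the SuppressException forms), while the imm8
// argument carries the precision. A per-lane reference sketch in plain Go,
// assuming the usual VRNDSCALE semantics (the helper name is illustrative
// only; imports "math"):
//
//	func ceilWithPrecisionRef(x float64, imm8 uint8) float64 {
//		scale := math.Ldexp(1, int(imm8)) // 2^imm8
//		return math.Ceil(x*scale) / scale // round up to a multiple of 2^-imm8
//	}
//
// For example, ceilWithPrecisionRef(2.3, 0) is 3 (a plain ceiling), while
// ceilWithPrecisionRef(2.3, 2) rounds up to multiples of 0.25 and yields 2.5.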
@@ -7342,6 +716,12 @@ func (x Float32x4) DiffWithCeilSuppressExceptionWithPrecision(imm8 uint8) Float3 // Asm: VREDUCEPS, CPU Feature: AVX512EVEX func (x Float32x8) DiffWithCeilSuppressExceptionWithPrecision(imm8 uint8) Float32x8 +// DiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions. +// Const Immediate = 10. +// +// Asm: VREDUCEPS, CPU Feature: AVX512EVEX +func (x Float32x16) DiffWithCeilSuppressExceptionWithPrecision(imm8 uint8) Float32x16 + // DiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions. // Const Immediate = 10. // @@ -7360,11 +740,7 @@ func (x Float64x4) DiffWithCeilSuppressExceptionWithPrecision(imm8 uint8) Float6 // Asm: VREDUCEPD, CPU Feature: AVX512EVEX func (x Float64x8) DiffWithCeilSuppressExceptionWithPrecision(imm8 uint8) Float64x8 -// DiffWithCeilWithPrecision computes the difference after ceiling with specified precision. -// Const Immediate = 2. -// -// Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x16) DiffWithCeilWithPrecision(imm8 uint8) Float32x16 +/* DiffWithCeilWithPrecision */ // DiffWithCeilWithPrecision computes the difference after ceiling with specified precision. // Const Immediate = 2. @@ -7378,6 +754,12 @@ func (x Float32x4) DiffWithCeilWithPrecision(imm8 uint8) Float32x4 // Asm: VREDUCEPS, CPU Feature: AVX512EVEX func (x Float32x8) DiffWithCeilWithPrecision(imm8 uint8) Float32x8 +// DiffWithCeilWithPrecision computes the difference after ceiling with specified precision. +// Const Immediate = 2. +// +// Asm: VREDUCEPS, CPU Feature: AVX512EVEX +func (x Float32x16) DiffWithCeilWithPrecision(imm8 uint8) Float32x16 + // DiffWithCeilWithPrecision computes the difference after ceiling with specified precision. // Const Immediate = 2. // @@ -7396,11 +778,7 @@ func (x Float64x4) DiffWithCeilWithPrecision(imm8 uint8) Float64x4 // Asm: VREDUCEPD, CPU Feature: AVX512EVEX func (x Float64x8) DiffWithCeilWithPrecision(imm8 uint8) Float64x8 -// DiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions. -// Const Immediate = 9. -// -// Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x16) DiffWithFloorSuppressExceptionWithPrecision(imm8 uint8) Float32x16 +/* DiffWithFloorSuppressExceptionWithPrecision */ // DiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions. // Const Immediate = 9. @@ -7414,6 +792,12 @@ func (x Float32x4) DiffWithFloorSuppressExceptionWithPrecision(imm8 uint8) Float // Asm: VREDUCEPS, CPU Feature: AVX512EVEX func (x Float32x8) DiffWithFloorSuppressExceptionWithPrecision(imm8 uint8) Float32x8 +// DiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions. +// Const Immediate = 9. +// +// Asm: VREDUCEPS, CPU Feature: AVX512EVEX +func (x Float32x16) DiffWithFloorSuppressExceptionWithPrecision(imm8 uint8) Float32x16 + // DiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions. // Const Immediate = 9. 
// @@ -7432,11 +816,7 @@ func (x Float64x4) DiffWithFloorSuppressExceptionWithPrecision(imm8 uint8) Float // Asm: VREDUCEPD, CPU Feature: AVX512EVEX func (x Float64x8) DiffWithFloorSuppressExceptionWithPrecision(imm8 uint8) Float64x8 -// DiffWithFloorWithPrecision computes the difference after flooring with specified precision. -// Const Immediate = 1. -// -// Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x16) DiffWithFloorWithPrecision(imm8 uint8) Float32x16 +/* DiffWithFloorWithPrecision */ // DiffWithFloorWithPrecision computes the difference after flooring with specified precision. // Const Immediate = 1. @@ -7450,6 +830,12 @@ func (x Float32x4) DiffWithFloorWithPrecision(imm8 uint8) Float32x4 // Asm: VREDUCEPS, CPU Feature: AVX512EVEX func (x Float32x8) DiffWithFloorWithPrecision(imm8 uint8) Float32x8 +// DiffWithFloorWithPrecision computes the difference after flooring with specified precision. +// Const Immediate = 1. +// +// Asm: VREDUCEPS, CPU Feature: AVX512EVEX +func (x Float32x16) DiffWithFloorWithPrecision(imm8 uint8) Float32x16 + // DiffWithFloorWithPrecision computes the difference after flooring with specified precision. // Const Immediate = 1. // @@ -7468,11 +854,7 @@ func (x Float64x4) DiffWithFloorWithPrecision(imm8 uint8) Float64x4 // Asm: VREDUCEPD, CPU Feature: AVX512EVEX func (x Float64x8) DiffWithFloorWithPrecision(imm8 uint8) Float64x8 -// DiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions. -// Const Immediate = 8. -// -// Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x16) DiffWithRoundSuppressExceptionWithPrecision(imm8 uint8) Float32x16 +/* DiffWithRoundSuppressExceptionWithPrecision */ // DiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions. // Const Immediate = 8. @@ -7486,6 +868,12 @@ func (x Float32x4) DiffWithRoundSuppressExceptionWithPrecision(imm8 uint8) Float // Asm: VREDUCEPS, CPU Feature: AVX512EVEX func (x Float32x8) DiffWithRoundSuppressExceptionWithPrecision(imm8 uint8) Float32x8 +// DiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions. +// Const Immediate = 8. +// +// Asm: VREDUCEPS, CPU Feature: AVX512EVEX +func (x Float32x16) DiffWithRoundSuppressExceptionWithPrecision(imm8 uint8) Float32x16 + // DiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions. // Const Immediate = 8. // @@ -7504,11 +892,7 @@ func (x Float64x4) DiffWithRoundSuppressExceptionWithPrecision(imm8 uint8) Float // Asm: VREDUCEPD, CPU Feature: AVX512EVEX func (x Float64x8) DiffWithRoundSuppressExceptionWithPrecision(imm8 uint8) Float64x8 -// DiffWithRoundWithPrecision computes the difference after rounding with specified precision. -// Const Immediate = 0. -// -// Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x16) DiffWithRoundWithPrecision(imm8 uint8) Float32x16 +/* DiffWithRoundWithPrecision */ // DiffWithRoundWithPrecision computes the difference after rounding with specified precision. // Const Immediate = 0. @@ -7522,6 +906,12 @@ func (x Float32x4) DiffWithRoundWithPrecision(imm8 uint8) Float32x4 // Asm: VREDUCEPS, CPU Feature: AVX512EVEX func (x Float32x8) DiffWithRoundWithPrecision(imm8 uint8) Float32x8 +// DiffWithRoundWithPrecision computes the difference after rounding with specified precision. +// Const Immediate = 0. 
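// The DiffWith* family returns what the rounding discards: per lane, x minus x
// rounded at the requested precision (reading the VREDUCE immediate the same
// way as VRNDSCALE above). A plain-Go reference for DiffWithFloorWithPrecision,
// again with an illustrative helper name and "math" imported:
//
//	func diffWithFloorRef(x float64, imm8 uint8) float64 {
//		scale := math.Ldexp(1, int(imm8)) // 2^imm8
//		return x - math.Floor(x*scale)/scale
//	}
//
// so diffWithFloorRef(2.8, 0) is roughly 0.8, the fraction Floor would drop.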
+// +// Asm: VREDUCEPS, CPU Feature: AVX512EVEX +func (x Float32x16) DiffWithRoundWithPrecision(imm8 uint8) Float32x16 + // DiffWithRoundWithPrecision computes the difference after rounding with specified precision. // Const Immediate = 0. // @@ -7540,11 +930,7 @@ func (x Float64x4) DiffWithRoundWithPrecision(imm8 uint8) Float64x4 // Asm: VREDUCEPD, CPU Feature: AVX512EVEX func (x Float64x8) DiffWithRoundWithPrecision(imm8 uint8) Float64x8 -// DiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions. -// Const Immediate = 11. -// -// Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x16) DiffWithTruncSuppressExceptionWithPrecision(imm8 uint8) Float32x16 +/* DiffWithTruncSuppressExceptionWithPrecision */ // DiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions. // Const Immediate = 11. @@ -7558,6 +944,12 @@ func (x Float32x4) DiffWithTruncSuppressExceptionWithPrecision(imm8 uint8) Float // Asm: VREDUCEPS, CPU Feature: AVX512EVEX func (x Float32x8) DiffWithTruncSuppressExceptionWithPrecision(imm8 uint8) Float32x8 +// DiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions. +// Const Immediate = 11. +// +// Asm: VREDUCEPS, CPU Feature: AVX512EVEX +func (x Float32x16) DiffWithTruncSuppressExceptionWithPrecision(imm8 uint8) Float32x16 + // DiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions. // Const Immediate = 11. // @@ -7576,11 +968,7 @@ func (x Float64x4) DiffWithTruncSuppressExceptionWithPrecision(imm8 uint8) Float // Asm: VREDUCEPD, CPU Feature: AVX512EVEX func (x Float64x8) DiffWithTruncSuppressExceptionWithPrecision(imm8 uint8) Float64x8 -// DiffWithTruncWithPrecision computes the difference after truncating with specified precision. -// Const Immediate = 3. -// -// Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x16) DiffWithTruncWithPrecision(imm8 uint8) Float32x16 +/* DiffWithTruncWithPrecision */ // DiffWithTruncWithPrecision computes the difference after truncating with specified precision. // Const Immediate = 3. @@ -7594,6 +982,12 @@ func (x Float32x4) DiffWithTruncWithPrecision(imm8 uint8) Float32x4 // Asm: VREDUCEPS, CPU Feature: AVX512EVEX func (x Float32x8) DiffWithTruncWithPrecision(imm8 uint8) Float32x8 +// DiffWithTruncWithPrecision computes the difference after truncating with specified precision. +// Const Immediate = 3. +// +// Asm: VREDUCEPS, CPU Feature: AVX512EVEX +func (x Float32x16) DiffWithTruncWithPrecision(imm8 uint8) Float32x16 + // DiffWithTruncWithPrecision computes the difference after truncating with specified precision. // Const Immediate = 3. // @@ -7612,11 +1006,255 @@ func (x Float64x4) DiffWithTruncWithPrecision(imm8 uint8) Float64x4 // Asm: VREDUCEPD, CPU Feature: AVX512EVEX func (x Float64x8) DiffWithTruncWithPrecision(imm8 uint8) Float64x8 -// FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions, masked. -// Const Immediate = 9. +/* Div */ + +// Div divides elements of two vectors. // -// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x16) FloorSuppressExceptionWithPrecision(imm8 uint8) Float32x16 +// Asm: VDIVPS, CPU Feature: AVX +func (x Float32x4) Div(y Float32x4) Float32x4 + +// Div divides elements of two vectors. 
+// +// Asm: VDIVPS, CPU Feature: AVX +func (x Float32x8) Div(y Float32x8) Float32x8 + +// Div divides elements of two vectors. +// +// Asm: VDIVPS, CPU Feature: AVX512EVEX +func (x Float32x16) Div(y Float32x16) Float32x16 + +// Div divides elements of two vectors. +// +// Asm: VDIVPD, CPU Feature: AVX +func (x Float64x2) Div(y Float64x2) Float64x2 + +// Div divides elements of two vectors. +// +// Asm: VDIVPD, CPU Feature: AVX +func (x Float64x4) Div(y Float64x4) Float64x4 + +// Div divides elements of two vectors. +// +// Asm: VDIVPD, CPU Feature: AVX512EVEX +func (x Float64x8) Div(y Float64x8) Float64x8 + +/* DotProdBroadcast */ + +// DotProdBroadcast multiplies all elements and broadcasts the sum. +// Const Immediate = 127. +// +// Asm: VDPPD, CPU Feature: AVX +func (x Float64x2) DotProdBroadcast(y Float64x2) Float64x2 + +/* Equal */ + +// Equal compares for equality. +// Const Immediate = 0. +// +// Asm: VPCMPEQB, CPU Feature: AVX +func (x Int8x16) Equal(y Int8x16) Mask8x16 + +// Equal compares for equality. +// Const Immediate = 0. +// +// Asm: VPCMPEQB, CPU Feature: AVX2 +func (x Int8x32) Equal(y Int8x32) Mask8x32 + +// Equal compares for equality. +// Const Immediate = 0. +// +// Asm: VPCMPEQW, CPU Feature: AVX +func (x Int16x8) Equal(y Int16x8) Mask16x8 + +// Equal compares for equality. +// Const Immediate = 0. +// +// Asm: VPCMPEQW, CPU Feature: AVX2 +func (x Int16x16) Equal(y Int16x16) Mask16x16 + +// Equal compares for equality. +// Const Immediate = 0. +// +// Asm: VPCMPEQD, CPU Feature: AVX +func (x Int32x4) Equal(y Int32x4) Mask32x4 + +// Equal compares for equality. +// Const Immediate = 0. +// +// Asm: VPCMPEQD, CPU Feature: AVX2 +func (x Int32x8) Equal(y Int32x8) Mask32x8 + +// Equal compares for equality. +// Const Immediate = 0. +// +// Asm: VPCMPEQQ, CPU Feature: AVX +func (x Int64x2) Equal(y Int64x2) Mask64x2 + +// Equal compares for equality. +// Const Immediate = 0. +// +// Asm: VPCMPEQQ, CPU Feature: AVX2 +func (x Int64x4) Equal(y Int64x4) Mask64x4 + +// Equal compares for equality. +// Const Immediate = 0. +// +// Asm: VCMPPS, CPU Feature: AVX +func (x Float32x4) Equal(y Float32x4) Mask32x4 + +// Equal compares for equality. +// Const Immediate = 0. +// +// Asm: VCMPPS, CPU Feature: AVX +func (x Float32x8) Equal(y Float32x8) Mask32x8 + +// Equal compares for equality, masked. +// Const Immediate = 0. +// +// Asm: VCMPPS, CPU Feature: AVX512EVEX +func (x Float32x16) Equal(y Float32x16) Mask32x16 + +// Equal compares for equality. +// Const Immediate = 0. +// +// Asm: VCMPPD, CPU Feature: AVX +func (x Float64x2) Equal(y Float64x2) Mask64x2 + +// Equal compares for equality. +// Const Immediate = 0. +// +// Asm: VCMPPD, CPU Feature: AVX +func (x Float64x4) Equal(y Float64x4) Mask64x4 + +// Equal compares for equality, masked. +// Const Immediate = 0. +// +// Asm: VCMPPD, CPU Feature: AVX512EVEX +func (x Float64x8) Equal(y Float64x8) Mask64x8 + +// Equal compares for equality, masked. +// Const Immediate = 0. +// +// Asm: VPCMPB, CPU Feature: AVX512EVEX +func (x Int8x64) Equal(y Int8x64) Mask8x64 + +// Equal compares for equality, masked. +// Const Immediate = 0. +// +// Asm: VPCMPW, CPU Feature: AVX512EVEX +func (x Int16x32) Equal(y Int16x32) Mask16x32 + +// Equal compares for equality, masked. +// Const Immediate = 0. +// +// Asm: VPCMPD, CPU Feature: AVX512EVEX +func (x Int32x16) Equal(y Int32x16) Mask32x16 + +// Equal compares for equality, masked. +// Const Immediate = 0. 
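// The comparison methods return lane masks instead of modifying elements. A
// minimal sketch, assuming these declarations are importable as a package
// named simd (that import path is an assumption, not part of this diff):
//
//	func equalLanes(a, b simd.Int32x8) simd.Mask32x8 {
//		return a.Equal(b) // VPCMPEQD on AVX2: a true lane means a and b match there
//	}
//
// The resulting masks feed the Masked* method variants declared elsewhere in
// this file, which take a mask as their final operand.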
+// +// Asm: VPCMPQ, CPU Feature: AVX512EVEX +func (x Int64x8) Equal(y Int64x8) Mask64x8 + +// Equal compares for equality, masked. +// Const Immediate = 0. +// +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x16) Equal(y Uint8x16) Mask8x16 + +// Equal compares for equality, masked. +// Const Immediate = 0. +// +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x32) Equal(y Uint8x32) Mask8x32 + +// Equal compares for equality, masked. +// Const Immediate = 0. +// +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x64) Equal(y Uint8x64) Mask8x64 + +// Equal compares for equality, masked. +// Const Immediate = 0. +// +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x8) Equal(y Uint16x8) Mask16x8 + +// Equal compares for equality, masked. +// Const Immediate = 0. +// +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x16) Equal(y Uint16x16) Mask16x16 + +// Equal compares for equality, masked. +// Const Immediate = 0. +// +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x32) Equal(y Uint16x32) Mask16x32 + +// Equal compares for equality, masked. +// Const Immediate = 0. +// +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x4) Equal(y Uint32x4) Mask32x4 + +// Equal compares for equality, masked. +// Const Immediate = 0. +// +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x8) Equal(y Uint32x8) Mask32x8 + +// Equal compares for equality, masked. +// Const Immediate = 0. +// +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x16) Equal(y Uint32x16) Mask32x16 + +// Equal compares for equality, masked. +// Const Immediate = 0. +// +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x2) Equal(y Uint64x2) Mask64x2 + +// Equal compares for equality, masked. +// Const Immediate = 0. +// +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x4) Equal(y Uint64x4) Mask64x4 + +// Equal compares for equality, masked. +// Const Immediate = 0. +// +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x8) Equal(y Uint64x8) Mask64x8 + +/* Floor */ + +// Floor rounds elements down to the nearest integer. +// Const Immediate = 1. +// +// Asm: VROUNDPS, CPU Feature: AVX +func (x Float32x4) Floor() Float32x4 + +// Floor rounds elements down to the nearest integer. +// Const Immediate = 1. +// +// Asm: VROUNDPS, CPU Feature: AVX +func (x Float32x8) Floor() Float32x8 + +// Floor rounds elements down to the nearest integer. +// Const Immediate = 1. +// +// Asm: VROUNDPD, CPU Feature: AVX +func (x Float64x2) Floor() Float64x2 + +// Floor rounds elements down to the nearest integer. +// Const Immediate = 1. +// +// Asm: VROUNDPD, CPU Feature: AVX +func (x Float64x4) Floor() Float64x4 + +/* FloorSuppressExceptionWithPrecision */ // FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions, masked. // Const Immediate = 9. @@ -7630,6 +1268,12 @@ func (x Float32x4) FloorSuppressExceptionWithPrecision(imm8 uint8) Float32x4 // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX func (x Float32x8) FloorSuppressExceptionWithPrecision(imm8 uint8) Float32x8 +// FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions, masked. +// Const Immediate = 9. +// +// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX +func (x Float32x16) FloorSuppressExceptionWithPrecision(imm8 uint8) Float32x16 + // FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions, masked. // Const Immediate = 9. 
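// Floor above rounds each lane down to a whole number; FloorWithPrecision,
// declared just below, generalizes that to multiples of 2^-imm8. Contrasting
// the two for an x of type simd.Float64x4, under the same assumed simd import:
//
//	whole := x.Floor()                // per lane: 2.75 -> 2.0
//	halves := x.FloorWithPrecision(1) // per lane: 2.75 -> 2.5 (multiples of 2^-1)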
// @@ -7648,11 +1292,7 @@ func (x Float64x4) FloorSuppressExceptionWithPrecision(imm8 uint8) Float64x4 // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX func (x Float64x8) FloorSuppressExceptionWithPrecision(imm8 uint8) Float64x8 -// FloorWithPrecision rounds elements down with specified precision, masked. -// Const Immediate = 1. -// -// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x16) FloorWithPrecision(imm8 uint8) Float32x16 +/* FloorWithPrecision */ // FloorWithPrecision rounds elements down with specified precision, masked. // Const Immediate = 1. @@ -7666,6 +1306,12 @@ func (x Float32x4) FloorWithPrecision(imm8 uint8) Float32x4 // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX func (x Float32x8) FloorWithPrecision(imm8 uint8) Float32x8 +// FloorWithPrecision rounds elements down with specified precision, masked. +// Const Immediate = 1. +// +// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX +func (x Float32x16) FloorWithPrecision(imm8 uint8) Float32x16 + // FloorWithPrecision rounds elements down with specified precision, masked. // Const Immediate = 1. // @@ -7684,155 +1330,1843 @@ func (x Float64x4) FloorWithPrecision(imm8 uint8) Float64x4 // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX func (x Float64x8) FloorWithPrecision(imm8 uint8) Float64x8 -// RoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions. -// Const Immediate = 8. -// -// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x16) RoundSuppressExceptionWithPrecision(imm8 uint8) Float32x16 +/* FusedMultiplyAdd132 */ -// RoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions. -// Const Immediate = 8. +// FusedMultiplyAdd132 performs `(v1 * v3) + v2`. // -// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x4) RoundSuppressExceptionWithPrecision(imm8 uint8) Float32x4 +// Asm: VFMADD132PS, CPU Feature: AVX512EVEX +func (x Float32x4) FusedMultiplyAdd132(y Float32x4, z Float32x4) Float32x4 -// RoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions. -// Const Immediate = 8. +// FusedMultiplyAdd132 performs `(v1 * v3) + v2`. // -// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x8) RoundSuppressExceptionWithPrecision(imm8 uint8) Float32x8 +// Asm: VFMADD132PS, CPU Feature: AVX512EVEX +func (x Float32x8) FusedMultiplyAdd132(y Float32x8, z Float32x8) Float32x8 -// RoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions. -// Const Immediate = 8. +// FusedMultiplyAdd132 performs `(v1 * v3) + v2`. // -// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX -func (x Float64x2) RoundSuppressExceptionWithPrecision(imm8 uint8) Float64x2 +// Asm: VFMADD132PS, CPU Feature: AVX512EVEX +func (x Float32x16) FusedMultiplyAdd132(y Float32x16, z Float32x16) Float32x16 -// RoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions. -// Const Immediate = 8. +// FusedMultiplyAdd132 performs `(v1 * v3) + v2`. // -// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX -func (x Float64x4) RoundSuppressExceptionWithPrecision(imm8 uint8) Float64x4 +// Asm: VFMADD132PD, CPU Feature: AVX512EVEX +func (x Float64x2) FusedMultiplyAdd132(y Float64x2, z Float64x2) Float64x2 -// RoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions. -// Const Immediate = 8. +// FusedMultiplyAdd132 performs `(v1 * v3) + v2`. 
// -// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX -func (x Float64x8) RoundSuppressExceptionWithPrecision(imm8 uint8) Float64x8 +// Asm: VFMADD132PD, CPU Feature: AVX512EVEX +func (x Float64x4) FusedMultiplyAdd132(y Float64x4, z Float64x4) Float64x4 -// RoundWithPrecision rounds elements with specified precision. -// Const Immediate = 0. +// FusedMultiplyAdd132 performs `(v1 * v3) + v2`. // -// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x16) RoundWithPrecision(imm8 uint8) Float32x16 +// Asm: VFMADD132PD, CPU Feature: AVX512EVEX +func (x Float64x8) FusedMultiplyAdd132(y Float64x8, z Float64x8) Float64x8 -// RoundWithPrecision rounds elements with specified precision. -// Const Immediate = 0. +/* FusedMultiplyAdd213 */ + +// FusedMultiplyAdd213 performs `(v2 * v1) + v3`. // -// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x4) RoundWithPrecision(imm8 uint8) Float32x4 +// Asm: VFMADD213PS, CPU Feature: AVX512EVEX +func (x Float32x4) FusedMultiplyAdd213(y Float32x4, z Float32x4) Float32x4 -// RoundWithPrecision rounds elements with specified precision. -// Const Immediate = 0. +// FusedMultiplyAdd213 performs `(v2 * v1) + v3`. // -// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x8) RoundWithPrecision(imm8 uint8) Float32x8 +// Asm: VFMADD213PS, CPU Feature: AVX512EVEX +func (x Float32x8) FusedMultiplyAdd213(y Float32x8, z Float32x8) Float32x8 -// RoundWithPrecision rounds elements with specified precision. -// Const Immediate = 0. +// FusedMultiplyAdd213 performs `(v2 * v1) + v3`. // -// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX -func (x Float64x2) RoundWithPrecision(imm8 uint8) Float64x2 +// Asm: VFMADD213PS, CPU Feature: AVX512EVEX +func (x Float32x16) FusedMultiplyAdd213(y Float32x16, z Float32x16) Float32x16 -// RoundWithPrecision rounds elements with specified precision. -// Const Immediate = 0. +// FusedMultiplyAdd213 performs `(v2 * v1) + v3`. // -// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX -func (x Float64x4) RoundWithPrecision(imm8 uint8) Float64x4 +// Asm: VFMADD213PD, CPU Feature: AVX512EVEX +func (x Float64x2) FusedMultiplyAdd213(y Float64x2, z Float64x2) Float64x2 -// RoundWithPrecision rounds elements with specified precision. -// Const Immediate = 0. +// FusedMultiplyAdd213 performs `(v2 * v1) + v3`. // -// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX -func (x Float64x8) RoundWithPrecision(imm8 uint8) Float64x8 +// Asm: VFMADD213PD, CPU Feature: AVX512EVEX +func (x Float64x4) FusedMultiplyAdd213(y Float64x4, z Float64x4) Float64x4 -// TruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions. -// Const Immediate = 11. +// FusedMultiplyAdd213 performs `(v2 * v1) + v3`. // -// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x16) TruncSuppressExceptionWithPrecision(imm8 uint8) Float32x16 +// Asm: VFMADD213PD, CPU Feature: AVX512EVEX +func (x Float64x8) FusedMultiplyAdd213(y Float64x8, z Float64x8) Float64x8 -// TruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions. -// Const Immediate = 11. +/* FusedMultiplyAdd231 */ + +// FusedMultiplyAdd231 performs `(v2 * v3) + v1`. // -// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x4) TruncSuppressExceptionWithPrecision(imm8 uint8) Float32x4 +// Asm: VFMADD231PS, CPU Feature: AVX512EVEX +func (x Float32x4) FusedMultiplyAdd231(y Float32x4, z Float32x4) Float32x4 -// TruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions. 
-// Const Immediate = 11. +// FusedMultiplyAdd231 performs `(v2 * v3) + v1`. // -// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x8) TruncSuppressExceptionWithPrecision(imm8 uint8) Float32x8 +// Asm: VFMADD231PS, CPU Feature: AVX512EVEX +func (x Float32x8) FusedMultiplyAdd231(y Float32x8, z Float32x8) Float32x8 -// TruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions. -// Const Immediate = 11. +// FusedMultiplyAdd231 performs `(v2 * v3) + v1`. // -// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX -func (x Float64x2) TruncSuppressExceptionWithPrecision(imm8 uint8) Float64x2 +// Asm: VFMADD231PS, CPU Feature: AVX512EVEX +func (x Float32x16) FusedMultiplyAdd231(y Float32x16, z Float32x16) Float32x16 -// TruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions. -// Const Immediate = 11. +// FusedMultiplyAdd231 performs `(v2 * v3) + v1`. // -// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX -func (x Float64x4) TruncSuppressExceptionWithPrecision(imm8 uint8) Float64x4 +// Asm: VFMADD231PD, CPU Feature: AVX512EVEX +func (x Float64x2) FusedMultiplyAdd231(y Float64x2, z Float64x2) Float64x2 -// TruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions. -// Const Immediate = 11. +// FusedMultiplyAdd231 performs `(v2 * v3) + v1`. // -// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX -func (x Float64x8) TruncSuppressExceptionWithPrecision(imm8 uint8) Float64x8 +// Asm: VFMADD231PD, CPU Feature: AVX512EVEX +func (x Float64x4) FusedMultiplyAdd231(y Float64x4, z Float64x4) Float64x4 -// TruncWithPrecision truncates elements with specified precision. +// FusedMultiplyAdd231 performs `(v2 * v3) + v1`. +// +// Asm: VFMADD231PD, CPU Feature: AVX512EVEX +func (x Float64x8) FusedMultiplyAdd231(y Float64x8, z Float64x8) Float64x8 + +/* FusedMultiplyAddSub132 */ + +// FusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements. +// +// Asm: VFMADDSUB132PS, CPU Feature: AVX512EVEX +func (x Float32x4) FusedMultiplyAddSub132(y Float32x4, z Float32x4) Float32x4 + +// FusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements. +// +// Asm: VFMADDSUB132PS, CPU Feature: AVX512EVEX +func (x Float32x8) FusedMultiplyAddSub132(y Float32x8, z Float32x8) Float32x8 + +// FusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements. +// +// Asm: VFMADDSUB132PS, CPU Feature: AVX512EVEX +func (x Float32x16) FusedMultiplyAddSub132(y Float32x16, z Float32x16) Float32x16 + +// FusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements. +// +// Asm: VFMADDSUB132PD, CPU Feature: AVX512EVEX +func (x Float64x2) FusedMultiplyAddSub132(y Float64x2, z Float64x2) Float64x2 + +// FusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements. +// +// Asm: VFMADDSUB132PD, CPU Feature: AVX512EVEX +func (x Float64x4) FusedMultiplyAddSub132(y Float64x4, z Float64x4) Float64x4 + +// FusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements. 
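// The 132/213/231 suffixes differ only in operand order. Reading v1, v2, v3 as
// the receiver and the two arguments in order (the comments above leave that
// binding implicit), the three orderings are:
//
//	x.FusedMultiplyAdd132(y, z) == (x * z) + y
//	x.FusedMultiplyAdd213(y, z) == (y * x) + z
//	x.FusedMultiplyAdd231(y, z) == (y * z) + x
//
// A hedged sketch computing a*x + y in a single rounding step, under the same
// assumed simd import:
//
//	func axpy(a, x, y simd.Float32x8) simd.Float32x8 {
//		return a.FusedMultiplyAdd213(x, y) // (x * a) + y
//	}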
+// +// Asm: VFMADDSUB132PD, CPU Feature: AVX512EVEX +func (x Float64x8) FusedMultiplyAddSub132(y Float64x8, z Float64x8) Float64x8 + +/* FusedMultiplyAddSub213 */ + +// FusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements. +// +// Asm: VFMADDSUB213PS, CPU Feature: AVX512EVEX +func (x Float32x4) FusedMultiplyAddSub213(y Float32x4, z Float32x4) Float32x4 + +// FusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements. +// +// Asm: VFMADDSUB213PS, CPU Feature: AVX512EVEX +func (x Float32x8) FusedMultiplyAddSub213(y Float32x8, z Float32x8) Float32x8 + +// FusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements. +// +// Asm: VFMADDSUB213PS, CPU Feature: AVX512EVEX +func (x Float32x16) FusedMultiplyAddSub213(y Float32x16, z Float32x16) Float32x16 + +// FusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements. +// +// Asm: VFMADDSUB213PD, CPU Feature: AVX512EVEX +func (x Float64x2) FusedMultiplyAddSub213(y Float64x2, z Float64x2) Float64x2 + +// FusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements. +// +// Asm: VFMADDSUB213PD, CPU Feature: AVX512EVEX +func (x Float64x4) FusedMultiplyAddSub213(y Float64x4, z Float64x4) Float64x4 + +// FusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements. +// +// Asm: VFMADDSUB213PD, CPU Feature: AVX512EVEX +func (x Float64x8) FusedMultiplyAddSub213(y Float64x8, z Float64x8) Float64x8 + +/* FusedMultiplyAddSub231 */ + +// FusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements. +// +// Asm: VFMADDSUB231PS, CPU Feature: AVX512EVEX +func (x Float32x4) FusedMultiplyAddSub231(y Float32x4, z Float32x4) Float32x4 + +// FusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements. +// +// Asm: VFMADDSUB231PS, CPU Feature: AVX512EVEX +func (x Float32x8) FusedMultiplyAddSub231(y Float32x8, z Float32x8) Float32x8 + +// FusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements. +// +// Asm: VFMADDSUB231PS, CPU Feature: AVX512EVEX +func (x Float32x16) FusedMultiplyAddSub231(y Float32x16, z Float32x16) Float32x16 + +// FusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements. +// +// Asm: VFMADDSUB231PD, CPU Feature: AVX512EVEX +func (x Float64x2) FusedMultiplyAddSub231(y Float64x2, z Float64x2) Float64x2 + +// FusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements. +// +// Asm: VFMADDSUB231PD, CPU Feature: AVX512EVEX +func (x Float64x4) FusedMultiplyAddSub231(y Float64x4, z Float64x4) Float64x4 + +// FusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements. +// +// Asm: VFMADDSUB231PD, CPU Feature: AVX512EVEX +func (x Float64x8) FusedMultiplyAddSub231(y Float64x8, z Float64x8) Float64x8 + +/* FusedMultiplySub132 */ + +// FusedMultiplySub132 performs `(v1 * v3) - v2`. 
+// +// Asm: VFMSUB132PS, CPU Feature: AVX512EVEX +func (x Float32x4) FusedMultiplySub132(y Float32x4, z Float32x4) Float32x4 + +// FusedMultiplySub132 performs `(v1 * v3) - v2`. +// +// Asm: VFMSUB132PS, CPU Feature: AVX512EVEX +func (x Float32x8) FusedMultiplySub132(y Float32x8, z Float32x8) Float32x8 + +// FusedMultiplySub132 performs `(v1 * v3) - v2`. +// +// Asm: VFMSUB132PS, CPU Feature: AVX512EVEX +func (x Float32x16) FusedMultiplySub132(y Float32x16, z Float32x16) Float32x16 + +// FusedMultiplySub132 performs `(v1 * v3) - v2`. +// +// Asm: VFMSUB132PD, CPU Feature: AVX512EVEX +func (x Float64x2) FusedMultiplySub132(y Float64x2, z Float64x2) Float64x2 + +// FusedMultiplySub132 performs `(v1 * v3) - v2`. +// +// Asm: VFMSUB132PD, CPU Feature: AVX512EVEX +func (x Float64x4) FusedMultiplySub132(y Float64x4, z Float64x4) Float64x4 + +// FusedMultiplySub132 performs `(v1 * v3) - v2`. +// +// Asm: VFMSUB132PD, CPU Feature: AVX512EVEX +func (x Float64x8) FusedMultiplySub132(y Float64x8, z Float64x8) Float64x8 + +/* FusedMultiplySub213 */ + +// FusedMultiplySub213 performs `(v2 * v1) - v3`. +// +// Asm: VFMSUB213PS, CPU Feature: AVX512EVEX +func (x Float32x4) FusedMultiplySub213(y Float32x4, z Float32x4) Float32x4 + +// FusedMultiplySub213 performs `(v2 * v1) - v3`. +// +// Asm: VFMSUB213PS, CPU Feature: AVX512EVEX +func (x Float32x8) FusedMultiplySub213(y Float32x8, z Float32x8) Float32x8 + +// FusedMultiplySub213 performs `(v2 * v1) - v3`. +// +// Asm: VFMSUB213PS, CPU Feature: AVX512EVEX +func (x Float32x16) FusedMultiplySub213(y Float32x16, z Float32x16) Float32x16 + +// FusedMultiplySub213 performs `(v2 * v1) - v3`. +// +// Asm: VFMSUB213PD, CPU Feature: AVX512EVEX +func (x Float64x2) FusedMultiplySub213(y Float64x2, z Float64x2) Float64x2 + +// FusedMultiplySub213 performs `(v2 * v1) - v3`. +// +// Asm: VFMSUB213PD, CPU Feature: AVX512EVEX +func (x Float64x4) FusedMultiplySub213(y Float64x4, z Float64x4) Float64x4 + +// FusedMultiplySub213 performs `(v2 * v1) - v3`. +// +// Asm: VFMSUB213PD, CPU Feature: AVX512EVEX +func (x Float64x8) FusedMultiplySub213(y Float64x8, z Float64x8) Float64x8 + +/* FusedMultiplySub231 */ + +// FusedMultiplySub231 performs `(v2 * v3) - v1`. +// +// Asm: VFMSUB231PS, CPU Feature: AVX512EVEX +func (x Float32x4) FusedMultiplySub231(y Float32x4, z Float32x4) Float32x4 + +// FusedMultiplySub231 performs `(v2 * v3) - v1`. +// +// Asm: VFMSUB231PS, CPU Feature: AVX512EVEX +func (x Float32x8) FusedMultiplySub231(y Float32x8, z Float32x8) Float32x8 + +// FusedMultiplySub231 performs `(v2 * v3) - v1`. +// +// Asm: VFMSUB231PS, CPU Feature: AVX512EVEX +func (x Float32x16) FusedMultiplySub231(y Float32x16, z Float32x16) Float32x16 + +// FusedMultiplySub231 performs `(v2 * v3) - v1`. +// +// Asm: VFMSUB231PD, CPU Feature: AVX512EVEX +func (x Float64x2) FusedMultiplySub231(y Float64x2, z Float64x2) Float64x2 + +// FusedMultiplySub231 performs `(v2 * v3) - v1`. +// +// Asm: VFMSUB231PD, CPU Feature: AVX512EVEX +func (x Float64x4) FusedMultiplySub231(y Float64x4, z Float64x4) Float64x4 + +// FusedMultiplySub231 performs `(v2 * v3) - v1`. +// +// Asm: VFMSUB231PD, CPU Feature: AVX512EVEX +func (x Float64x8) FusedMultiplySub231(y Float64x8, z Float64x8) Float64x8 + +/* FusedMultiplySubAdd132 */ + +// FusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements. 
+// +// Asm: VFMSUBADD132PS, CPU Feature: AVX512EVEX +func (x Float32x4) FusedMultiplySubAdd132(y Float32x4, z Float32x4) Float32x4 + +// FusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements. +// +// Asm: VFMSUBADD132PS, CPU Feature: AVX512EVEX +func (x Float32x8) FusedMultiplySubAdd132(y Float32x8, z Float32x8) Float32x8 + +// FusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements. +// +// Asm: VFMSUBADD132PS, CPU Feature: AVX512EVEX +func (x Float32x16) FusedMultiplySubAdd132(y Float32x16, z Float32x16) Float32x16 + +// FusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements. +// +// Asm: VFMSUBADD132PD, CPU Feature: AVX512EVEX +func (x Float64x2) FusedMultiplySubAdd132(y Float64x2, z Float64x2) Float64x2 + +// FusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements. +// +// Asm: VFMSUBADD132PD, CPU Feature: AVX512EVEX +func (x Float64x4) FusedMultiplySubAdd132(y Float64x4, z Float64x4) Float64x4 + +// FusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements. +// +// Asm: VFMSUBADD132PD, CPU Feature: AVX512EVEX +func (x Float64x8) FusedMultiplySubAdd132(y Float64x8, z Float64x8) Float64x8 + +/* FusedMultiplySubAdd213 */ + +// FusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements. +// +// Asm: VFMSUBADD213PS, CPU Feature: AVX512EVEX +func (x Float32x4) FusedMultiplySubAdd213(y Float32x4, z Float32x4) Float32x4 + +// FusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements. +// +// Asm: VFMSUBADD213PS, CPU Feature: AVX512EVEX +func (x Float32x8) FusedMultiplySubAdd213(y Float32x8, z Float32x8) Float32x8 + +// FusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements. +// +// Asm: VFMSUBADD213PS, CPU Feature: AVX512EVEX +func (x Float32x16) FusedMultiplySubAdd213(y Float32x16, z Float32x16) Float32x16 + +// FusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements. +// +// Asm: VFMSUBADD213PD, CPU Feature: AVX512EVEX +func (x Float64x2) FusedMultiplySubAdd213(y Float64x2, z Float64x2) Float64x2 + +// FusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements. +// +// Asm: VFMSUBADD213PD, CPU Feature: AVX512EVEX +func (x Float64x4) FusedMultiplySubAdd213(y Float64x4, z Float64x4) Float64x4 + +// FusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements. +// +// Asm: VFMSUBADD213PD, CPU Feature: AVX512EVEX +func (x Float64x8) FusedMultiplySubAdd213(y Float64x8, z Float64x8) Float64x8 + +/* FusedMultiplySubAdd231 */ + +// FusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements. +// +// Asm: VFMSUBADD231PS, CPU Feature: AVX512EVEX +func (x Float32x4) FusedMultiplySubAdd231(y Float32x4, z Float32x4) Float32x4 + +// FusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements. 
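// A per-lane reference for the SubAdd interleaving declared here, in plain Go;
// note that FusedMultiplyAddSub* above is the mirror image (odd lanes
// subtract, even lanes add):
//
//	func fusedMultiplySubAdd231Ref(x, y, z []float64) []float64 {
//		out := make([]float64, len(x))
//		for i := range x {
//			if i%2 == 1 {
//				out[i] = y[i]*z[i] + x[i] // odd-indexed lanes add
//			} else {
//				out[i] = y[i]*z[i] - x[i] // even-indexed lanes subtract
//			}
//		}
//		return out
//	}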
+// +// Asm: VFMSUBADD231PS, CPU Feature: AVX512EVEX +func (x Float32x8) FusedMultiplySubAdd231(y Float32x8, z Float32x8) Float32x8 + +// FusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements. +// +// Asm: VFMSUBADD231PS, CPU Feature: AVX512EVEX +func (x Float32x16) FusedMultiplySubAdd231(y Float32x16, z Float32x16) Float32x16 + +// FusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements. +// +// Asm: VFMSUBADD231PD, CPU Feature: AVX512EVEX +func (x Float64x2) FusedMultiplySubAdd231(y Float64x2, z Float64x2) Float64x2 + +// FusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements. +// +// Asm: VFMSUBADD231PD, CPU Feature: AVX512EVEX +func (x Float64x4) FusedMultiplySubAdd231(y Float64x4, z Float64x4) Float64x4 + +// FusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements. +// +// Asm: VFMSUBADD231PD, CPU Feature: AVX512EVEX +func (x Float64x8) FusedMultiplySubAdd231(y Float64x8, z Float64x8) Float64x8 + +/* FusedNegativeMultiplyAdd132 */ + +// FusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`. +// +// Asm: VFNMADD132PS, CPU Feature: AVX512EVEX +func (x Float32x4) FusedNegativeMultiplyAdd132(y Float32x4, z Float32x4) Float32x4 + +// FusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`. +// +// Asm: VFNMADD132PS, CPU Feature: AVX512EVEX +func (x Float32x8) FusedNegativeMultiplyAdd132(y Float32x8, z Float32x8) Float32x8 + +// FusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`. +// +// Asm: VFNMADD132PS, CPU Feature: AVX512EVEX +func (x Float32x16) FusedNegativeMultiplyAdd132(y Float32x16, z Float32x16) Float32x16 + +// FusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`. +// +// Asm: VFNMADD132PD, CPU Feature: AVX512EVEX +func (x Float64x2) FusedNegativeMultiplyAdd132(y Float64x2, z Float64x2) Float64x2 + +// FusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`. +// +// Asm: VFNMADD132PD, CPU Feature: AVX512EVEX +func (x Float64x4) FusedNegativeMultiplyAdd132(y Float64x4, z Float64x4) Float64x4 + +// FusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`. +// +// Asm: VFNMADD132PD, CPU Feature: AVX512EVEX +func (x Float64x8) FusedNegativeMultiplyAdd132(y Float64x8, z Float64x8) Float64x8 + +/* FusedNegativeMultiplyAdd213 */ + +// FusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`. +// +// Asm: VFNMADD213PS, CPU Feature: AVX512EVEX +func (x Float32x4) FusedNegativeMultiplyAdd213(y Float32x4, z Float32x4) Float32x4 + +// FusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`. +// +// Asm: VFNMADD213PS, CPU Feature: AVX512EVEX +func (x Float32x8) FusedNegativeMultiplyAdd213(y Float32x8, z Float32x8) Float32x8 + +// FusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`. +// +// Asm: VFNMADD213PS, CPU Feature: AVX512EVEX +func (x Float32x16) FusedNegativeMultiplyAdd213(y Float32x16, z Float32x16) Float32x16 + +// FusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`. +// +// Asm: VFNMADD213PD, CPU Feature: AVX512EVEX +func (x Float64x2) FusedNegativeMultiplyAdd213(y Float64x2, z Float64x2) Float64x2 + +// FusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`. +// +// Asm: VFNMADD213PD, CPU Feature: AVX512EVEX +func (x Float64x4) FusedNegativeMultiplyAdd213(y Float64x4, z Float64x4) Float64x4 + +// FusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`. 
+// +// Asm: VFNMADD213PD, CPU Feature: AVX512EVEX +func (x Float64x8) FusedNegativeMultiplyAdd213(y Float64x8, z Float64x8) Float64x8 + +/* FusedNegativeMultiplyAdd231 */ + +// FusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`. +// +// Asm: VFNMADD231PS, CPU Feature: AVX512EVEX +func (x Float32x4) FusedNegativeMultiplyAdd231(y Float32x4, z Float32x4) Float32x4 + +// FusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`. +// +// Asm: VFNMADD231PS, CPU Feature: AVX512EVEX +func (x Float32x8) FusedNegativeMultiplyAdd231(y Float32x8, z Float32x8) Float32x8 + +// FusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`. +// +// Asm: VFNMADD231PS, CPU Feature: AVX512EVEX +func (x Float32x16) FusedNegativeMultiplyAdd231(y Float32x16, z Float32x16) Float32x16 + +// FusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`. +// +// Asm: VFNMADD231PD, CPU Feature: AVX512EVEX +func (x Float64x2) FusedNegativeMultiplyAdd231(y Float64x2, z Float64x2) Float64x2 + +// FusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`. +// +// Asm: VFNMADD231PD, CPU Feature: AVX512EVEX +func (x Float64x4) FusedNegativeMultiplyAdd231(y Float64x4, z Float64x4) Float64x4 + +// FusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`. +// +// Asm: VFNMADD231PD, CPU Feature: AVX512EVEX +func (x Float64x8) FusedNegativeMultiplyAdd231(y Float64x8, z Float64x8) Float64x8 + +/* FusedNegativeMultiplySub132 */ + +// FusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`. +// +// Asm: VFNMSUB132PS, CPU Feature: AVX512EVEX +func (x Float32x4) FusedNegativeMultiplySub132(y Float32x4, z Float32x4) Float32x4 + +// FusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`. +// +// Asm: VFNMSUB132PS, CPU Feature: AVX512EVEX +func (x Float32x8) FusedNegativeMultiplySub132(y Float32x8, z Float32x8) Float32x8 + +// FusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`. +// +// Asm: VFNMSUB132PS, CPU Feature: AVX512EVEX +func (x Float32x16) FusedNegativeMultiplySub132(y Float32x16, z Float32x16) Float32x16 + +// FusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`. +// +// Asm: VFNMSUB132PD, CPU Feature: AVX512EVEX +func (x Float64x2) FusedNegativeMultiplySub132(y Float64x2, z Float64x2) Float64x2 + +// FusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`. +// +// Asm: VFNMSUB132PD, CPU Feature: AVX512EVEX +func (x Float64x4) FusedNegativeMultiplySub132(y Float64x4, z Float64x4) Float64x4 + +// FusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`. +// +// Asm: VFNMSUB132PD, CPU Feature: AVX512EVEX +func (x Float64x8) FusedNegativeMultiplySub132(y Float64x8, z Float64x8) Float64x8 + +/* FusedNegativeMultiplySub213 */ + +// FusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`. +// +// Asm: VFNMSUB213PS, CPU Feature: AVX512EVEX +func (x Float32x4) FusedNegativeMultiplySub213(y Float32x4, z Float32x4) Float32x4 + +// FusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`. +// +// Asm: VFNMSUB213PS, CPU Feature: AVX512EVEX +func (x Float32x8) FusedNegativeMultiplySub213(y Float32x8, z Float32x8) Float32x8 + +// FusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`. +// +// Asm: VFNMSUB213PS, CPU Feature: AVX512EVEX +func (x Float32x16) FusedNegativeMultiplySub213(y Float32x16, z Float32x16) Float32x16 + +// FusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`. +// +// Asm: VFNMSUB213PD, CPU Feature: AVX512EVEX +func (x Float64x2) FusedNegativeMultiplySub213(y Float64x2, z Float64x2) Float64x2 + +// FusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`. 
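// The FusedNegative* forms negate the product before the final add or
// subtract. One common use is a fused residual b - a*x; a sketch under the
// same assumed simd import, with an illustrative helper name:
//
//	func residual(a, x, b simd.Float64x8) simd.Float64x8 {
//		return a.FusedNegativeMultiplyAdd213(x, b) // -(x * a) + b
//	}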
+// +// Asm: VFNMSUB213PD, CPU Feature: AVX512EVEX +func (x Float64x4) FusedNegativeMultiplySub213(y Float64x4, z Float64x4) Float64x4 + +// FusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`. +// +// Asm: VFNMSUB213PD, CPU Feature: AVX512EVEX +func (x Float64x8) FusedNegativeMultiplySub213(y Float64x8, z Float64x8) Float64x8 + +/* FusedNegativeMultiplySub231 */ + +// FusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`. +// +// Asm: VFNMSUB231PS, CPU Feature: AVX512EVEX +func (x Float32x4) FusedNegativeMultiplySub231(y Float32x4, z Float32x4) Float32x4 + +// FusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`. +// +// Asm: VFNMSUB231PS, CPU Feature: AVX512EVEX +func (x Float32x8) FusedNegativeMultiplySub231(y Float32x8, z Float32x8) Float32x8 + +// FusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`. +// +// Asm: VFNMSUB231PS, CPU Feature: AVX512EVEX +func (x Float32x16) FusedNegativeMultiplySub231(y Float32x16, z Float32x16) Float32x16 + +// FusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`. +// +// Asm: VFNMSUB231PD, CPU Feature: AVX512EVEX +func (x Float64x2) FusedNegativeMultiplySub231(y Float64x2, z Float64x2) Float64x2 + +// FusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`. +// +// Asm: VFNMSUB231PD, CPU Feature: AVX512EVEX +func (x Float64x4) FusedNegativeMultiplySub231(y Float64x4, z Float64x4) Float64x4 + +// FusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`. +// +// Asm: VFNMSUB231PD, CPU Feature: AVX512EVEX +func (x Float64x8) FusedNegativeMultiplySub231(y Float64x8, z Float64x8) Float64x8 + +/* Greater */ + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VPCMPGTB, CPU Feature: AVX +func (x Int8x16) Greater(y Int8x16) Mask8x16 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VPCMPGTB, CPU Feature: AVX2 +func (x Int8x32) Greater(y Int8x32) Mask8x32 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VPCMPGTW, CPU Feature: AVX +func (x Int16x8) Greater(y Int16x8) Mask16x8 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VPCMPGTW, CPU Feature: AVX2 +func (x Int16x16) Greater(y Int16x16) Mask16x16 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VPCMPGTD, CPU Feature: AVX +func (x Int32x4) Greater(y Int32x4) Mask32x4 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VPCMPGTD, CPU Feature: AVX2 +func (x Int32x8) Greater(y Int32x8) Mask32x8 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VPCMPGTQ, CPU Feature: AVX2 +func (x Int64x4) Greater(y Int64x4) Mask64x4 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VCMPPS, CPU Feature: AVX +func (x Float32x4) Greater(y Float32x4) Mask32x4 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VCMPPS, CPU Feature: AVX +func (x Float32x8) Greater(y Float32x8) Mask32x8 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VCMPPS, CPU Feature: AVX512EVEX +func (x Float32x16) Greater(y Float32x16) Mask32x16 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VCMPPD, CPU Feature: AVX +func (x Float64x2) Greater(y Float64x2) Mask64x2 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VCMPPD, CPU Feature: AVX +func (x Float64x4) Greater(y Float64x4) Mask64x4 + +// Greater compares for greater than. +// Const Immediate = 6. 
+// +// Asm: VCMPPD, CPU Feature: AVX512EVEX +func (x Float64x8) Greater(y Float64x8) Mask64x8 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VPCMPB, CPU Feature: AVX512EVEX +func (x Int8x64) Greater(y Int8x64) Mask8x64 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VPCMPW, CPU Feature: AVX512EVEX +func (x Int16x32) Greater(y Int16x32) Mask16x32 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VPCMPD, CPU Feature: AVX512EVEX +func (x Int32x16) Greater(y Int32x16) Mask32x16 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VPCMPQ, CPU Feature: AVX512EVEX +func (x Int64x2) Greater(y Int64x2) Mask64x2 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VPCMPQ, CPU Feature: AVX512EVEX +func (x Int64x8) Greater(y Int64x8) Mask64x8 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x16) Greater(y Uint8x16) Mask8x16 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x32) Greater(y Uint8x32) Mask8x32 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x64) Greater(y Uint8x64) Mask8x64 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x8) Greater(y Uint16x8) Mask16x8 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x16) Greater(y Uint16x16) Mask16x16 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x32) Greater(y Uint16x32) Mask16x32 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x4) Greater(y Uint32x4) Mask32x4 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x8) Greater(y Uint32x8) Mask32x8 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x16) Greater(y Uint32x16) Mask32x16 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x2) Greater(y Uint64x2) Mask64x2 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x4) Greater(y Uint64x4) Mask64x4 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x8) Greater(y Uint64x8) Mask64x8 + +/* GreaterEqual */ + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VCMPPS, CPU Feature: AVX +func (x Float32x4) GreaterEqual(y Float32x4) Mask32x4 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VCMPPS, CPU Feature: AVX +func (x Float32x8) GreaterEqual(y Float32x8) Mask32x8 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VCMPPS, CPU Feature: AVX512EVEX +func (x Float32x16) GreaterEqual(y Float32x16) Mask32x16 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. 
+// +// Asm: VCMPPD, CPU Feature: AVX +func (x Float64x2) GreaterEqual(y Float64x2) Mask64x2 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VCMPPD, CPU Feature: AVX +func (x Float64x4) GreaterEqual(y Float64x4) Mask64x4 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VCMPPD, CPU Feature: AVX512EVEX +func (x Float64x8) GreaterEqual(y Float64x8) Mask64x8 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VPCMPB, CPU Feature: AVX512EVEX +func (x Int8x16) GreaterEqual(y Int8x16) Mask8x16 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VPCMPB, CPU Feature: AVX512EVEX +func (x Int8x32) GreaterEqual(y Int8x32) Mask8x32 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VPCMPB, CPU Feature: AVX512EVEX +func (x Int8x64) GreaterEqual(y Int8x64) Mask8x64 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VPCMPW, CPU Feature: AVX512EVEX +func (x Int16x8) GreaterEqual(y Int16x8) Mask16x8 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VPCMPW, CPU Feature: AVX512EVEX +func (x Int16x16) GreaterEqual(y Int16x16) Mask16x16 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VPCMPW, CPU Feature: AVX512EVEX +func (x Int16x32) GreaterEqual(y Int16x32) Mask16x32 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VPCMPD, CPU Feature: AVX512EVEX +func (x Int32x4) GreaterEqual(y Int32x4) Mask32x4 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VPCMPD, CPU Feature: AVX512EVEX +func (x Int32x8) GreaterEqual(y Int32x8) Mask32x8 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VPCMPD, CPU Feature: AVX512EVEX +func (x Int32x16) GreaterEqual(y Int32x16) Mask32x16 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VPCMPQ, CPU Feature: AVX512EVEX +func (x Int64x2) GreaterEqual(y Int64x2) Mask64x2 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VPCMPQ, CPU Feature: AVX512EVEX +func (x Int64x4) GreaterEqual(y Int64x4) Mask64x4 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VPCMPQ, CPU Feature: AVX512EVEX +func (x Int64x8) GreaterEqual(y Int64x8) Mask64x8 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x16) GreaterEqual(y Uint8x16) Mask8x16 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x32) GreaterEqual(y Uint8x32) Mask8x32 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x64) GreaterEqual(y Uint8x64) Mask8x64 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x8) GreaterEqual(y Uint16x8) Mask16x8 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x16) GreaterEqual(y Uint16x16) Mask16x16 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. 
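// On the AVX512 paths these comparisons all lower to VPCMP/VPCMPU/VCMPP with a
// predicate immediate, which is what the "Const Immediate" lines record:
// 0 = Equal, 1 = Less, 5 = GreaterEqual, 6 = Greater. Combining two of them
// under the same assumed simd import:
//
//	func inRange(x, lo, hi simd.Int32x8) (simd.Mask32x8, simd.Mask32x8) {
//		return x.GreaterEqual(lo), hi.Greater(x) // lo <= x and x < hi, per lane
//	}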
+// +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x32) GreaterEqual(y Uint16x32) Mask16x32 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x4) GreaterEqual(y Uint32x4) Mask32x4 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x8) GreaterEqual(y Uint32x8) Mask32x8 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x16) GreaterEqual(y Uint32x16) Mask32x16 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x2) GreaterEqual(y Uint64x2) Mask64x2 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x4) GreaterEqual(y Uint64x4) Mask64x4 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x8) GreaterEqual(y Uint64x8) Mask64x8 + +/* IsNan */ + +// IsNan checks if elements are NaN. Use as x.IsNan(x). // Const Immediate = 3. // -// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x16) TruncWithPrecision(imm8 uint8) Float32x16 +// Asm: VCMPPS, CPU Feature: AVX +func (x Float32x4) IsNan(y Float32x4) Mask32x4 -// TruncWithPrecision truncates elements with specified precision. +// IsNan checks if elements are NaN. Use as x.IsNan(x). // Const Immediate = 3. // -// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x4) TruncWithPrecision(imm8 uint8) Float32x4 +// Asm: VCMPPS, CPU Feature: AVX +func (x Float32x8) IsNan(y Float32x8) Mask32x8 -// TruncWithPrecision truncates elements with specified precision. +// IsNan checks if elements are NaN. Use as x.IsNan(x). // Const Immediate = 3. // -// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x8) TruncWithPrecision(imm8 uint8) Float32x8 +// Asm: VCMPPS, CPU Feature: AVX512EVEX +func (x Float32x16) IsNan(y Float32x16) Mask32x16 -// TruncWithPrecision truncates elements with specified precision. +// IsNan checks if elements are NaN. Use as x.IsNan(x). // Const Immediate = 3. // -// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX -func (x Float64x2) TruncWithPrecision(imm8 uint8) Float64x2 +// Asm: VCMPPD, CPU Feature: AVX +func (x Float64x2) IsNan(y Float64x2) Mask64x2 -// TruncWithPrecision truncates elements with specified precision. +// IsNan checks if elements are NaN. Use as x.IsNan(x). // Const Immediate = 3. // -// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX -func (x Float64x4) TruncWithPrecision(imm8 uint8) Float64x4 +// Asm: VCMPPD, CPU Feature: AVX +func (x Float64x4) IsNan(y Float64x4) Mask64x4 -// TruncWithPrecision truncates elements with specified precision. +// IsNan checks if elements are NaN. Use as x.IsNan(x). // Const Immediate = 3. // -// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX -func (x Float64x8) TruncWithPrecision(imm8 uint8) Float64x8 +// Asm: VCMPPD, CPU Feature: AVX512EVEX +func (x Float64x8) IsNan(y Float64x8) Mask64x8 -// CeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions. -// Const Immediate = 10. +/* Less */ + +// Less compares for less than. +// Const Immediate = 1. 
// -// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedCeilSuppressExceptionWithPrecision(imm uint8, y Mask32x16) Float32x16 +// Asm: VCMPPS, CPU Feature: AVX +func (x Float32x4) Less(y Float32x4) Mask32x4 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VCMPPS, CPU Feature: AVX +func (x Float32x8) Less(y Float32x8) Mask32x8 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VCMPPS, CPU Feature: AVX512EVEX +func (x Float32x16) Less(y Float32x16) Mask32x16 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VCMPPD, CPU Feature: AVX +func (x Float64x2) Less(y Float64x2) Mask64x2 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VCMPPD, CPU Feature: AVX +func (x Float64x4) Less(y Float64x4) Mask64x4 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VCMPPD, CPU Feature: AVX512EVEX +func (x Float64x8) Less(y Float64x8) Mask64x8 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VPCMPB, CPU Feature: AVX512EVEX +func (x Int8x16) Less(y Int8x16) Mask8x16 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VPCMPB, CPU Feature: AVX512EVEX +func (x Int8x32) Less(y Int8x32) Mask8x32 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VPCMPB, CPU Feature: AVX512EVEX +func (x Int8x64) Less(y Int8x64) Mask8x64 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VPCMPW, CPU Feature: AVX512EVEX +func (x Int16x8) Less(y Int16x8) Mask16x8 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VPCMPW, CPU Feature: AVX512EVEX +func (x Int16x16) Less(y Int16x16) Mask16x16 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VPCMPW, CPU Feature: AVX512EVEX +func (x Int16x32) Less(y Int16x32) Mask16x32 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VPCMPD, CPU Feature: AVX512EVEX +func (x Int32x4) Less(y Int32x4) Mask32x4 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VPCMPD, CPU Feature: AVX512EVEX +func (x Int32x8) Less(y Int32x8) Mask32x8 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VPCMPD, CPU Feature: AVX512EVEX +func (x Int32x16) Less(y Int32x16) Mask32x16 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VPCMPQ, CPU Feature: AVX512EVEX +func (x Int64x2) Less(y Int64x2) Mask64x2 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VPCMPQ, CPU Feature: AVX512EVEX +func (x Int64x4) Less(y Int64x4) Mask64x4 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VPCMPQ, CPU Feature: AVX512EVEX +func (x Int64x8) Less(y Int64x8) Mask64x8 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x16) Less(y Uint8x16) Mask8x16 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x32) Less(y Uint8x32) Mask8x32 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x64) Less(y Uint8x64) Mask8x64 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x8) Less(y Uint16x8) Mask16x8 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x16) Less(y Uint16x16) Mask16x16 + +// Less compares for less than. 
+// Const Immediate = 1. +// +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x32) Less(y Uint16x32) Mask16x32 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x4) Less(y Uint32x4) Mask32x4 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x8) Less(y Uint32x8) Mask32x8 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x16) Less(y Uint32x16) Mask32x16 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x2) Less(y Uint64x2) Mask64x2 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x4) Less(y Uint64x4) Mask64x4 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x8) Less(y Uint64x8) Mask64x8 + +/* LessEqual */ + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VCMPPS, CPU Feature: AVX +func (x Float32x4) LessEqual(y Float32x4) Mask32x4 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VCMPPS, CPU Feature: AVX +func (x Float32x8) LessEqual(y Float32x8) Mask32x8 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VCMPPS, CPU Feature: AVX512EVEX +func (x Float32x16) LessEqual(y Float32x16) Mask32x16 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VCMPPD, CPU Feature: AVX +func (x Float64x2) LessEqual(y Float64x2) Mask64x2 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VCMPPD, CPU Feature: AVX +func (x Float64x4) LessEqual(y Float64x4) Mask64x4 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VCMPPD, CPU Feature: AVX512EVEX +func (x Float64x8) LessEqual(y Float64x8) Mask64x8 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VPCMPB, CPU Feature: AVX512EVEX +func (x Int8x16) LessEqual(y Int8x16) Mask8x16 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VPCMPB, CPU Feature: AVX512EVEX +func (x Int8x32) LessEqual(y Int8x32) Mask8x32 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VPCMPB, CPU Feature: AVX512EVEX +func (x Int8x64) LessEqual(y Int8x64) Mask8x64 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VPCMPW, CPU Feature: AVX512EVEX +func (x Int16x8) LessEqual(y Int16x8) Mask16x8 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VPCMPW, CPU Feature: AVX512EVEX +func (x Int16x16) LessEqual(y Int16x16) Mask16x16 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VPCMPW, CPU Feature: AVX512EVEX +func (x Int16x32) LessEqual(y Int16x32) Mask16x32 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VPCMPD, CPU Feature: AVX512EVEX +func (x Int32x4) LessEqual(y Int32x4) Mask32x4 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VPCMPD, CPU Feature: AVX512EVEX +func (x Int32x8) LessEqual(y Int32x8) Mask32x8 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. 
+// +// Asm: VPCMPD, CPU Feature: AVX512EVEX +func (x Int32x16) LessEqual(y Int32x16) Mask32x16 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VPCMPQ, CPU Feature: AVX512EVEX +func (x Int64x2) LessEqual(y Int64x2) Mask64x2 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VPCMPQ, CPU Feature: AVX512EVEX +func (x Int64x4) LessEqual(y Int64x4) Mask64x4 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VPCMPQ, CPU Feature: AVX512EVEX +func (x Int64x8) LessEqual(y Int64x8) Mask64x8 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x16) LessEqual(y Uint8x16) Mask8x16 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x32) LessEqual(y Uint8x32) Mask8x32 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x64) LessEqual(y Uint8x64) Mask8x64 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x8) LessEqual(y Uint16x8) Mask16x8 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x16) LessEqual(y Uint16x16) Mask16x16 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x32) LessEqual(y Uint16x32) Mask16x32 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x4) LessEqual(y Uint32x4) Mask32x4 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x8) LessEqual(y Uint32x8) Mask32x8 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x16) LessEqual(y Uint32x16) Mask32x16 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x2) LessEqual(y Uint64x2) Mask64x2 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x4) LessEqual(y Uint64x4) Mask64x4 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x8) LessEqual(y Uint64x8) Mask64x8 + +/* MaskedAbsolute */ + +// Absolute computes the absolute value of each element. +// +// Asm: VPABSB, CPU Feature: AVX512EVEX +func (x Int8x16) MaskedAbsolute(y Mask8x16) Int8x16 + +// Absolute computes the absolute value of each element. +// +// Asm: VPABSB, CPU Feature: AVX512EVEX +func (x Int8x32) MaskedAbsolute(y Mask8x32) Int8x32 + +// Absolute computes the absolute value of each element. +// +// Asm: VPABSB, CPU Feature: AVX512EVEX +func (x Int8x64) MaskedAbsolute(y Mask8x64) Int8x64 + +// Absolute computes the absolute value of each element. +// +// Asm: VPABSW, CPU Feature: AVX512EVEX +func (x Int16x8) MaskedAbsolute(y Mask16x8) Int16x8 + +// Absolute computes the absolute value of each element. +// +// Asm: VPABSW, CPU Feature: AVX512EVEX +func (x Int16x16) MaskedAbsolute(y Mask16x16) Int16x16 + +// Absolute computes the absolute value of each element. 
+// +// Asm: VPABSW, CPU Feature: AVX512EVEX +func (x Int16x32) MaskedAbsolute(y Mask16x32) Int16x32 + +// Absolute computes the absolute value of each element. +// +// Asm: VPABSD, CPU Feature: AVX512EVEX +func (x Int32x4) MaskedAbsolute(y Mask32x4) Int32x4 + +// Absolute computes the absolute value of each element. +// +// Asm: VPABSD, CPU Feature: AVX512EVEX +func (x Int32x8) MaskedAbsolute(y Mask32x8) Int32x8 + +// Absolute computes the absolute value of each element. +// +// Asm: VPABSD, CPU Feature: AVX512EVEX +func (x Int32x16) MaskedAbsolute(y Mask32x16) Int32x16 + +// Absolute computes the absolute value of each element. +// +// Asm: VPABSQ, CPU Feature: AVX512EVEX +func (x Int64x2) MaskedAbsolute(y Mask64x2) Int64x2 + +// Absolute computes the absolute value of each element. +// +// Asm: VPABSQ, CPU Feature: AVX512EVEX +func (x Int64x4) MaskedAbsolute(y Mask64x4) Int64x4 + +// Absolute computes the absolute value of each element. +// +// Asm: VPABSQ, CPU Feature: AVX512EVEX +func (x Int64x8) MaskedAbsolute(y Mask64x8) Int64x8 + +/* MaskedAdd */ + +// Add adds corresponding elements of two vectors. +// +// Asm: VADDPS, CPU Feature: AVX512EVEX +func (x Float32x4) MaskedAdd(y Float32x4, z Mask32x4) Float32x4 + +// Add adds corresponding elements of two vectors. +// +// Asm: VADDPS, CPU Feature: AVX512EVEX +func (x Float32x8) MaskedAdd(y Float32x8, z Mask32x8) Float32x8 + +// Add adds corresponding elements of two vectors. +// +// Asm: VADDPS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedAdd(y Float32x16, z Mask32x16) Float32x16 + +// Add adds corresponding elements of two vectors. +// +// Asm: VADDPD, CPU Feature: AVX512EVEX +func (x Float64x2) MaskedAdd(y Float64x2, z Mask64x2) Float64x2 + +// Add adds corresponding elements of two vectors. +// +// Asm: VADDPD, CPU Feature: AVX512EVEX +func (x Float64x4) MaskedAdd(y Float64x4, z Mask64x4) Float64x4 + +// Add adds corresponding elements of two vectors. +// +// Asm: VADDPD, CPU Feature: AVX512EVEX +func (x Float64x8) MaskedAdd(y Float64x8, z Mask64x8) Float64x8 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDB, CPU Feature: AVX512EVEX +func (x Int8x16) MaskedAdd(y Int8x16, z Mask8x16) Int8x16 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDB, CPU Feature: AVX512EVEX +func (x Int8x32) MaskedAdd(y Int8x32, z Mask8x32) Int8x32 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDB, CPU Feature: AVX512EVEX +func (x Int8x64) MaskedAdd(y Int8x64, z Mask8x64) Int8x64 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDW, CPU Feature: AVX512EVEX +func (x Int16x8) MaskedAdd(y Int16x8, z Mask16x8) Int16x8 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDW, CPU Feature: AVX512EVEX +func (x Int16x16) MaskedAdd(y Int16x16, z Mask16x16) Int16x16 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDW, CPU Feature: AVX512EVEX +func (x Int16x32) MaskedAdd(y Int16x32, z Mask16x32) Int16x32 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDD, CPU Feature: AVX512EVEX +func (x Int32x4) MaskedAdd(y Int32x4, z Mask32x4) Int32x4 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDD, CPU Feature: AVX512EVEX +func (x Int32x8) MaskedAdd(y Int32x8, z Mask32x8) Int32x8 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDD, CPU Feature: AVX512EVEX +func (x Int32x16) MaskedAdd(y Int32x16, z Mask32x16) Int32x16 + +// Add adds corresponding elements of two vectors. 
+// +// Asm: VPADDQ, CPU Feature: AVX512EVEX +func (x Int64x2) MaskedAdd(y Int64x2, z Mask64x2) Int64x2 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDQ, CPU Feature: AVX512EVEX +func (x Int64x4) MaskedAdd(y Int64x4, z Mask64x4) Int64x4 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDQ, CPU Feature: AVX512EVEX +func (x Int64x8) MaskedAdd(y Int64x8, z Mask64x8) Int64x8 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDB, CPU Feature: AVX512EVEX +func (x Uint8x16) MaskedAdd(y Uint8x16, z Mask8x16) Uint8x16 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDB, CPU Feature: AVX512EVEX +func (x Uint8x32) MaskedAdd(y Uint8x32, z Mask8x32) Uint8x32 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDB, CPU Feature: AVX512EVEX +func (x Uint8x64) MaskedAdd(y Uint8x64, z Mask8x64) Uint8x64 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDW, CPU Feature: AVX512EVEX +func (x Uint16x8) MaskedAdd(y Uint16x8, z Mask16x8) Uint16x8 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDW, CPU Feature: AVX512EVEX +func (x Uint16x16) MaskedAdd(y Uint16x16, z Mask16x16) Uint16x16 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDW, CPU Feature: AVX512EVEX +func (x Uint16x32) MaskedAdd(y Uint16x32, z Mask16x32) Uint16x32 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDD, CPU Feature: AVX512EVEX +func (x Uint32x4) MaskedAdd(y Uint32x4, z Mask32x4) Uint32x4 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDD, CPU Feature: AVX512EVEX +func (x Uint32x8) MaskedAdd(y Uint32x8, z Mask32x8) Uint32x8 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDD, CPU Feature: AVX512EVEX +func (x Uint32x16) MaskedAdd(y Uint32x16, z Mask32x16) Uint32x16 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDQ, CPU Feature: AVX512EVEX +func (x Uint64x2) MaskedAdd(y Uint64x2, z Mask64x2) Uint64x2 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDQ, CPU Feature: AVX512EVEX +func (x Uint64x4) MaskedAdd(y Uint64x4, z Mask64x4) Uint64x4 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDQ, CPU Feature: AVX512EVEX +func (x Uint64x8) MaskedAdd(y Uint64x8, z Mask64x8) Uint64x8 + +/* MaskedAnd */ + +// And performs a masked bitwise AND operation between two vectors. +// +// Asm: VANDPS, CPU Feature: AVX512EVEX +func (x Float32x4) MaskedAnd(y Float32x4, z Mask32x4) Float32x4 + +// And performs a masked bitwise AND operation between two vectors. +// +// Asm: VANDPS, CPU Feature: AVX512EVEX +func (x Float32x8) MaskedAnd(y Float32x8, z Mask32x8) Float32x8 + +// And performs a masked bitwise AND operation between two vectors. +// +// Asm: VANDPS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedAnd(y Float32x16, z Mask32x16) Float32x16 + +// And performs a masked bitwise AND operation between two vectors. +// +// Asm: VANDPD, CPU Feature: AVX512EVEX +func (x Float64x2) MaskedAnd(y Float64x2, z Mask64x2) Float64x2 + +// And performs a masked bitwise AND operation between two vectors. +// +// Asm: VANDPD, CPU Feature: AVX512EVEX +func (x Float64x4) MaskedAnd(y Float64x4, z Mask64x4) Float64x4 + +// And performs a masked bitwise AND operation between two vectors. +// +// Asm: VANDPD, CPU Feature: AVX512EVEX +func (x Float64x8) MaskedAnd(y Float64x8, z Mask64x8) Float64x8 + +// And performs a masked bitwise AND operation between two vectors. 
+// +// Asm: VPANDD, CPU Feature: AVX512EVEX +func (x Int32x4) MaskedAnd(y Int32x4, z Mask32x4) Int32x4 + +// And performs a masked bitwise AND operation between two vectors. +// +// Asm: VPANDD, CPU Feature: AVX512EVEX +func (x Int32x8) MaskedAnd(y Int32x8, z Mask32x8) Int32x8 + +// And performs a masked bitwise AND operation between two vectors. +// +// Asm: VPANDD, CPU Feature: AVX512EVEX +func (x Int32x16) MaskedAnd(y Int32x16, z Mask32x16) Int32x16 + +// And performs a masked bitwise AND operation between two vectors. +// +// Asm: VPANDQ, CPU Feature: AVX512EVEX +func (x Int64x2) MaskedAnd(y Int64x2, z Mask64x2) Int64x2 + +// And performs a masked bitwise AND operation between two vectors. +// +// Asm: VPANDQ, CPU Feature: AVX512EVEX +func (x Int64x4) MaskedAnd(y Int64x4, z Mask64x4) Int64x4 + +// And performs a masked bitwise AND operation between two vectors. +// +// Asm: VPANDQ, CPU Feature: AVX512EVEX +func (x Int64x8) MaskedAnd(y Int64x8, z Mask64x8) Int64x8 + +// And performs a masked bitwise AND operation between two vectors. +// +// Asm: VPANDD, CPU Feature: AVX512EVEX +func (x Uint32x4) MaskedAnd(y Uint32x4, z Mask32x4) Uint32x4 + +// And performs a masked bitwise AND operation between two vectors. +// +// Asm: VPANDD, CPU Feature: AVX512EVEX +func (x Uint32x8) MaskedAnd(y Uint32x8, z Mask32x8) Uint32x8 + +// And performs a masked bitwise AND operation between two vectors. +// +// Asm: VPANDD, CPU Feature: AVX512EVEX +func (x Uint32x16) MaskedAnd(y Uint32x16, z Mask32x16) Uint32x16 + +// And performs a masked bitwise AND operation between two vectors. +// +// Asm: VPANDQ, CPU Feature: AVX512EVEX +func (x Uint64x2) MaskedAnd(y Uint64x2, z Mask64x2) Uint64x2 + +// And performs a masked bitwise AND operation between two vectors. +// +// Asm: VPANDQ, CPU Feature: AVX512EVEX +func (x Uint64x4) MaskedAnd(y Uint64x4, z Mask64x4) Uint64x4 + +// And performs a masked bitwise AND operation between two vectors. +// +// Asm: VPANDQ, CPU Feature: AVX512EVEX +func (x Uint64x8) MaskedAnd(y Uint64x8, z Mask64x8) Uint64x8 + +/* MaskedAndNot */ + +// AndNot performs a masked bitwise AND NOT operation between two vectors. +// +// Asm: VANDNPS, CPU Feature: AVX512EVEX +func (x Float32x4) MaskedAndNot(y Float32x4, z Mask32x4) Float32x4 + +// AndNot performs a masked bitwise AND NOT operation between two vectors. +// +// Asm: VANDNPS, CPU Feature: AVX512EVEX +func (x Float32x8) MaskedAndNot(y Float32x8, z Mask32x8) Float32x8 + +// AndNot performs a masked bitwise AND NOT operation between two vectors. +// +// Asm: VANDNPS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedAndNot(y Float32x16, z Mask32x16) Float32x16 + +// AndNot performs a masked bitwise AND NOT operation between two vectors. +// +// Asm: VANDNPD, CPU Feature: AVX512EVEX +func (x Float64x2) MaskedAndNot(y Float64x2, z Mask64x2) Float64x2 + +// AndNot performs a masked bitwise AND NOT operation between two vectors. +// +// Asm: VANDNPD, CPU Feature: AVX512EVEX +func (x Float64x4) MaskedAndNot(y Float64x4, z Mask64x4) Float64x4 + +// AndNot performs a masked bitwise AND NOT operation between two vectors. +// +// Asm: VANDNPD, CPU Feature: AVX512EVEX +func (x Float64x8) MaskedAndNot(y Float64x8, z Mask64x8) Float64x8 + +// AndNot performs a masked bitwise AND NOT operation between two vectors. +// +// Asm: VPANDND, CPU Feature: AVX512EVEX +func (x Int32x4) MaskedAndNot(y Int32x4, z Mask32x4) Int32x4 + +// AndNot performs a masked bitwise AND NOT operation between two vectors. 
+// +// Asm: VPANDND, CPU Feature: AVX512EVEX +func (x Int32x8) MaskedAndNot(y Int32x8, z Mask32x8) Int32x8 + +// AndNot performs a masked bitwise AND NOT operation between two vectors. +// +// Asm: VPANDND, CPU Feature: AVX512EVEX +func (x Int32x16) MaskedAndNot(y Int32x16, z Mask32x16) Int32x16 + +// AndNot performs a masked bitwise AND NOT operation between two vectors. +// +// Asm: VPANDNQ, CPU Feature: AVX512EVEX +func (x Int64x2) MaskedAndNot(y Int64x2, z Mask64x2) Int64x2 + +// AndNot performs a masked bitwise AND NOT operation between two vectors. +// +// Asm: VPANDNQ, CPU Feature: AVX512EVEX +func (x Int64x4) MaskedAndNot(y Int64x4, z Mask64x4) Int64x4 + +// AndNot performs a masked bitwise AND NOT operation between two vectors. +// +// Asm: VPANDNQ, CPU Feature: AVX512EVEX +func (x Int64x8) MaskedAndNot(y Int64x8, z Mask64x8) Int64x8 + +// AndNot performs a masked bitwise AND NOT operation between two vectors. +// +// Asm: VPANDND, CPU Feature: AVX512EVEX +func (x Uint32x4) MaskedAndNot(y Uint32x4, z Mask32x4) Uint32x4 + +// AndNot performs a masked bitwise AND NOT operation between two vectors. +// +// Asm: VPANDND, CPU Feature: AVX512EVEX +func (x Uint32x8) MaskedAndNot(y Uint32x8, z Mask32x8) Uint32x8 + +// AndNot performs a masked bitwise AND NOT operation between two vectors. +// +// Asm: VPANDND, CPU Feature: AVX512EVEX +func (x Uint32x16) MaskedAndNot(y Uint32x16, z Mask32x16) Uint32x16 + +// AndNot performs a masked bitwise AND NOT operation between two vectors. +// +// Asm: VPANDNQ, CPU Feature: AVX512EVEX +func (x Uint64x2) MaskedAndNot(y Uint64x2, z Mask64x2) Uint64x2 + +// AndNot performs a masked bitwise AND NOT operation between two vectors. +// +// Asm: VPANDNQ, CPU Feature: AVX512EVEX +func (x Uint64x4) MaskedAndNot(y Uint64x4, z Mask64x4) Uint64x4 + +// AndNot performs a masked bitwise AND NOT operation between two vectors. +// +// Asm: VPANDNQ, CPU Feature: AVX512EVEX +func (x Uint64x8) MaskedAndNot(y Uint64x8, z Mask64x8) Uint64x8 + +/* MaskedApproximateReciprocal */ + +// ApproximateReciprocal computes an approximate reciprocal of each element. +// +// Asm: VRCP14PS, CPU Feature: AVX512EVEX +func (x Float32x4) MaskedApproximateReciprocal(y Mask32x4) Float32x4 + +// ApproximateReciprocal computes an approximate reciprocal of each element. +// +// Asm: VRCP14PS, CPU Feature: AVX512EVEX +func (x Float32x8) MaskedApproximateReciprocal(y Mask32x8) Float32x8 + +// ApproximateReciprocal computes an approximate reciprocal of each element. +// +// Asm: VRCP14PS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedApproximateReciprocal(y Mask32x16) Float32x16 + +// ApproximateReciprocal computes an approximate reciprocal of each element. +// +// Asm: VRCP14PD, CPU Feature: AVX512EVEX +func (x Float64x2) MaskedApproximateReciprocal(y Mask64x2) Float64x2 + +// ApproximateReciprocal computes an approximate reciprocal of each element. +// +// Asm: VRCP14PD, CPU Feature: AVX512EVEX +func (x Float64x4) MaskedApproximateReciprocal(y Mask64x4) Float64x4 + +// ApproximateReciprocal computes an approximate reciprocal of each element. +// +// Asm: VRCP14PD, CPU Feature: AVX512EVEX +func (x Float64x8) MaskedApproximateReciprocal(y Mask64x8) Float64x8 + +/* MaskedApproximateReciprocalOfSqrt */ + +// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. 
+// +// Asm: VRSQRT14PS, CPU Feature: AVX512EVEX +func (x Float32x4) MaskedApproximateReciprocalOfSqrt(y Mask32x4) Float32x4 + +// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. +// +// Asm: VRSQRT14PS, CPU Feature: AVX512EVEX +func (x Float32x8) MaskedApproximateReciprocalOfSqrt(y Mask32x8) Float32x8 + +// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. +// +// Asm: VRSQRT14PS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedApproximateReciprocalOfSqrt(y Mask32x16) Float32x16 + +// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. +// +// Asm: VRSQRT14PD, CPU Feature: AVX512EVEX +func (x Float64x2) MaskedApproximateReciprocalOfSqrt(y Mask64x2) Float64x2 + +// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. +// +// Asm: VRSQRT14PD, CPU Feature: AVX512EVEX +func (x Float64x4) MaskedApproximateReciprocalOfSqrt(y Mask64x4) Float64x4 + +// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. +// +// Asm: VRSQRT14PD, CPU Feature: AVX512EVEX +func (x Float64x8) MaskedApproximateReciprocalOfSqrt(y Mask64x8) Float64x8 + +/* MaskedAverage */ + +// Average computes the rounded average of corresponding elements. +// +// Asm: VPAVGB, CPU Feature: AVX512EVEX +func (x Uint8x16) MaskedAverage(y Uint8x16, z Mask8x16) Uint8x16 + +// Average computes the rounded average of corresponding elements. +// +// Asm: VPAVGB, CPU Feature: AVX512EVEX +func (x Uint8x32) MaskedAverage(y Uint8x32, z Mask8x32) Uint8x32 + +// Average computes the rounded average of corresponding elements. +// +// Asm: VPAVGB, CPU Feature: AVX512EVEX +func (x Uint8x64) MaskedAverage(y Uint8x64, z Mask8x64) Uint8x64 + +// Average computes the rounded average of corresponding elements. +// +// Asm: VPAVGW, CPU Feature: AVX512EVEX +func (x Uint16x8) MaskedAverage(y Uint16x8, z Mask16x8) Uint16x8 + +// Average computes the rounded average of corresponding elements. +// +// Asm: VPAVGW, CPU Feature: AVX512EVEX +func (x Uint16x16) MaskedAverage(y Uint16x16, z Mask16x16) Uint16x16 + +// Average computes the rounded average of corresponding elements. +// +// Asm: VPAVGW, CPU Feature: AVX512EVEX +func (x Uint16x32) MaskedAverage(y Uint16x32, z Mask16x32) Uint16x32 + +/* MaskedCeilSuppressExceptionWithPrecision */ // CeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions. // Const Immediate = 10. @@ -7846,6 +3180,12 @@ func (x Float32x4) MaskedCeilSuppressExceptionWithPrecision(imm uint8, y Mask32x // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX func (x Float32x8) MaskedCeilSuppressExceptionWithPrecision(imm uint8, y Mask32x8) Float32x8 +// CeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions. +// Const Immediate = 10. +// +// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedCeilSuppressExceptionWithPrecision(imm uint8, y Mask32x16) Float32x16 + // CeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions. // Const Immediate = 10. // @@ -7864,11 +3204,7 @@ func (x Float64x4) MaskedCeilSuppressExceptionWithPrecision(imm uint8, y Mask64x // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX func (x Float64x8) MaskedCeilSuppressExceptionWithPrecision(imm uint8, y Mask64x8) Float64x8 -// CeilWithPrecision rounds elements up with specified precision, masked. 
-// Const Immediate = 2. -// -// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedCeilWithPrecision(imm uint8, y Mask32x16) Float32x16 +/* MaskedCeilWithPrecision */ // CeilWithPrecision rounds elements up with specified precision, masked. // Const Immediate = 2. @@ -7882,6 +3218,12 @@ func (x Float32x4) MaskedCeilWithPrecision(imm uint8, y Mask32x4) Float32x4 // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX func (x Float32x8) MaskedCeilWithPrecision(imm uint8, y Mask32x8) Float32x8 +// CeilWithPrecision rounds elements up with specified precision, masked. +// Const Immediate = 2. +// +// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedCeilWithPrecision(imm uint8, y Mask32x16) Float32x16 + // CeilWithPrecision rounds elements up with specified precision, masked. // Const Immediate = 2. // @@ -7900,11 +3242,7 @@ func (x Float64x4) MaskedCeilWithPrecision(imm uint8, y Mask64x4) Float64x4 // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX func (x Float64x8) MaskedCeilWithPrecision(imm uint8, y Mask64x8) Float64x8 -// DiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions. -// Const Immediate = 10. -// -// Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedDiffWithCeilSuppressExceptionWithPrecision(imm uint8, y Mask32x16) Float32x16 +/* MaskedDiffWithCeilSuppressExceptionWithPrecision */ // DiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions. // Const Immediate = 10. @@ -7918,6 +3256,12 @@ func (x Float32x4) MaskedDiffWithCeilSuppressExceptionWithPrecision(imm uint8, y // Asm: VREDUCEPS, CPU Feature: AVX512EVEX func (x Float32x8) MaskedDiffWithCeilSuppressExceptionWithPrecision(imm uint8, y Mask32x8) Float32x8 +// DiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions. +// Const Immediate = 10. +// +// Asm: VREDUCEPS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedDiffWithCeilSuppressExceptionWithPrecision(imm uint8, y Mask32x16) Float32x16 + // DiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions. // Const Immediate = 10. // @@ -7936,11 +3280,7 @@ func (x Float64x4) MaskedDiffWithCeilSuppressExceptionWithPrecision(imm uint8, y // Asm: VREDUCEPD, CPU Feature: AVX512EVEX func (x Float64x8) MaskedDiffWithCeilSuppressExceptionWithPrecision(imm uint8, y Mask64x8) Float64x8 -// DiffWithCeilWithPrecision computes the difference after ceiling with specified precision. -// Const Immediate = 2. -// -// Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedDiffWithCeilWithPrecision(imm uint8, y Mask32x16) Float32x16 +/* MaskedDiffWithCeilWithPrecision */ // DiffWithCeilWithPrecision computes the difference after ceiling with specified precision. // Const Immediate = 2. @@ -7954,6 +3294,12 @@ func (x Float32x4) MaskedDiffWithCeilWithPrecision(imm uint8, y Mask32x4) Float3 // Asm: VREDUCEPS, CPU Feature: AVX512EVEX func (x Float32x8) MaskedDiffWithCeilWithPrecision(imm uint8, y Mask32x8) Float32x8 +// DiffWithCeilWithPrecision computes the difference after ceiling with specified precision. +// Const Immediate = 2. +// +// Asm: VREDUCEPS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedDiffWithCeilWithPrecision(imm uint8, y Mask32x16) Float32x16 + // DiffWithCeilWithPrecision computes the difference after ceiling with specified precision. 
// Const Immediate = 2. // @@ -7972,11 +3318,7 @@ func (x Float64x4) MaskedDiffWithCeilWithPrecision(imm uint8, y Mask64x4) Float6 // Asm: VREDUCEPD, CPU Feature: AVX512EVEX func (x Float64x8) MaskedDiffWithCeilWithPrecision(imm uint8, y Mask64x8) Float64x8 -// DiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions. -// Const Immediate = 9. -// -// Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedDiffWithFloorSuppressExceptionWithPrecision(imm uint8, y Mask32x16) Float32x16 +/* MaskedDiffWithFloorSuppressExceptionWithPrecision */ // DiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions. // Const Immediate = 9. @@ -7990,6 +3332,12 @@ func (x Float32x4) MaskedDiffWithFloorSuppressExceptionWithPrecision(imm uint8, // Asm: VREDUCEPS, CPU Feature: AVX512EVEX func (x Float32x8) MaskedDiffWithFloorSuppressExceptionWithPrecision(imm uint8, y Mask32x8) Float32x8 +// DiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions. +// Const Immediate = 9. +// +// Asm: VREDUCEPS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedDiffWithFloorSuppressExceptionWithPrecision(imm uint8, y Mask32x16) Float32x16 + // DiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions. // Const Immediate = 9. // @@ -8008,11 +3356,7 @@ func (x Float64x4) MaskedDiffWithFloorSuppressExceptionWithPrecision(imm uint8, // Asm: VREDUCEPD, CPU Feature: AVX512EVEX func (x Float64x8) MaskedDiffWithFloorSuppressExceptionWithPrecision(imm uint8, y Mask64x8) Float64x8 -// DiffWithFloorWithPrecision computes the difference after flooring with specified precision. -// Const Immediate = 1. -// -// Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedDiffWithFloorWithPrecision(imm uint8, y Mask32x16) Float32x16 +/* MaskedDiffWithFloorWithPrecision */ // DiffWithFloorWithPrecision computes the difference after flooring with specified precision. // Const Immediate = 1. @@ -8026,6 +3370,12 @@ func (x Float32x4) MaskedDiffWithFloorWithPrecision(imm uint8, y Mask32x4) Float // Asm: VREDUCEPS, CPU Feature: AVX512EVEX func (x Float32x8) MaskedDiffWithFloorWithPrecision(imm uint8, y Mask32x8) Float32x8 +// DiffWithFloorWithPrecision computes the difference after flooring with specified precision. +// Const Immediate = 1. +// +// Asm: VREDUCEPS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedDiffWithFloorWithPrecision(imm uint8, y Mask32x16) Float32x16 + // DiffWithFloorWithPrecision computes the difference after flooring with specified precision. // Const Immediate = 1. // @@ -8044,11 +3394,7 @@ func (x Float64x4) MaskedDiffWithFloorWithPrecision(imm uint8, y Mask64x4) Float // Asm: VREDUCEPD, CPU Feature: AVX512EVEX func (x Float64x8) MaskedDiffWithFloorWithPrecision(imm uint8, y Mask64x8) Float64x8 -// DiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions. -// Const Immediate = 8. -// -// Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedDiffWithRoundSuppressExceptionWithPrecision(imm uint8, y Mask32x16) Float32x16 +/* MaskedDiffWithRoundSuppressExceptionWithPrecision */ // DiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions. 
// Const Immediate = 8. @@ -8062,6 +3408,12 @@ func (x Float32x4) MaskedDiffWithRoundSuppressExceptionWithPrecision(imm uint8, // Asm: VREDUCEPS, CPU Feature: AVX512EVEX func (x Float32x8) MaskedDiffWithRoundSuppressExceptionWithPrecision(imm uint8, y Mask32x8) Float32x8 +// DiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions. +// Const Immediate = 8. +// +// Asm: VREDUCEPS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedDiffWithRoundSuppressExceptionWithPrecision(imm uint8, y Mask32x16) Float32x16 + // DiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions. // Const Immediate = 8. // @@ -8080,11 +3432,7 @@ func (x Float64x4) MaskedDiffWithRoundSuppressExceptionWithPrecision(imm uint8, // Asm: VREDUCEPD, CPU Feature: AVX512EVEX func (x Float64x8) MaskedDiffWithRoundSuppressExceptionWithPrecision(imm uint8, y Mask64x8) Float64x8 -// DiffWithRoundWithPrecision computes the difference after rounding with specified precision. -// Const Immediate = 0. -// -// Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedDiffWithRoundWithPrecision(imm uint8, y Mask32x16) Float32x16 +/* MaskedDiffWithRoundWithPrecision */ // DiffWithRoundWithPrecision computes the difference after rounding with specified precision. // Const Immediate = 0. @@ -8098,6 +3446,12 @@ func (x Float32x4) MaskedDiffWithRoundWithPrecision(imm uint8, y Mask32x4) Float // Asm: VREDUCEPS, CPU Feature: AVX512EVEX func (x Float32x8) MaskedDiffWithRoundWithPrecision(imm uint8, y Mask32x8) Float32x8 +// DiffWithRoundWithPrecision computes the difference after rounding with specified precision. +// Const Immediate = 0. +// +// Asm: VREDUCEPS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedDiffWithRoundWithPrecision(imm uint8, y Mask32x16) Float32x16 + // DiffWithRoundWithPrecision computes the difference after rounding with specified precision. // Const Immediate = 0. // @@ -8116,11 +3470,7 @@ func (x Float64x4) MaskedDiffWithRoundWithPrecision(imm uint8, y Mask64x4) Float // Asm: VREDUCEPD, CPU Feature: AVX512EVEX func (x Float64x8) MaskedDiffWithRoundWithPrecision(imm uint8, y Mask64x8) Float64x8 -// DiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions. -// Const Immediate = 11. -// -// Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedDiffWithTruncSuppressExceptionWithPrecision(imm uint8, y Mask32x16) Float32x16 +/* MaskedDiffWithTruncSuppressExceptionWithPrecision */ // DiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions. // Const Immediate = 11. @@ -8134,6 +3484,12 @@ func (x Float32x4) MaskedDiffWithTruncSuppressExceptionWithPrecision(imm uint8, // Asm: VREDUCEPS, CPU Feature: AVX512EVEX func (x Float32x8) MaskedDiffWithTruncSuppressExceptionWithPrecision(imm uint8, y Mask32x8) Float32x8 +// DiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions. +// Const Immediate = 11. +// +// Asm: VREDUCEPS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedDiffWithTruncSuppressExceptionWithPrecision(imm uint8, y Mask32x16) Float32x16 + // DiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions. // Const Immediate = 11. 
// @@ -8152,11 +3508,7 @@ func (x Float64x4) MaskedDiffWithTruncSuppressExceptionWithPrecision(imm uint8, // Asm: VREDUCEPD, CPU Feature: AVX512EVEX func (x Float64x8) MaskedDiffWithTruncSuppressExceptionWithPrecision(imm uint8, y Mask64x8) Float64x8 -// DiffWithTruncWithPrecision computes the difference after truncating with specified precision. -// Const Immediate = 3. -// -// Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedDiffWithTruncWithPrecision(imm uint8, y Mask32x16) Float32x16 +/* MaskedDiffWithTruncWithPrecision */ // DiffWithTruncWithPrecision computes the difference after truncating with specified precision. // Const Immediate = 3. @@ -8170,6 +3522,12 @@ func (x Float32x4) MaskedDiffWithTruncWithPrecision(imm uint8, y Mask32x4) Float // Asm: VREDUCEPS, CPU Feature: AVX512EVEX func (x Float32x8) MaskedDiffWithTruncWithPrecision(imm uint8, y Mask32x8) Float32x8 +// DiffWithTruncWithPrecision computes the difference after truncating with specified precision. +// Const Immediate = 3. +// +// Asm: VREDUCEPS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedDiffWithTruncWithPrecision(imm uint8, y Mask32x16) Float32x16 + // DiffWithTruncWithPrecision computes the difference after truncating with specified precision. // Const Immediate = 3. // @@ -8188,11 +3546,221 @@ func (x Float64x4) MaskedDiffWithTruncWithPrecision(imm uint8, y Mask64x4) Float // Asm: VREDUCEPD, CPU Feature: AVX512EVEX func (x Float64x8) MaskedDiffWithTruncWithPrecision(imm uint8, y Mask64x8) Float64x8 -// FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions, masked. -// Const Immediate = 9. +/* MaskedDiv */ + +// Div divides elements of two vectors. // -// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedFloorSuppressExceptionWithPrecision(imm uint8, y Mask32x16) Float32x16 +// Asm: VDIVPS, CPU Feature: AVX512EVEX +func (x Float32x4) MaskedDiv(y Float32x4, z Mask32x4) Float32x4 + +// Div divides elements of two vectors. +// +// Asm: VDIVPS, CPU Feature: AVX512EVEX +func (x Float32x8) MaskedDiv(y Float32x8, z Mask32x8) Float32x8 + +// Div divides elements of two vectors. +// +// Asm: VDIVPS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedDiv(y Float32x16, z Mask32x16) Float32x16 + +// Div divides elements of two vectors. +// +// Asm: VDIVPD, CPU Feature: AVX512EVEX +func (x Float64x2) MaskedDiv(y Float64x2, z Mask64x2) Float64x2 + +// Div divides elements of two vectors. +// +// Asm: VDIVPD, CPU Feature: AVX512EVEX +func (x Float64x4) MaskedDiv(y Float64x4, z Mask64x4) Float64x4 + +// Div divides elements of two vectors. +// +// Asm: VDIVPD, CPU Feature: AVX512EVEX +func (x Float64x8) MaskedDiv(y Float64x8, z Mask64x8) Float64x8 + +/* MaskedEqual */ + +// Equal compares for equality, masked. +// Const Immediate = 0. +// +// Asm: VCMPPS, CPU Feature: AVX512EVEX +func (x Float32x4) MaskedEqual(y Float32x4, z Mask32x4) Mask32x4 + +// Equal compares for equality, masked. +// Const Immediate = 0. +// +// Asm: VCMPPS, CPU Feature: AVX512EVEX +func (x Float32x8) MaskedEqual(y Float32x8, z Mask32x8) Mask32x8 + +// Equal compares for equality, masked. +// Const Immediate = 0. +// +// Asm: VCMPPS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedEqual(y Float32x16, z Mask32x16) Mask32x16 + +// Equal compares for equality, masked. +// Const Immediate = 0. +// +// Asm: VCMPPD, CPU Feature: AVX512EVEX +func (x Float64x2) MaskedEqual(y Float64x2, z Mask64x2) Mask64x2 + +// Equal compares for equality, masked. 
+// Const Immediate = 0. +// +// Asm: VCMPPD, CPU Feature: AVX512EVEX +func (x Float64x4) MaskedEqual(y Float64x4, z Mask64x4) Mask64x4 + +// Equal compares for equality, masked. +// Const Immediate = 0. +// +// Asm: VCMPPD, CPU Feature: AVX512EVEX +func (x Float64x8) MaskedEqual(y Float64x8, z Mask64x8) Mask64x8 + +// Equal compares for equality, masked. +// Const Immediate = 0. +// +// Asm: VPCMPB, CPU Feature: AVX512EVEX +func (x Int8x16) MaskedEqual(y Int8x16, z Mask8x16) Mask8x16 + +// Equal compares for equality, masked. +// Const Immediate = 0. +// +// Asm: VPCMPB, CPU Feature: AVX512EVEX +func (x Int8x32) MaskedEqual(y Int8x32, z Mask8x32) Mask8x32 + +// Equal compares for equality, masked. +// Const Immediate = 0. +// +// Asm: VPCMPB, CPU Feature: AVX512EVEX +func (x Int8x64) MaskedEqual(y Int8x64, z Mask8x64) Mask8x64 + +// Equal compares for equality, masked. +// Const Immediate = 0. +// +// Asm: VPCMPW, CPU Feature: AVX512EVEX +func (x Int16x8) MaskedEqual(y Int16x8, z Mask16x8) Mask16x8 + +// Equal compares for equality, masked. +// Const Immediate = 0. +// +// Asm: VPCMPW, CPU Feature: AVX512EVEX +func (x Int16x16) MaskedEqual(y Int16x16, z Mask16x16) Mask16x16 + +// Equal compares for equality, masked. +// Const Immediate = 0. +// +// Asm: VPCMPW, CPU Feature: AVX512EVEX +func (x Int16x32) MaskedEqual(y Int16x32, z Mask16x32) Mask16x32 + +// Equal compares for equality, masked. +// Const Immediate = 0. +// +// Asm: VPCMPD, CPU Feature: AVX512EVEX +func (x Int32x4) MaskedEqual(y Int32x4, z Mask32x4) Mask32x4 + +// Equal compares for equality, masked. +// Const Immediate = 0. +// +// Asm: VPCMPD, CPU Feature: AVX512EVEX +func (x Int32x8) MaskedEqual(y Int32x8, z Mask32x8) Mask32x8 + +// Equal compares for equality, masked. +// Const Immediate = 0. +// +// Asm: VPCMPD, CPU Feature: AVX512EVEX +func (x Int32x16) MaskedEqual(y Int32x16, z Mask32x16) Mask32x16 + +// Equal compares for equality, masked. +// Const Immediate = 0. +// +// Asm: VPCMPQ, CPU Feature: AVX512EVEX +func (x Int64x2) MaskedEqual(y Int64x2, z Mask64x2) Mask64x2 + +// Equal compares for equality, masked. +// Const Immediate = 0. +// +// Asm: VPCMPQ, CPU Feature: AVX512EVEX +func (x Int64x4) MaskedEqual(y Int64x4, z Mask64x4) Mask64x4 + +// Equal compares for equality, masked. +// Const Immediate = 0. +// +// Asm: VPCMPQ, CPU Feature: AVX512EVEX +func (x Int64x8) MaskedEqual(y Int64x8, z Mask64x8) Mask64x8 + +// Equal compares for equality, masked. +// Const Immediate = 0. +// +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x16) MaskedEqual(y Uint8x16, z Mask8x16) Mask8x16 + +// Equal compares for equality, masked. +// Const Immediate = 0. +// +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x32) MaskedEqual(y Uint8x32, z Mask8x32) Mask8x32 + +// Equal compares for equality, masked. +// Const Immediate = 0. +// +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x64) MaskedEqual(y Uint8x64, z Mask8x64) Mask8x64 + +// Equal compares for equality, masked. +// Const Immediate = 0. +// +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x8) MaskedEqual(y Uint16x8, z Mask16x8) Mask16x8 + +// Equal compares for equality, masked. +// Const Immediate = 0. +// +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x16) MaskedEqual(y Uint16x16, z Mask16x16) Mask16x16 + +// Equal compares for equality, masked. +// Const Immediate = 0. +// +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x32) MaskedEqual(y Uint16x32, z Mask16x32) Mask16x32 + +// Equal compares for equality, masked. 
+// Const Immediate = 0. +// +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x4) MaskedEqual(y Uint32x4, z Mask32x4) Mask32x4 + +// Equal compares for equality, masked. +// Const Immediate = 0. +// +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x8) MaskedEqual(y Uint32x8, z Mask32x8) Mask32x8 + +// Equal compares for equality, masked. +// Const Immediate = 0. +// +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x16) MaskedEqual(y Uint32x16, z Mask32x16) Mask32x16 + +// Equal compares for equality, masked. +// Const Immediate = 0. +// +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x2) MaskedEqual(y Uint64x2, z Mask64x2) Mask64x2 + +// Equal compares for equality, masked. +// Const Immediate = 0. +// +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x4) MaskedEqual(y Uint64x4, z Mask64x4) Mask64x4 + +// Equal compares for equality, masked. +// Const Immediate = 0. +// +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x8) MaskedEqual(y Uint64x8, z Mask64x8) Mask64x8 + +/* MaskedFloorSuppressExceptionWithPrecision */ // FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions, masked. // Const Immediate = 9. @@ -8206,6 +3774,12 @@ func (x Float32x4) MaskedFloorSuppressExceptionWithPrecision(imm uint8, y Mask32 // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX func (x Float32x8) MaskedFloorSuppressExceptionWithPrecision(imm uint8, y Mask32x8) Float32x8 +// FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions, masked. +// Const Immediate = 9. +// +// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedFloorSuppressExceptionWithPrecision(imm uint8, y Mask32x16) Float32x16 + // FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions, masked. // Const Immediate = 9. // @@ -8224,11 +3798,7 @@ func (x Float64x4) MaskedFloorSuppressExceptionWithPrecision(imm uint8, y Mask64 // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX func (x Float64x8) MaskedFloorSuppressExceptionWithPrecision(imm uint8, y Mask64x8) Float64x8 -// FloorWithPrecision rounds elements down with specified precision, masked. -// Const Immediate = 1. -// -// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedFloorWithPrecision(imm uint8, y Mask32x16) Float32x16 +/* MaskedFloorWithPrecision */ // FloorWithPrecision rounds elements down with specified precision, masked. // Const Immediate = 1. @@ -8242,6 +3812,12 @@ func (x Float32x4) MaskedFloorWithPrecision(imm uint8, y Mask32x4) Float32x4 // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX func (x Float32x8) MaskedFloorWithPrecision(imm uint8, y Mask32x8) Float32x8 +// FloorWithPrecision rounds elements down with specified precision, masked. +// Const Immediate = 1. +// +// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedFloorWithPrecision(imm uint8, y Mask32x16) Float32x16 + // FloorWithPrecision rounds elements down with specified precision, masked. // Const Immediate = 1. // @@ -8260,11 +3836,2267 @@ func (x Float64x4) MaskedFloorWithPrecision(imm uint8, y Mask64x4) Float64x4 // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX func (x Float64x8) MaskedFloorWithPrecision(imm uint8, y Mask64x8) Float64x8 -// RoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions. -// Const Immediate = 8. +/* MaskedFusedMultiplyAdd132 */ + +// FusedMultiplyAdd132 performs `(v1 * v3) + v2`. 
// -// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedRoundSuppressExceptionWithPrecision(imm uint8, y Mask32x16) Float32x16 +// Asm: VFMADD132PS, CPU Feature: AVX512EVEX +func (x Float32x4) MaskedFusedMultiplyAdd132(y Float32x4, z Float32x4, u Mask32x4) Float32x4 + +// FusedMultiplyAdd132 performs `(v1 * v3) + v2`. +// +// Asm: VFMADD132PS, CPU Feature: AVX512EVEX +func (x Float32x8) MaskedFusedMultiplyAdd132(y Float32x8, z Float32x8, u Mask32x8) Float32x8 + +// FusedMultiplyAdd132 performs `(v1 * v3) + v2`. +// +// Asm: VFMADD132PS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedFusedMultiplyAdd132(y Float32x16, z Float32x16, u Mask32x16) Float32x16 + +// FusedMultiplyAdd132 performs `(v1 * v3) + v2`. +// +// Asm: VFMADD132PD, CPU Feature: AVX512EVEX +func (x Float64x2) MaskedFusedMultiplyAdd132(y Float64x2, z Float64x2, u Mask64x2) Float64x2 + +// FusedMultiplyAdd132 performs `(v1 * v3) + v2`. +// +// Asm: VFMADD132PD, CPU Feature: AVX512EVEX +func (x Float64x4) MaskedFusedMultiplyAdd132(y Float64x4, z Float64x4, u Mask64x4) Float64x4 + +// FusedMultiplyAdd132 performs `(v1 * v3) + v2`. +// +// Asm: VFMADD132PD, CPU Feature: AVX512EVEX +func (x Float64x8) MaskedFusedMultiplyAdd132(y Float64x8, z Float64x8, u Mask64x8) Float64x8 + +/* MaskedFusedMultiplyAdd213 */ + +// FusedMultiplyAdd213 performs `(v2 * v1) + v3`. +// +// Asm: VFMADD213PS, CPU Feature: AVX512EVEX +func (x Float32x4) MaskedFusedMultiplyAdd213(y Float32x4, z Float32x4, u Mask32x4) Float32x4 + +// FusedMultiplyAdd213 performs `(v2 * v1) + v3`. +// +// Asm: VFMADD213PS, CPU Feature: AVX512EVEX +func (x Float32x8) MaskedFusedMultiplyAdd213(y Float32x8, z Float32x8, u Mask32x8) Float32x8 + +// FusedMultiplyAdd213 performs `(v2 * v1) + v3`. +// +// Asm: VFMADD213PS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedFusedMultiplyAdd213(y Float32x16, z Float32x16, u Mask32x16) Float32x16 + +// FusedMultiplyAdd213 performs `(v2 * v1) + v3`. +// +// Asm: VFMADD213PD, CPU Feature: AVX512EVEX +func (x Float64x2) MaskedFusedMultiplyAdd213(y Float64x2, z Float64x2, u Mask64x2) Float64x2 + +// FusedMultiplyAdd213 performs `(v2 * v1) + v3`. +// +// Asm: VFMADD213PD, CPU Feature: AVX512EVEX +func (x Float64x4) MaskedFusedMultiplyAdd213(y Float64x4, z Float64x4, u Mask64x4) Float64x4 + +// FusedMultiplyAdd213 performs `(v2 * v1) + v3`. +// +// Asm: VFMADD213PD, CPU Feature: AVX512EVEX +func (x Float64x8) MaskedFusedMultiplyAdd213(y Float64x8, z Float64x8, u Mask64x8) Float64x8 + +/* MaskedFusedMultiplyAdd231 */ + +// FusedMultiplyAdd231 performs `(v2 * v3) + v1`. +// +// Asm: VFMADD231PS, CPU Feature: AVX512EVEX +func (x Float32x4) MaskedFusedMultiplyAdd231(y Float32x4, z Float32x4, u Mask32x4) Float32x4 + +// FusedMultiplyAdd231 performs `(v2 * v3) + v1`. +// +// Asm: VFMADD231PS, CPU Feature: AVX512EVEX +func (x Float32x8) MaskedFusedMultiplyAdd231(y Float32x8, z Float32x8, u Mask32x8) Float32x8 + +// FusedMultiplyAdd231 performs `(v2 * v3) + v1`. +// +// Asm: VFMADD231PS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedFusedMultiplyAdd231(y Float32x16, z Float32x16, u Mask32x16) Float32x16 + +// FusedMultiplyAdd231 performs `(v2 * v3) + v1`. +// +// Asm: VFMADD231PD, CPU Feature: AVX512EVEX +func (x Float64x2) MaskedFusedMultiplyAdd231(y Float64x2, z Float64x2, u Mask64x2) Float64x2 + +// FusedMultiplyAdd231 performs `(v2 * v3) + v1`. 
+// +// Asm: VFMADD231PD, CPU Feature: AVX512EVEX +func (x Float64x4) MaskedFusedMultiplyAdd231(y Float64x4, z Float64x4, u Mask64x4) Float64x4 + +// FusedMultiplyAdd231 performs `(v2 * v3) + v1`. +// +// Asm: VFMADD231PD, CPU Feature: AVX512EVEX +func (x Float64x8) MaskedFusedMultiplyAdd231(y Float64x8, z Float64x8, u Mask64x8) Float64x8 + +/* MaskedFusedMultiplyAddSub132 */ + +// FusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements. +// +// Asm: VFMADDSUB132PS, CPU Feature: AVX512EVEX +func (x Float32x4) MaskedFusedMultiplyAddSub132(y Float32x4, z Float32x4, u Mask32x4) Float32x4 + +// FusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements. +// +// Asm: VFMADDSUB132PS, CPU Feature: AVX512EVEX +func (x Float32x8) MaskedFusedMultiplyAddSub132(y Float32x8, z Float32x8, u Mask32x8) Float32x8 + +// FusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements. +// +// Asm: VFMADDSUB132PS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedFusedMultiplyAddSub132(y Float32x16, z Float32x16, u Mask32x16) Float32x16 + +// FusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements. +// +// Asm: VFMADDSUB132PD, CPU Feature: AVX512EVEX +func (x Float64x2) MaskedFusedMultiplyAddSub132(y Float64x2, z Float64x2, u Mask64x2) Float64x2 + +// FusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements. +// +// Asm: VFMADDSUB132PD, CPU Feature: AVX512EVEX +func (x Float64x4) MaskedFusedMultiplyAddSub132(y Float64x4, z Float64x4, u Mask64x4) Float64x4 + +// FusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements. +// +// Asm: VFMADDSUB132PD, CPU Feature: AVX512EVEX +func (x Float64x8) MaskedFusedMultiplyAddSub132(y Float64x8, z Float64x8, u Mask64x8) Float64x8 + +/* MaskedFusedMultiplyAddSub213 */ + +// FusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements. +// +// Asm: VFMADDSUB213PS, CPU Feature: AVX512EVEX +func (x Float32x4) MaskedFusedMultiplyAddSub213(y Float32x4, z Float32x4, u Mask32x4) Float32x4 + +// FusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements. +// +// Asm: VFMADDSUB213PS, CPU Feature: AVX512EVEX +func (x Float32x8) MaskedFusedMultiplyAddSub213(y Float32x8, z Float32x8, u Mask32x8) Float32x8 + +// FusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements. +// +// Asm: VFMADDSUB213PS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedFusedMultiplyAddSub213(y Float32x16, z Float32x16, u Mask32x16) Float32x16 + +// FusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements. +// +// Asm: VFMADDSUB213PD, CPU Feature: AVX512EVEX +func (x Float64x2) MaskedFusedMultiplyAddSub213(y Float64x2, z Float64x2, u Mask64x2) Float64x2 + +// FusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements. 
+// +// Asm: VFMADDSUB213PD, CPU Feature: AVX512EVEX +func (x Float64x4) MaskedFusedMultiplyAddSub213(y Float64x4, z Float64x4, u Mask64x4) Float64x4 + +// FusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements. +// +// Asm: VFMADDSUB213PD, CPU Feature: AVX512EVEX +func (x Float64x8) MaskedFusedMultiplyAddSub213(y Float64x8, z Float64x8, u Mask64x8) Float64x8 + +/* MaskedFusedMultiplyAddSub231 */ + +// FusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements. +// +// Asm: VFMADDSUB231PS, CPU Feature: AVX512EVEX +func (x Float32x4) MaskedFusedMultiplyAddSub231(y Float32x4, z Float32x4, u Mask32x4) Float32x4 + +// FusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements. +// +// Asm: VFMADDSUB231PS, CPU Feature: AVX512EVEX +func (x Float32x8) MaskedFusedMultiplyAddSub231(y Float32x8, z Float32x8, u Mask32x8) Float32x8 + +// FusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements. +// +// Asm: VFMADDSUB231PS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedFusedMultiplyAddSub231(y Float32x16, z Float32x16, u Mask32x16) Float32x16 + +// FusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements. +// +// Asm: VFMADDSUB231PD, CPU Feature: AVX512EVEX +func (x Float64x2) MaskedFusedMultiplyAddSub231(y Float64x2, z Float64x2, u Mask64x2) Float64x2 + +// FusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements. +// +// Asm: VFMADDSUB231PD, CPU Feature: AVX512EVEX +func (x Float64x4) MaskedFusedMultiplyAddSub231(y Float64x4, z Float64x4, u Mask64x4) Float64x4 + +// FusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements. +// +// Asm: VFMADDSUB231PD, CPU Feature: AVX512EVEX +func (x Float64x8) MaskedFusedMultiplyAddSub231(y Float64x8, z Float64x8, u Mask64x8) Float64x8 + +/* MaskedFusedMultiplySub132 */ + +// FusedMultiplySub132 performs `(v1 * v3) - v2`. +// +// Asm: VFMSUB132PS, CPU Feature: AVX512EVEX +func (x Float32x4) MaskedFusedMultiplySub132(y Float32x4, z Float32x4, u Mask32x4) Float32x4 + +// FusedMultiplySub132 performs `(v1 * v3) - v2`. +// +// Asm: VFMSUB132PS, CPU Feature: AVX512EVEX +func (x Float32x8) MaskedFusedMultiplySub132(y Float32x8, z Float32x8, u Mask32x8) Float32x8 + +// FusedMultiplySub132 performs `(v1 * v3) - v2`. +// +// Asm: VFMSUB132PS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedFusedMultiplySub132(y Float32x16, z Float32x16, u Mask32x16) Float32x16 + +// FusedMultiplySub132 performs `(v1 * v3) - v2`. +// +// Asm: VFMSUB132PD, CPU Feature: AVX512EVEX +func (x Float64x2) MaskedFusedMultiplySub132(y Float64x2, z Float64x2, u Mask64x2) Float64x2 + +// FusedMultiplySub132 performs `(v1 * v3) - v2`. +// +// Asm: VFMSUB132PD, CPU Feature: AVX512EVEX +func (x Float64x4) MaskedFusedMultiplySub132(y Float64x4, z Float64x4, u Mask64x4) Float64x4 + +// FusedMultiplySub132 performs `(v1 * v3) - v2`. +// +// Asm: VFMSUB132PD, CPU Feature: AVX512EVEX +func (x Float64x8) MaskedFusedMultiplySub132(y Float64x8, z Float64x8, u Mask64x8) Float64x8 + +/* MaskedFusedMultiplySub213 */ + +// FusedMultiplySub213 performs `(v2 * v1) - v3`. 
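The AddSub forms interleave the two operations across lanes (subtract on odd-indexed lanes, add on even-indexed ones), which is the shuffle-free access pattern complex-arithmetic kernels typically want. A sketch under the same assumptions as above:

// addsub132 computes, on the lanes selected by u,
//   even lanes: (x * z) + y
//   odd lanes:  (x * z) - y
// matching the FusedMultiplyAddSub132 doc comment.
func addsub132(x, y, z simd.Float32x8, u simd.Mask32x8) simd.Float32x8 {
	return x.MaskedFusedMultiplyAddSub132(y, z, u)
}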
+// +// Asm: VFMSUB213PS, CPU Feature: AVX512EVEX +func (x Float32x4) MaskedFusedMultiplySub213(y Float32x4, z Float32x4, u Mask32x4) Float32x4 + +// FusedMultiplySub213 performs `(v2 * v1) - v3`. +// +// Asm: VFMSUB213PS, CPU Feature: AVX512EVEX +func (x Float32x8) MaskedFusedMultiplySub213(y Float32x8, z Float32x8, u Mask32x8) Float32x8 + +// FusedMultiplySub213 performs `(v2 * v1) - v3`. +// +// Asm: VFMSUB213PS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedFusedMultiplySub213(y Float32x16, z Float32x16, u Mask32x16) Float32x16 + +// FusedMultiplySub213 performs `(v2 * v1) - v3`. +// +// Asm: VFMSUB213PD, CPU Feature: AVX512EVEX +func (x Float64x2) MaskedFusedMultiplySub213(y Float64x2, z Float64x2, u Mask64x2) Float64x2 + +// FusedMultiplySub213 performs `(v2 * v1) - v3`. +// +// Asm: VFMSUB213PD, CPU Feature: AVX512EVEX +func (x Float64x4) MaskedFusedMultiplySub213(y Float64x4, z Float64x4, u Mask64x4) Float64x4 + +// FusedMultiplySub213 performs `(v2 * v1) - v3`. +// +// Asm: VFMSUB213PD, CPU Feature: AVX512EVEX +func (x Float64x8) MaskedFusedMultiplySub213(y Float64x8, z Float64x8, u Mask64x8) Float64x8 + +/* MaskedFusedMultiplySub231 */ + +// FusedMultiplySub231 performs `(v2 * v3) - v1`. +// +// Asm: VFMSUB231PS, CPU Feature: AVX512EVEX +func (x Float32x4) MaskedFusedMultiplySub231(y Float32x4, z Float32x4, u Mask32x4) Float32x4 + +// FusedMultiplySub231 performs `(v2 * v3) - v1`. +// +// Asm: VFMSUB231PS, CPU Feature: AVX512EVEX +func (x Float32x8) MaskedFusedMultiplySub231(y Float32x8, z Float32x8, u Mask32x8) Float32x8 + +// FusedMultiplySub231 performs `(v2 * v3) - v1`. +// +// Asm: VFMSUB231PS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedFusedMultiplySub231(y Float32x16, z Float32x16, u Mask32x16) Float32x16 + +// FusedMultiplySub231 performs `(v2 * v3) - v1`. +// +// Asm: VFMSUB231PD, CPU Feature: AVX512EVEX +func (x Float64x2) MaskedFusedMultiplySub231(y Float64x2, z Float64x2, u Mask64x2) Float64x2 + +// FusedMultiplySub231 performs `(v2 * v3) - v1`. +// +// Asm: VFMSUB231PD, CPU Feature: AVX512EVEX +func (x Float64x4) MaskedFusedMultiplySub231(y Float64x4, z Float64x4, u Mask64x4) Float64x4 + +// FusedMultiplySub231 performs `(v2 * v3) - v1`. +// +// Asm: VFMSUB231PD, CPU Feature: AVX512EVEX +func (x Float64x8) MaskedFusedMultiplySub231(y Float64x8, z Float64x8, u Mask64x8) Float64x8 + +/* MaskedFusedMultiplySubAdd132 */ + +// FusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements. +// +// Asm: VFMSUBADD132PS, CPU Feature: AVX512EVEX +func (x Float32x4) MaskedFusedMultiplySubAdd132(y Float32x4, z Float32x4, u Mask32x4) Float32x4 + +// FusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements. +// +// Asm: VFMSUBADD132PS, CPU Feature: AVX512EVEX +func (x Float32x8) MaskedFusedMultiplySubAdd132(y Float32x8, z Float32x8, u Mask32x8) Float32x8 + +// FusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements. +// +// Asm: VFMSUBADD132PS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedFusedMultiplySubAdd132(y Float32x16, z Float32x16, u Mask32x16) Float32x16 + +// FusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements. 
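The plain FusedMultiplySub forms subtract the remaining operand from the product in every lane. Same assumptions:

// fmsub231 computes (y * z) - x lane-wise on the lanes selected by u,
// per `(v2 * v3) - v1` in the FusedMultiplySub231 doc comment.
func fmsub231(x, y, z simd.Float32x4, u simd.Mask32x4) simd.Float32x4 {
	return x.MaskedFusedMultiplySub231(y, z, u)
}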
+// +// Asm: VFMSUBADD132PD, CPU Feature: AVX512EVEX +func (x Float64x2) MaskedFusedMultiplySubAdd132(y Float64x2, z Float64x2, u Mask64x2) Float64x2 + +// FusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements. +// +// Asm: VFMSUBADD132PD, CPU Feature: AVX512EVEX +func (x Float64x4) MaskedFusedMultiplySubAdd132(y Float64x4, z Float64x4, u Mask64x4) Float64x4 + +// FusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements. +// +// Asm: VFMSUBADD132PD, CPU Feature: AVX512EVEX +func (x Float64x8) MaskedFusedMultiplySubAdd132(y Float64x8, z Float64x8, u Mask64x8) Float64x8 + +/* MaskedFusedMultiplySubAdd213 */ + +// FusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements. +// +// Asm: VFMSUBADD213PS, CPU Feature: AVX512EVEX +func (x Float32x4) MaskedFusedMultiplySubAdd213(y Float32x4, z Float32x4, u Mask32x4) Float32x4 + +// FusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements. +// +// Asm: VFMSUBADD213PS, CPU Feature: AVX512EVEX +func (x Float32x8) MaskedFusedMultiplySubAdd213(y Float32x8, z Float32x8, u Mask32x8) Float32x8 + +// FusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements. +// +// Asm: VFMSUBADD213PS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedFusedMultiplySubAdd213(y Float32x16, z Float32x16, u Mask32x16) Float32x16 + +// FusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements. +// +// Asm: VFMSUBADD213PD, CPU Feature: AVX512EVEX +func (x Float64x2) MaskedFusedMultiplySubAdd213(y Float64x2, z Float64x2, u Mask64x2) Float64x2 + +// FusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements. +// +// Asm: VFMSUBADD213PD, CPU Feature: AVX512EVEX +func (x Float64x4) MaskedFusedMultiplySubAdd213(y Float64x4, z Float64x4, u Mask64x4) Float64x4 + +// FusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements. +// +// Asm: VFMSUBADD213PD, CPU Feature: AVX512EVEX +func (x Float64x8) MaskedFusedMultiplySubAdd213(y Float64x8, z Float64x8, u Mask64x8) Float64x8 + +/* MaskedFusedMultiplySubAdd231 */ + +// FusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements. +// +// Asm: VFMSUBADD231PS, CPU Feature: AVX512EVEX +func (x Float32x4) MaskedFusedMultiplySubAdd231(y Float32x4, z Float32x4, u Mask32x4) Float32x4 + +// FusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements. +// +// Asm: VFMSUBADD231PS, CPU Feature: AVX512EVEX +func (x Float32x8) MaskedFusedMultiplySubAdd231(y Float32x8, z Float32x8, u Mask32x8) Float32x8 + +// FusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements. +// +// Asm: VFMSUBADD231PS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedFusedMultiplySubAdd231(y Float32x16, z Float32x16, u Mask32x16) Float32x16 + +// FusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements. 
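FusedMultiplySubAdd* mirrors FusedMultiplyAddSub*: the add lands on odd-indexed lanes and the subtract on even-indexed ones. Same assumptions:

// subadd213: odd lanes get (y * x) + z, even lanes get (y * x) - z,
// per the FusedMultiplySubAdd213 doc comment.
func subadd213(x, y, z simd.Float64x4, u simd.Mask64x4) simd.Float64x4 {
	return x.MaskedFusedMultiplySubAdd213(y, z, u)
}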
+// +// Asm: VFMSUBADD231PD, CPU Feature: AVX512EVEX +func (x Float64x2) MaskedFusedMultiplySubAdd231(y Float64x2, z Float64x2, u Mask64x2) Float64x2 + +// FusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements. +// +// Asm: VFMSUBADD231PD, CPU Feature: AVX512EVEX +func (x Float64x4) MaskedFusedMultiplySubAdd231(y Float64x4, z Float64x4, u Mask64x4) Float64x4 + +// FusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements. +// +// Asm: VFMSUBADD231PD, CPU Feature: AVX512EVEX +func (x Float64x8) MaskedFusedMultiplySubAdd231(y Float64x8, z Float64x8, u Mask64x8) Float64x8 + +/* MaskedFusedNegativeMultiplyAdd132 */ + +// FusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`. +// +// Asm: VFNMADD132PS, CPU Feature: AVX512EVEX +func (x Float32x4) MaskedFusedNegativeMultiplyAdd132(y Float32x4, z Float32x4, u Mask32x4) Float32x4 + +// FusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`. +// +// Asm: VFNMADD132PS, CPU Feature: AVX512EVEX +func (x Float32x8) MaskedFusedNegativeMultiplyAdd132(y Float32x8, z Float32x8, u Mask32x8) Float32x8 + +// FusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`. +// +// Asm: VFNMADD132PS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedFusedNegativeMultiplyAdd132(y Float32x16, z Float32x16, u Mask32x16) Float32x16 + +// FusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`. +// +// Asm: VFNMADD132PD, CPU Feature: AVX512EVEX +func (x Float64x2) MaskedFusedNegativeMultiplyAdd132(y Float64x2, z Float64x2, u Mask64x2) Float64x2 + +// FusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`. +// +// Asm: VFNMADD132PD, CPU Feature: AVX512EVEX +func (x Float64x4) MaskedFusedNegativeMultiplyAdd132(y Float64x4, z Float64x4, u Mask64x4) Float64x4 + +// FusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`. +// +// Asm: VFNMADD132PD, CPU Feature: AVX512EVEX +func (x Float64x8) MaskedFusedNegativeMultiplyAdd132(y Float64x8, z Float64x8, u Mask64x8) Float64x8 + +/* MaskedFusedNegativeMultiplyAdd213 */ + +// FusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`. +// +// Asm: VFNMADD213PS, CPU Feature: AVX512EVEX +func (x Float32x4) MaskedFusedNegativeMultiplyAdd213(y Float32x4, z Float32x4, u Mask32x4) Float32x4 + +// FusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`. +// +// Asm: VFNMADD213PS, CPU Feature: AVX512EVEX +func (x Float32x8) MaskedFusedNegativeMultiplyAdd213(y Float32x8, z Float32x8, u Mask32x8) Float32x8 + +// FusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`. +// +// Asm: VFNMADD213PS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedFusedNegativeMultiplyAdd213(y Float32x16, z Float32x16, u Mask32x16) Float32x16 + +// FusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`. +// +// Asm: VFNMADD213PD, CPU Feature: AVX512EVEX +func (x Float64x2) MaskedFusedNegativeMultiplyAdd213(y Float64x2, z Float64x2, u Mask64x2) Float64x2 + +// FusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`. +// +// Asm: VFNMADD213PD, CPU Feature: AVX512EVEX +func (x Float64x4) MaskedFusedNegativeMultiplyAdd213(y Float64x4, z Float64x4, u Mask64x4) Float64x4 + +// FusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`. +// +// Asm: VFNMADD213PD, CPU Feature: AVX512EVEX +func (x Float64x8) MaskedFusedNegativeMultiplyAdd213(y Float64x8, z Float64x8, u Mask64x8) Float64x8 + +/* MaskedFusedNegativeMultiplyAdd231 */ + +// FusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`. 
+// +// Asm: VFNMADD231PS, CPU Feature: AVX512EVEX +func (x Float32x4) MaskedFusedNegativeMultiplyAdd231(y Float32x4, z Float32x4, u Mask32x4) Float32x4 + +// FusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`. +// +// Asm: VFNMADD231PS, CPU Feature: AVX512EVEX +func (x Float32x8) MaskedFusedNegativeMultiplyAdd231(y Float32x8, z Float32x8, u Mask32x8) Float32x8 + +// FusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`. +// +// Asm: VFNMADD231PS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedFusedNegativeMultiplyAdd231(y Float32x16, z Float32x16, u Mask32x16) Float32x16 + +// FusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`. +// +// Asm: VFNMADD231PD, CPU Feature: AVX512EVEX +func (x Float64x2) MaskedFusedNegativeMultiplyAdd231(y Float64x2, z Float64x2, u Mask64x2) Float64x2 + +// FusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`. +// +// Asm: VFNMADD231PD, CPU Feature: AVX512EVEX +func (x Float64x4) MaskedFusedNegativeMultiplyAdd231(y Float64x4, z Float64x4, u Mask64x4) Float64x4 + +// FusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`. +// +// Asm: VFNMADD231PD, CPU Feature: AVX512EVEX +func (x Float64x8) MaskedFusedNegativeMultiplyAdd231(y Float64x8, z Float64x8, u Mask64x8) Float64x8 + +/* MaskedFusedNegativeMultiplySub132 */ + +// FusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`. +// +// Asm: VFNMSUB132PS, CPU Feature: AVX512EVEX +func (x Float32x4) MaskedFusedNegativeMultiplySub132(y Float32x4, z Float32x4, u Mask32x4) Float32x4 + +// FusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`. +// +// Asm: VFNMSUB132PS, CPU Feature: AVX512EVEX +func (x Float32x8) MaskedFusedNegativeMultiplySub132(y Float32x8, z Float32x8, u Mask32x8) Float32x8 + +// FusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`. +// +// Asm: VFNMSUB132PS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedFusedNegativeMultiplySub132(y Float32x16, z Float32x16, u Mask32x16) Float32x16 + +// FusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`. +// +// Asm: VFNMSUB132PD, CPU Feature: AVX512EVEX +func (x Float64x2) MaskedFusedNegativeMultiplySub132(y Float64x2, z Float64x2, u Mask64x2) Float64x2 + +// FusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`. +// +// Asm: VFNMSUB132PD, CPU Feature: AVX512EVEX +func (x Float64x4) MaskedFusedNegativeMultiplySub132(y Float64x4, z Float64x4, u Mask64x4) Float64x4 + +// FusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`. +// +// Asm: VFNMSUB132PD, CPU Feature: AVX512EVEX +func (x Float64x8) MaskedFusedNegativeMultiplySub132(y Float64x8, z Float64x8, u Mask64x8) Float64x8 + +/* MaskedFusedNegativeMultiplySub213 */ + +// FusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`. +// +// Asm: VFNMSUB213PS, CPU Feature: AVX512EVEX +func (x Float32x4) MaskedFusedNegativeMultiplySub213(y Float32x4, z Float32x4, u Mask32x4) Float32x4 + +// FusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`. +// +// Asm: VFNMSUB213PS, CPU Feature: AVX512EVEX +func (x Float32x8) MaskedFusedNegativeMultiplySub213(y Float32x8, z Float32x8, u Mask32x8) Float32x8 + +// FusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`. +// +// Asm: VFNMSUB213PS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedFusedNegativeMultiplySub213(y Float32x16, z Float32x16, u Mask32x16) Float32x16 + +// FusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`. +// +// Asm: VFNMSUB213PD, CPU Feature: AVX512EVEX +func (x Float64x2) MaskedFusedNegativeMultiplySub213(y Float64x2, z Float64x2, u Mask64x2) Float64x2 + +// FusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`. 
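In the FusedNegativeMultiply* forms only the product is negated before the final add or subtract, so the Add variants amount to subtracting the product from the third operand. Same assumptions:

// fnmadd132 computes y - (x * z) lane-wise on the lanes selected by u,
// i.e. `-(v1 * v3) + v2` from the FusedNegativeMultiplyAdd132 doc comment.
func fnmadd132(x, y, z simd.Float32x4, u simd.Mask32x4) simd.Float32x4 {
	return x.MaskedFusedNegativeMultiplyAdd132(y, z, u)
}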
+// +// Asm: VFNMSUB213PD, CPU Feature: AVX512EVEX +func (x Float64x4) MaskedFusedNegativeMultiplySub213(y Float64x4, z Float64x4, u Mask64x4) Float64x4 + +// FusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`. +// +// Asm: VFNMSUB213PD, CPU Feature: AVX512EVEX +func (x Float64x8) MaskedFusedNegativeMultiplySub213(y Float64x8, z Float64x8, u Mask64x8) Float64x8 + +/* MaskedFusedNegativeMultiplySub231 */ + +// FusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`. +// +// Asm: VFNMSUB231PS, CPU Feature: AVX512EVEX +func (x Float32x4) MaskedFusedNegativeMultiplySub231(y Float32x4, z Float32x4, u Mask32x4) Float32x4 + +// FusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`. +// +// Asm: VFNMSUB231PS, CPU Feature: AVX512EVEX +func (x Float32x8) MaskedFusedNegativeMultiplySub231(y Float32x8, z Float32x8, u Mask32x8) Float32x8 + +// FusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`. +// +// Asm: VFNMSUB231PS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedFusedNegativeMultiplySub231(y Float32x16, z Float32x16, u Mask32x16) Float32x16 + +// FusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`. +// +// Asm: VFNMSUB231PD, CPU Feature: AVX512EVEX +func (x Float64x2) MaskedFusedNegativeMultiplySub231(y Float64x2, z Float64x2, u Mask64x2) Float64x2 + +// FusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`. +// +// Asm: VFNMSUB231PD, CPU Feature: AVX512EVEX +func (x Float64x4) MaskedFusedNegativeMultiplySub231(y Float64x4, z Float64x4, u Mask64x4) Float64x4 + +// FusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`. +// +// Asm: VFNMSUB231PD, CPU Feature: AVX512EVEX +func (x Float64x8) MaskedFusedNegativeMultiplySub231(y Float64x8, z Float64x8, u Mask64x8) Float64x8 + +/* MaskedGreater */ + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VCMPPS, CPU Feature: AVX512EVEX +func (x Float32x4) MaskedGreater(y Float32x4, z Mask32x4) Mask32x4 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VCMPPS, CPU Feature: AVX512EVEX +func (x Float32x8) MaskedGreater(y Float32x8, z Mask32x8) Mask32x8 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VCMPPS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedGreater(y Float32x16, z Mask32x16) Mask32x16 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VCMPPD, CPU Feature: AVX512EVEX +func (x Float64x2) MaskedGreater(y Float64x2, z Mask64x2) Mask64x2 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VCMPPD, CPU Feature: AVX512EVEX +func (x Float64x4) MaskedGreater(y Float64x4, z Mask64x4) Mask64x4 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VCMPPD, CPU Feature: AVX512EVEX +func (x Float64x8) MaskedGreater(y Float64x8, z Mask64x8) Mask64x8 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VPCMPB, CPU Feature: AVX512EVEX +func (x Int8x16) MaskedGreater(y Int8x16, z Mask8x16) Mask8x16 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VPCMPB, CPU Feature: AVX512EVEX +func (x Int8x32) MaskedGreater(y Int8x32, z Mask8x32) Mask8x32 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VPCMPB, CPU Feature: AVX512EVEX +func (x Int8x64) MaskedGreater(y Int8x64, z Mask8x64) Mask8x64 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VPCMPW, CPU Feature: AVX512EVEX +func (x Int16x8) MaskedGreater(y Int16x8, z Mask16x8) Mask16x8 + +// Greater compares for greater than. 
+// Const Immediate = 6. +// +// Asm: VPCMPW, CPU Feature: AVX512EVEX +func (x Int16x16) MaskedGreater(y Int16x16, z Mask16x16) Mask16x16 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VPCMPW, CPU Feature: AVX512EVEX +func (x Int16x32) MaskedGreater(y Int16x32, z Mask16x32) Mask16x32 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VPCMPD, CPU Feature: AVX512EVEX +func (x Int32x4) MaskedGreater(y Int32x4, z Mask32x4) Mask32x4 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VPCMPD, CPU Feature: AVX512EVEX +func (x Int32x8) MaskedGreater(y Int32x8, z Mask32x8) Mask32x8 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VPCMPD, CPU Feature: AVX512EVEX +func (x Int32x16) MaskedGreater(y Int32x16, z Mask32x16) Mask32x16 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VPCMPQ, CPU Feature: AVX512EVEX +func (x Int64x2) MaskedGreater(y Int64x2, z Mask64x2) Mask64x2 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VPCMPQ, CPU Feature: AVX512EVEX +func (x Int64x4) MaskedGreater(y Int64x4, z Mask64x4) Mask64x4 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VPCMPQ, CPU Feature: AVX512EVEX +func (x Int64x8) MaskedGreater(y Int64x8, z Mask64x8) Mask64x8 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x16) MaskedGreater(y Uint8x16, z Mask8x16) Mask8x16 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x32) MaskedGreater(y Uint8x32, z Mask8x32) Mask8x32 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x64) MaskedGreater(y Uint8x64, z Mask8x64) Mask8x64 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x8) MaskedGreater(y Uint16x8, z Mask16x8) Mask16x8 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x16) MaskedGreater(y Uint16x16, z Mask16x16) Mask16x16 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x32) MaskedGreater(y Uint16x32, z Mask16x32) Mask16x32 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x4) MaskedGreater(y Uint32x4, z Mask32x4) Mask32x4 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x8) MaskedGreater(y Uint32x8, z Mask32x8) Mask32x8 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x16) MaskedGreater(y Uint32x16, z Mask32x16) Mask32x16 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x2) MaskedGreater(y Uint64x2, z Mask64x2) Mask64x2 + +// Greater compares for greater than. +// Const Immediate = 6. +// +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x4) MaskedGreater(y Uint64x4, z Mask64x4) Mask64x4 + +// Greater compares for greater than. +// Const Immediate = 6. 
+// +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x8) MaskedGreater(y Uint64x8, z Mask64x8) Mask64x8 + +/* MaskedGreaterEqual */ + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VCMPPS, CPU Feature: AVX512EVEX +func (x Float32x4) MaskedGreaterEqual(y Float32x4, z Mask32x4) Mask32x4 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VCMPPS, CPU Feature: AVX512EVEX +func (x Float32x8) MaskedGreaterEqual(y Float32x8, z Mask32x8) Mask32x8 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VCMPPS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedGreaterEqual(y Float32x16, z Mask32x16) Mask32x16 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VCMPPD, CPU Feature: AVX512EVEX +func (x Float64x2) MaskedGreaterEqual(y Float64x2, z Mask64x2) Mask64x2 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VCMPPD, CPU Feature: AVX512EVEX +func (x Float64x4) MaskedGreaterEqual(y Float64x4, z Mask64x4) Mask64x4 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VCMPPD, CPU Feature: AVX512EVEX +func (x Float64x8) MaskedGreaterEqual(y Float64x8, z Mask64x8) Mask64x8 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VPCMPB, CPU Feature: AVX512EVEX +func (x Int8x16) MaskedGreaterEqual(y Int8x16, z Mask8x16) Mask8x16 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VPCMPB, CPU Feature: AVX512EVEX +func (x Int8x32) MaskedGreaterEqual(y Int8x32, z Mask8x32) Mask8x32 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VPCMPB, CPU Feature: AVX512EVEX +func (x Int8x64) MaskedGreaterEqual(y Int8x64, z Mask8x64) Mask8x64 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VPCMPW, CPU Feature: AVX512EVEX +func (x Int16x8) MaskedGreaterEqual(y Int16x8, z Mask16x8) Mask16x8 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VPCMPW, CPU Feature: AVX512EVEX +func (x Int16x16) MaskedGreaterEqual(y Int16x16, z Mask16x16) Mask16x16 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VPCMPW, CPU Feature: AVX512EVEX +func (x Int16x32) MaskedGreaterEqual(y Int16x32, z Mask16x32) Mask16x32 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VPCMPD, CPU Feature: AVX512EVEX +func (x Int32x4) MaskedGreaterEqual(y Int32x4, z Mask32x4) Mask32x4 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VPCMPD, CPU Feature: AVX512EVEX +func (x Int32x8) MaskedGreaterEqual(y Int32x8, z Mask32x8) Mask32x8 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VPCMPD, CPU Feature: AVX512EVEX +func (x Int32x16) MaskedGreaterEqual(y Int32x16, z Mask32x16) Mask32x16 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VPCMPQ, CPU Feature: AVX512EVEX +func (x Int64x2) MaskedGreaterEqual(y Int64x2, z Mask64x2) Mask64x2 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VPCMPQ, CPU Feature: AVX512EVEX +func (x Int64x4) MaskedGreaterEqual(y Int64x4, z Mask64x4) Mask64x4 + +// GreaterEqual compares for greater than or equal. 
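Note how the element type picks the compare instruction: floats go through VCMPPS/VCMPPD with the immediate shown, signed integers through VPCMP{B,W,D,Q}, and unsigned integers through VPCMPU{B,W,D,Q}. A usage sketch under the same assumptions; the stubs do not say how the input mask combines with the produced mask, so treating unselected lanes as cleared is an extra assumption, consistent with AVX-512 write-masking:

// greaterOn returns the mask of lanes where x > y, restricted to the
// lanes already set in m.
func greaterOn(x, y simd.Int32x4, m simd.Mask32x4) simd.Mask32x4 {
	return x.MaskedGreater(y, m)
}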
+// Const Immediate = 5. +// +// Asm: VPCMPQ, CPU Feature: AVX512EVEX +func (x Int64x8) MaskedGreaterEqual(y Int64x8, z Mask64x8) Mask64x8 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x16) MaskedGreaterEqual(y Uint8x16, z Mask8x16) Mask8x16 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x32) MaskedGreaterEqual(y Uint8x32, z Mask8x32) Mask8x32 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x64) MaskedGreaterEqual(y Uint8x64, z Mask8x64) Mask8x64 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x8) MaskedGreaterEqual(y Uint16x8, z Mask16x8) Mask16x8 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x16) MaskedGreaterEqual(y Uint16x16, z Mask16x16) Mask16x16 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x32) MaskedGreaterEqual(y Uint16x32, z Mask16x32) Mask16x32 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x4) MaskedGreaterEqual(y Uint32x4, z Mask32x4) Mask32x4 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x8) MaskedGreaterEqual(y Uint32x8, z Mask32x8) Mask32x8 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x16) MaskedGreaterEqual(y Uint32x16, z Mask32x16) Mask32x16 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x2) MaskedGreaterEqual(y Uint64x2, z Mask64x2) Mask64x2 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x4) MaskedGreaterEqual(y Uint64x4, z Mask64x4) Mask64x4 + +// GreaterEqual compares for greater than or equal. +// Const Immediate = 5. +// +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x8) MaskedGreaterEqual(y Uint64x8, z Mask64x8) Mask64x8 + +/* MaskedIsNan */ + +// IsNan checks if elements are NaN. Use as x.IsNan(x). +// Const Immediate = 3. +// +// Asm: VCMPPS, CPU Feature: AVX512EVEX +func (x Float32x4) MaskedIsNan(y Float32x4, z Mask32x4) Mask32x4 + +// IsNan checks if elements are NaN. Use as x.IsNan(x). +// Const Immediate = 3. +// +// Asm: VCMPPS, CPU Feature: AVX512EVEX +func (x Float32x8) MaskedIsNan(y Float32x8, z Mask32x8) Mask32x8 + +// IsNan checks if elements are NaN. Use as x.IsNan(x). +// Const Immediate = 3. +// +// Asm: VCMPPS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedIsNan(y Float32x16, z Mask32x16) Mask32x16 + +// IsNan checks if elements are NaN. Use as x.IsNan(x). +// Const Immediate = 3. +// +// Asm: VCMPPD, CPU Feature: AVX512EVEX +func (x Float64x2) MaskedIsNan(y Float64x2, z Mask64x2) Mask64x2 + +// IsNan checks if elements are NaN. Use as x.IsNan(x). +// Const Immediate = 3. +// +// Asm: VCMPPD, CPU Feature: AVX512EVEX +func (x Float64x4) MaskedIsNan(y Float64x4, z Mask64x4) Mask64x4 + +// IsNan checks if elements are NaN. 
+// Use as x.IsNan(x). +// Const Immediate = 3. +// +// Asm: VCMPPD, CPU Feature: AVX512EVEX +func (x Float64x8) MaskedIsNan(y Float64x8, z Mask64x8) Mask64x8 + +/* MaskedLess */ + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VCMPPS, CPU Feature: AVX512EVEX +func (x Float32x4) MaskedLess(y Float32x4, z Mask32x4) Mask32x4 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VCMPPS, CPU Feature: AVX512EVEX +func (x Float32x8) MaskedLess(y Float32x8, z Mask32x8) Mask32x8 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VCMPPS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedLess(y Float32x16, z Mask32x16) Mask32x16 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VCMPPD, CPU Feature: AVX512EVEX +func (x Float64x2) MaskedLess(y Float64x2, z Mask64x2) Mask64x2 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VCMPPD, CPU Feature: AVX512EVEX +func (x Float64x4) MaskedLess(y Float64x4, z Mask64x4) Mask64x4 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VCMPPD, CPU Feature: AVX512EVEX +func (x Float64x8) MaskedLess(y Float64x8, z Mask64x8) Mask64x8 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VPCMPB, CPU Feature: AVX512EVEX +func (x Int8x16) MaskedLess(y Int8x16, z Mask8x16) Mask8x16 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VPCMPB, CPU Feature: AVX512EVEX +func (x Int8x32) MaskedLess(y Int8x32, z Mask8x32) Mask8x32 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VPCMPB, CPU Feature: AVX512EVEX +func (x Int8x64) MaskedLess(y Int8x64, z Mask8x64) Mask8x64 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VPCMPW, CPU Feature: AVX512EVEX +func (x Int16x8) MaskedLess(y Int16x8, z Mask16x8) Mask16x8 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VPCMPW, CPU Feature: AVX512EVEX +func (x Int16x16) MaskedLess(y Int16x16, z Mask16x16) Mask16x16 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VPCMPW, CPU Feature: AVX512EVEX +func (x Int16x32) MaskedLess(y Int16x32, z Mask16x32) Mask16x32 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VPCMPD, CPU Feature: AVX512EVEX +func (x Int32x4) MaskedLess(y Int32x4, z Mask32x4) Mask32x4 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VPCMPD, CPU Feature: AVX512EVEX +func (x Int32x8) MaskedLess(y Int32x8, z Mask32x8) Mask32x8 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VPCMPD, CPU Feature: AVX512EVEX +func (x Int32x16) MaskedLess(y Int32x16, z Mask32x16) Mask32x16 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VPCMPQ, CPU Feature: AVX512EVEX +func (x Int64x2) MaskedLess(y Int64x2, z Mask64x2) Mask64x2 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VPCMPQ, CPU Feature: AVX512EVEX +func (x Int64x4) MaskedLess(y Int64x4, z Mask64x4) Mask64x4 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VPCMPQ, CPU Feature: AVX512EVEX +func (x Int64x8) MaskedLess(y Int64x8, z Mask64x8) Mask64x8 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x16) MaskedLess(y Uint8x16, z Mask8x16) Mask8x16 + +// Less compares for less than. +// Const Immediate = 1. 
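The MaskedIsNan stubs rely on the self-compare idiom their doc comments call out ("Use as x.IsNan(x)"): an unordered compare (immediate 3) of a value against itself is true exactly on NaN lanes. Same assumptions:

// nanLanes flags the lanes of x that hold NaN, on the lanes selected
// by m, passing x as both operands as the doc comment suggests.
func nanLanes(x simd.Float64x8, m simd.Mask64x8) simd.Mask64x8 {
	return x.MaskedIsNan(x, m)
}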
+// +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x32) MaskedLess(y Uint8x32, z Mask8x32) Mask8x32 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x64) MaskedLess(y Uint8x64, z Mask8x64) Mask8x64 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x8) MaskedLess(y Uint16x8, z Mask16x8) Mask16x8 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x16) MaskedLess(y Uint16x16, z Mask16x16) Mask16x16 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x32) MaskedLess(y Uint16x32, z Mask16x32) Mask16x32 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x4) MaskedLess(y Uint32x4, z Mask32x4) Mask32x4 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x8) MaskedLess(y Uint32x8, z Mask32x8) Mask32x8 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x16) MaskedLess(y Uint32x16, z Mask32x16) Mask32x16 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x2) MaskedLess(y Uint64x2, z Mask64x2) Mask64x2 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x4) MaskedLess(y Uint64x4, z Mask64x4) Mask64x4 + +// Less compares for less than. +// Const Immediate = 1. +// +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x8) MaskedLess(y Uint64x8, z Mask64x8) Mask64x8 + +/* MaskedLessEqual */ + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VCMPPS, CPU Feature: AVX512EVEX +func (x Float32x4) MaskedLessEqual(y Float32x4, z Mask32x4) Mask32x4 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VCMPPS, CPU Feature: AVX512EVEX +func (x Float32x8) MaskedLessEqual(y Float32x8, z Mask32x8) Mask32x8 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VCMPPS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedLessEqual(y Float32x16, z Mask32x16) Mask32x16 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VCMPPD, CPU Feature: AVX512EVEX +func (x Float64x2) MaskedLessEqual(y Float64x2, z Mask64x2) Mask64x2 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VCMPPD, CPU Feature: AVX512EVEX +func (x Float64x4) MaskedLessEqual(y Float64x4, z Mask64x4) Mask64x4 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VCMPPD, CPU Feature: AVX512EVEX +func (x Float64x8) MaskedLessEqual(y Float64x8, z Mask64x8) Mask64x8 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VPCMPB, CPU Feature: AVX512EVEX +func (x Int8x16) MaskedLessEqual(y Int8x16, z Mask8x16) Mask8x16 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VPCMPB, CPU Feature: AVX512EVEX +func (x Int8x32) MaskedLessEqual(y Int8x32, z Mask8x32) Mask8x32 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. 
+// +// Asm: VPCMPB, CPU Feature: AVX512EVEX +func (x Int8x64) MaskedLessEqual(y Int8x64, z Mask8x64) Mask8x64 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VPCMPW, CPU Feature: AVX512EVEX +func (x Int16x8) MaskedLessEqual(y Int16x8, z Mask16x8) Mask16x8 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VPCMPW, CPU Feature: AVX512EVEX +func (x Int16x16) MaskedLessEqual(y Int16x16, z Mask16x16) Mask16x16 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VPCMPW, CPU Feature: AVX512EVEX +func (x Int16x32) MaskedLessEqual(y Int16x32, z Mask16x32) Mask16x32 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VPCMPD, CPU Feature: AVX512EVEX +func (x Int32x4) MaskedLessEqual(y Int32x4, z Mask32x4) Mask32x4 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VPCMPD, CPU Feature: AVX512EVEX +func (x Int32x8) MaskedLessEqual(y Int32x8, z Mask32x8) Mask32x8 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VPCMPD, CPU Feature: AVX512EVEX +func (x Int32x16) MaskedLessEqual(y Int32x16, z Mask32x16) Mask32x16 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VPCMPQ, CPU Feature: AVX512EVEX +func (x Int64x2) MaskedLessEqual(y Int64x2, z Mask64x2) Mask64x2 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VPCMPQ, CPU Feature: AVX512EVEX +func (x Int64x4) MaskedLessEqual(y Int64x4, z Mask64x4) Mask64x4 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VPCMPQ, CPU Feature: AVX512EVEX +func (x Int64x8) MaskedLessEqual(y Int64x8, z Mask64x8) Mask64x8 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x16) MaskedLessEqual(y Uint8x16, z Mask8x16) Mask8x16 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x32) MaskedLessEqual(y Uint8x32, z Mask8x32) Mask8x32 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x64) MaskedLessEqual(y Uint8x64, z Mask8x64) Mask8x64 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x8) MaskedLessEqual(y Uint16x8, z Mask16x8) Mask16x8 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x16) MaskedLessEqual(y Uint16x16, z Mask16x16) Mask16x16 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x32) MaskedLessEqual(y Uint16x32, z Mask16x32) Mask16x32 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x4) MaskedLessEqual(y Uint32x4, z Mask32x4) Mask32x4 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x8) MaskedLessEqual(y Uint32x8, z Mask32x8) Mask32x8 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x16) MaskedLessEqual(y Uint32x16, z Mask32x16) Mask32x16 + +// LessEqual compares for less than or equal. 
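For reference, the Const Immediate values cited across the comparison stubs in this file line up as follows; the Go names are hypothetical mnemonics added as a reading aid, while the numbers come straight from the doc comments:

// Comparison immediates as cited in the stubs above and below.
const (
	cmpLess         = 1 // Less
	cmpLessEqual    = 2 // LessEqual
	cmpUnordered    = 3 // IsNan
	cmpNotEqual     = 4 // NotEqual
	cmpGreaterEqual = 5 // GreaterEqual
	cmpGreater      = 6 // Greater
)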
+// Const Immediate = 2. +// +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x2) MaskedLessEqual(y Uint64x2, z Mask64x2) Mask64x2 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x4) MaskedLessEqual(y Uint64x4, z Mask64x4) Mask64x4 + +// LessEqual compares for less than or equal. +// Const Immediate = 2. +// +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x8) MaskedLessEqual(y Uint64x8, z Mask64x8) Mask64x8 + +/* MaskedMax */ + +// Max computes the maximum of corresponding elements. +// +// Asm: VMAXPS, CPU Feature: AVX512EVEX +func (x Float32x4) MaskedMax(y Float32x4, z Mask32x4) Float32x4 + +// Max computes the maximum of corresponding elements. +// +// Asm: VMAXPS, CPU Feature: AVX512EVEX +func (x Float32x8) MaskedMax(y Float32x8, z Mask32x8) Float32x8 + +// Max computes the maximum of corresponding elements. +// +// Asm: VMAXPS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedMax(y Float32x16, z Mask32x16) Float32x16 + +// Max computes the maximum of corresponding elements. +// +// Asm: VMAXPD, CPU Feature: AVX512EVEX +func (x Float64x2) MaskedMax(y Float64x2, z Mask64x2) Float64x2 + +// Max computes the maximum of corresponding elements. +// +// Asm: VMAXPD, CPU Feature: AVX512EVEX +func (x Float64x4) MaskedMax(y Float64x4, z Mask64x4) Float64x4 + +// Max computes the maximum of corresponding elements. +// +// Asm: VMAXPD, CPU Feature: AVX512EVEX +func (x Float64x8) MaskedMax(y Float64x8, z Mask64x8) Float64x8 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXSB, CPU Feature: AVX512EVEX +func (x Int8x16) MaskedMax(y Int8x16, z Mask8x16) Int8x16 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXSB, CPU Feature: AVX512EVEX +func (x Int8x32) MaskedMax(y Int8x32, z Mask8x32) Int8x32 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXSB, CPU Feature: AVX512EVEX +func (x Int8x64) MaskedMax(y Int8x64, z Mask8x64) Int8x64 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXSW, CPU Feature: AVX512EVEX +func (x Int16x8) MaskedMax(y Int16x8, z Mask16x8) Int16x8 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXSW, CPU Feature: AVX512EVEX +func (x Int16x16) MaskedMax(y Int16x16, z Mask16x16) Int16x16 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXSW, CPU Feature: AVX512EVEX +func (x Int16x32) MaskedMax(y Int16x32, z Mask16x32) Int16x32 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXSD, CPU Feature: AVX512EVEX +func (x Int32x4) MaskedMax(y Int32x4, z Mask32x4) Int32x4 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXSD, CPU Feature: AVX512EVEX +func (x Int32x8) MaskedMax(y Int32x8, z Mask32x8) Int32x8 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXSD, CPU Feature: AVX512EVEX +func (x Int32x16) MaskedMax(y Int32x16, z Mask32x16) Int32x16 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXSQ, CPU Feature: AVX512EVEX +func (x Int64x2) MaskedMax(y Int64x2, z Mask64x2) Int64x2 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXSQ, CPU Feature: AVX512EVEX +func (x Int64x4) MaskedMax(y Int64x4, z Mask64x4) Int64x4 + +// Max computes the maximum of corresponding elements. 
+// +// Asm: VPMAXSQ, CPU Feature: AVX512EVEX +func (x Int64x8) MaskedMax(y Int64x8, z Mask64x8) Int64x8 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXUB, CPU Feature: AVX512EVEX +func (x Uint8x16) MaskedMax(y Uint8x16, z Mask8x16) Uint8x16 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXUB, CPU Feature: AVX512EVEX +func (x Uint8x32) MaskedMax(y Uint8x32, z Mask8x32) Uint8x32 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXUB, CPU Feature: AVX512EVEX +func (x Uint8x64) MaskedMax(y Uint8x64, z Mask8x64) Uint8x64 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXUW, CPU Feature: AVX512EVEX +func (x Uint16x8) MaskedMax(y Uint16x8, z Mask16x8) Uint16x8 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXUW, CPU Feature: AVX512EVEX +func (x Uint16x16) MaskedMax(y Uint16x16, z Mask16x16) Uint16x16 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXUW, CPU Feature: AVX512EVEX +func (x Uint16x32) MaskedMax(y Uint16x32, z Mask16x32) Uint16x32 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXUD, CPU Feature: AVX512EVEX +func (x Uint32x4) MaskedMax(y Uint32x4, z Mask32x4) Uint32x4 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXUD, CPU Feature: AVX512EVEX +func (x Uint32x8) MaskedMax(y Uint32x8, z Mask32x8) Uint32x8 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXUD, CPU Feature: AVX512EVEX +func (x Uint32x16) MaskedMax(y Uint32x16, z Mask32x16) Uint32x16 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXUQ, CPU Feature: AVX512EVEX +func (x Uint64x2) MaskedMax(y Uint64x2, z Mask64x2) Uint64x2 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXUQ, CPU Feature: AVX512EVEX +func (x Uint64x4) MaskedMax(y Uint64x4, z Mask64x4) Uint64x4 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXUQ, CPU Feature: AVX512EVEX +func (x Uint64x8) MaskedMax(y Uint64x8, z Mask64x8) Uint64x8 + +/* MaskedMin */ + +// Min computes the minimum of corresponding elements. +// +// Asm: VMINPS, CPU Feature: AVX512EVEX +func (x Float32x4) MaskedMin(y Float32x4, z Mask32x4) Float32x4 + +// Min computes the minimum of corresponding elements. +// +// Asm: VMINPS, CPU Feature: AVX512EVEX +func (x Float32x8) MaskedMin(y Float32x8, z Mask32x8) Float32x8 + +// Min computes the minimum of corresponding elements. +// +// Asm: VMINPS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedMin(y Float32x16, z Mask32x16) Float32x16 + +// Min computes the minimum of corresponding elements. +// +// Asm: VMINPD, CPU Feature: AVX512EVEX +func (x Float64x2) MaskedMin(y Float64x2, z Mask64x2) Float64x2 + +// Min computes the minimum of corresponding elements. +// +// Asm: VMINPD, CPU Feature: AVX512EVEX +func (x Float64x4) MaskedMin(y Float64x4, z Mask64x4) Float64x4 + +// Min computes the minimum of corresponding elements. +// +// Asm: VMINPD, CPU Feature: AVX512EVEX +func (x Float64x8) MaskedMin(y Float64x8, z Mask64x8) Float64x8 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINSB, CPU Feature: AVX512EVEX +func (x Int8x16) MaskedMin(y Int8x16, z Mask8x16) Int8x16 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINSB, CPU Feature: AVX512EVEX +func (x Int8x32) MaskedMin(y Int8x32, z Mask8x32) Int8x32 + +// Min computes the minimum of corresponding elements. 
+// +// Asm: VPMINSB, CPU Feature: AVX512EVEX +func (x Int8x64) MaskedMin(y Int8x64, z Mask8x64) Int8x64 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINSW, CPU Feature: AVX512EVEX +func (x Int16x8) MaskedMin(y Int16x8, z Mask16x8) Int16x8 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINSW, CPU Feature: AVX512EVEX +func (x Int16x16) MaskedMin(y Int16x16, z Mask16x16) Int16x16 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINSW, CPU Feature: AVX512EVEX +func (x Int16x32) MaskedMin(y Int16x32, z Mask16x32) Int16x32 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINSD, CPU Feature: AVX512EVEX +func (x Int32x4) MaskedMin(y Int32x4, z Mask32x4) Int32x4 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINSD, CPU Feature: AVX512EVEX +func (x Int32x8) MaskedMin(y Int32x8, z Mask32x8) Int32x8 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINSD, CPU Feature: AVX512EVEX +func (x Int32x16) MaskedMin(y Int32x16, z Mask32x16) Int32x16 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINSQ, CPU Feature: AVX512EVEX +func (x Int64x2) MaskedMin(y Int64x2, z Mask64x2) Int64x2 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINSQ, CPU Feature: AVX512EVEX +func (x Int64x4) MaskedMin(y Int64x4, z Mask64x4) Int64x4 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINSQ, CPU Feature: AVX512EVEX +func (x Int64x8) MaskedMin(y Int64x8, z Mask64x8) Int64x8 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINUB, CPU Feature: AVX512EVEX +func (x Uint8x16) MaskedMin(y Uint8x16, z Mask8x16) Uint8x16 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINUB, CPU Feature: AVX512EVEX +func (x Uint8x32) MaskedMin(y Uint8x32, z Mask8x32) Uint8x32 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINUB, CPU Feature: AVX512EVEX +func (x Uint8x64) MaskedMin(y Uint8x64, z Mask8x64) Uint8x64 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINUW, CPU Feature: AVX512EVEX +func (x Uint16x8) MaskedMin(y Uint16x8, z Mask16x8) Uint16x8 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINUW, CPU Feature: AVX512EVEX +func (x Uint16x16) MaskedMin(y Uint16x16, z Mask16x16) Uint16x16 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINUW, CPU Feature: AVX512EVEX +func (x Uint16x32) MaskedMin(y Uint16x32, z Mask16x32) Uint16x32 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINUD, CPU Feature: AVX512EVEX +func (x Uint32x4) MaskedMin(y Uint32x4, z Mask32x4) Uint32x4 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINUD, CPU Feature: AVX512EVEX +func (x Uint32x8) MaskedMin(y Uint32x8, z Mask32x8) Uint32x8 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINUD, CPU Feature: AVX512EVEX +func (x Uint32x16) MaskedMin(y Uint32x16, z Mask32x16) Uint32x16 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINUQ, CPU Feature: AVX512EVEX +func (x Uint64x2) MaskedMin(y Uint64x2, z Mask64x2) Uint64x2 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINUQ, CPU Feature: AVX512EVEX +func (x Uint64x4) MaskedMin(y Uint64x4, z Mask64x4) Uint64x4 + +// Min computes the minimum of corresponding elements. 
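MaskedMin and MaskedMax compose into a lane-wise clamp. Same assumptions as earlier, with the added caveat that the stubs do not specify what each step does on unselected lanes:

// clampOn clamps the lanes of x selected by m into [lo, hi], raising
// to lo via MaskedMax and then capping at hi via MaskedMin.
func clampOn(x, lo, hi simd.Float32x4, m simd.Mask32x4) simd.Float32x4 {
	return x.MaskedMax(lo, m).MaskedMin(hi, m)
}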
+// +// Asm: VPMINUQ, CPU Feature: AVX512EVEX +func (x Uint64x8) MaskedMin(y Uint64x8, z Mask64x8) Uint64x8 + +/* MaskedMul */ + +// Mul multiplies corresponding elements of two vectors, masked. +// +// Asm: VMULPS, CPU Feature: AVX512EVEX +func (x Float32x4) MaskedMul(y Float32x4, z Mask32x4) Float32x4 + +// Mul multiplies corresponding elements of two vectors, masked. +// +// Asm: VMULPS, CPU Feature: AVX512EVEX +func (x Float32x8) MaskedMul(y Float32x8, z Mask32x8) Float32x8 + +// Mul multiplies corresponding elements of two vectors, masked. +// +// Asm: VMULPS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedMul(y Float32x16, z Mask32x16) Float32x16 + +// Mul multiplies corresponding elements of two vectors, masked. +// +// Asm: VMULPD, CPU Feature: AVX512EVEX +func (x Float64x2) MaskedMul(y Float64x2, z Mask64x2) Float64x2 + +// Mul multiplies corresponding elements of two vectors, masked. +// +// Asm: VMULPD, CPU Feature: AVX512EVEX +func (x Float64x4) MaskedMul(y Float64x4, z Mask64x4) Float64x4 + +// Mul multiplies corresponding elements of two vectors, masked. +// +// Asm: VMULPD, CPU Feature: AVX512EVEX +func (x Float64x8) MaskedMul(y Float64x8, z Mask64x8) Float64x8 + +/* MaskedMulByPowOf2 */ + +// MulByPowOf2 multiplies elements by a power of 2. +// +// Asm: VSCALEFPS, CPU Feature: AVX512EVEX +func (x Float32x4) MaskedMulByPowOf2(y Float32x4, z Mask32x4) Float32x4 + +// MulByPowOf2 multiplies elements by a power of 2. +// +// Asm: VSCALEFPS, CPU Feature: AVX512EVEX +func (x Float32x8) MaskedMulByPowOf2(y Float32x8, z Mask32x8) Float32x8 + +// MulByPowOf2 multiplies elements by a power of 2. +// +// Asm: VSCALEFPS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedMulByPowOf2(y Float32x16, z Mask32x16) Float32x16 + +// MulByPowOf2 multiplies elements by a power of 2. +// +// Asm: VSCALEFPD, CPU Feature: AVX512EVEX +func (x Float64x2) MaskedMulByPowOf2(y Float64x2, z Mask64x2) Float64x2 + +// MulByPowOf2 multiplies elements by a power of 2. +// +// Asm: VSCALEFPD, CPU Feature: AVX512EVEX +func (x Float64x4) MaskedMulByPowOf2(y Float64x4, z Mask64x4) Float64x4 + +// MulByPowOf2 multiplies elements by a power of 2. +// +// Asm: VSCALEFPD, CPU Feature: AVX512EVEX +func (x Float64x8) MaskedMulByPowOf2(y Float64x8, z Mask64x8) Float64x8 + +/* MaskedMulEvenWiden */ + +// MulEvenWiden multiplies even-indexed elements, widening the result, masked. +// Result[i] = v1.Even[i] * v2.Even[i]. +// +// Asm: VPMULDQ, CPU Feature: AVX512EVEX +func (x Int64x2) MaskedMulEvenWiden(y Int64x2, z Mask64x2) Int64x2 + +// MulEvenWiden multiplies even-indexed elements, widening the result, masked. +// Result[i] = v1.Even[i] * v2.Even[i]. +// +// Asm: VPMULDQ, CPU Feature: AVX512EVEX +func (x Int64x4) MaskedMulEvenWiden(y Int64x4, z Mask64x4) Int64x4 + +// MulEvenWiden multiplies even-indexed elements, widening the result, masked. +// Result[i] = v1.Even[i] * v2.Even[i]. +// +// Asm: VPMULDQ, CPU Feature: AVX512EVEX +func (x Int64x8) MaskedMulEvenWiden(y Int64x8, z Mask64x8) Int64x8 + +// MulEvenWiden multiplies even-indexed elements, widening the result, masked. +// Result[i] = v1.Even[i] * v2.Even[i]. +// +// Asm: VPMULUDQ, CPU Feature: AVX512EVEX +func (x Uint64x2) MaskedMulEvenWiden(y Uint64x2, z Mask64x2) Uint64x2 + +// MulEvenWiden multiplies even-indexed elements, widening the result, masked. +// Result[i] = v1.Even[i] * v2.Even[i]. 
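The MulByPowOf2 stubs above (VSCALEFPS/VSCALEFPD) behave like a vectorized ldexp: each lane of the receiver is scaled by two raised to the corresponding lane of the argument. Same assumptions:

// scaleByPow2 multiplies each selected lane of x by 2 to the power of
// the matching lane of e.
func scaleByPow2(x, e simd.Float32x4, m simd.Mask32x4) simd.Float32x4 {
	return x.MaskedMulByPowOf2(e, m)
}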
+// +// Asm: VPMULUDQ, CPU Feature: AVX512EVEX +func (x Uint64x4) MaskedMulEvenWiden(y Uint64x4, z Mask64x4) Uint64x4 + +// MulEvenWiden multiplies even-indexed elements, widening the result, masked. +// Result[i] = v1.Even[i] * v2.Even[i]. +// +// Asm: VPMULUDQ, CPU Feature: AVX512EVEX +func (x Uint64x8) MaskedMulEvenWiden(y Uint64x8, z Mask64x8) Uint64x8 + +/* MaskedMulHigh */ + +// MulHigh multiplies elements and stores the high part of the result, masked. +// +// Asm: VPMULHW, CPU Feature: AVX512EVEX +func (x Int16x8) MaskedMulHigh(y Int16x8, z Mask16x8) Int16x8 + +// MulHigh multiplies elements and stores the high part of the result, masked. +// +// Asm: VPMULHW, CPU Feature: AVX512EVEX +func (x Int16x16) MaskedMulHigh(y Int16x16, z Mask16x16) Int16x16 + +// MulHigh multiplies elements and stores the high part of the result, masked. +// +// Asm: VPMULHW, CPU Feature: AVX512EVEX +func (x Int16x32) MaskedMulHigh(y Int16x32, z Mask16x32) Int16x32 + +// MulHigh multiplies elements and stores the high part of the result, masked. +// +// Asm: VPMULHUW, CPU Feature: AVX512EVEX +func (x Uint16x8) MaskedMulHigh(y Uint16x8, z Mask16x8) Uint16x8 + +// MulHigh multiplies elements and stores the high part of the result, masked. +// +// Asm: VPMULHUW, CPU Feature: AVX512EVEX +func (x Uint16x16) MaskedMulHigh(y Uint16x16, z Mask16x16) Uint16x16 + +// MulHigh multiplies elements and stores the high part of the result, masked. +// +// Asm: VPMULHUW, CPU Feature: AVX512EVEX +func (x Uint16x32) MaskedMulHigh(y Uint16x32, z Mask16x32) Uint16x32 + +/* MaskedMulLow */ + +// MulLow multiplies elements and stores the low part of the result, masked. +// +// Asm: VPMULLW, CPU Feature: AVX512EVEX +func (x Int16x8) MaskedMulLow(y Int16x8, z Mask16x8) Int16x8 + +// MulLow multiplies elements and stores the low part of the result, masked. +// +// Asm: VPMULLW, CPU Feature: AVX512EVEX +func (x Int16x16) MaskedMulLow(y Int16x16, z Mask16x16) Int16x16 + +// MulLow multiplies elements and stores the low part of the result, masked. +// +// Asm: VPMULLW, CPU Feature: AVX512EVEX +func (x Int16x32) MaskedMulLow(y Int16x32, z Mask16x32) Int16x32 + +// MulLow multiplies elements and stores the low part of the result, masked. +// +// Asm: VPMULLD, CPU Feature: AVX512EVEX +func (x Int32x4) MaskedMulLow(y Int32x4, z Mask32x4) Int32x4 + +// MulLow multiplies elements and stores the low part of the result, masked. +// +// Asm: VPMULLD, CPU Feature: AVX512EVEX +func (x Int32x8) MaskedMulLow(y Int32x8, z Mask32x8) Int32x8 + +// MulLow multiplies elements and stores the low part of the result, masked. +// +// Asm: VPMULLD, CPU Feature: AVX512EVEX +func (x Int32x16) MaskedMulLow(y Int32x16, z Mask32x16) Int32x16 + +// MulLow multiplies elements and stores the low part of the result, masked. +// +// Asm: VPMULLQ, CPU Feature: AVX512EVEX +func (x Int64x2) MaskedMulLow(y Int64x2, z Mask64x2) Int64x2 + +// MulLow multiplies elements and stores the low part of the result, masked. +// +// Asm: VPMULLQ, CPU Feature: AVX512EVEX +func (x Int64x4) MaskedMulLow(y Int64x4, z Mask64x4) Int64x4 + +// MulLow multiplies elements and stores the low part of the result, masked. +// +// Asm: VPMULLQ, CPU Feature: AVX512EVEX +func (x Int64x8) MaskedMulLow(y Int64x8, z Mask64x8) Int64x8 + +/* MaskedNotEqual */ + +// NotEqual compares for inequality. +// Const Immediate = 4. +// +// Asm: VCMPPS, CPU Feature: AVX512EVEX +func (x Float32x4) MaskedNotEqual(y Float32x4, z Mask32x4) Mask32x4 + +// NotEqual compares for inequality. +// Const Immediate = 4. 
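MaskedMulHigh and MaskedMulLow together recover the full double-width product: a 16x16-bit lane multiply yields 32 bits, handed back as separate high and low halves. Same assumptions:

// fullProduct16 returns the high and low 16-bit halves of each selected
// lane's 32-bit product x[i]*y[i].
func fullProduct16(x, y simd.Int16x8, m simd.Mask16x8) (hi, lo simd.Int16x8) {
	return x.MaskedMulHigh(y, m), x.MaskedMulLow(y, m)
}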
+// +// Asm: VCMPPS, CPU Feature: AVX512EVEX +func (x Float32x8) MaskedNotEqual(y Float32x8, z Mask32x8) Mask32x8 + +// NotEqual compares for inequality. +// Const Immediate = 4. +// +// Asm: VCMPPS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedNotEqual(y Float32x16, z Mask32x16) Mask32x16 + +// NotEqual compares for inequality. +// Const Immediate = 4. +// +// Asm: VCMPPD, CPU Feature: AVX512EVEX +func (x Float64x2) MaskedNotEqual(y Float64x2, z Mask64x2) Mask64x2 + +// NotEqual compares for inequality. +// Const Immediate = 4. +// +// Asm: VCMPPD, CPU Feature: AVX512EVEX +func (x Float64x4) MaskedNotEqual(y Float64x4, z Mask64x4) Mask64x4 + +// NotEqual compares for inequality. +// Const Immediate = 4. +// +// Asm: VCMPPD, CPU Feature: AVX512EVEX +func (x Float64x8) MaskedNotEqual(y Float64x8, z Mask64x8) Mask64x8 + +// NotEqual compares for inequality. +// Const Immediate = 4. +// +// Asm: VPCMPB, CPU Feature: AVX512EVEX +func (x Int8x16) MaskedNotEqual(y Int8x16, z Mask8x16) Mask8x16 + +// NotEqual compares for inequality. +// Const Immediate = 4. +// +// Asm: VPCMPB, CPU Feature: AVX512EVEX +func (x Int8x32) MaskedNotEqual(y Int8x32, z Mask8x32) Mask8x32 + +// NotEqual compares for inequality. +// Const Immediate = 4. +// +// Asm: VPCMPB, CPU Feature: AVX512EVEX +func (x Int8x64) MaskedNotEqual(y Int8x64, z Mask8x64) Mask8x64 + +// NotEqual compares for inequality. +// Const Immediate = 4. +// +// Asm: VPCMPW, CPU Feature: AVX512EVEX +func (x Int16x8) MaskedNotEqual(y Int16x8, z Mask16x8) Mask16x8 + +// NotEqual compares for inequality. +// Const Immediate = 4. +// +// Asm: VPCMPW, CPU Feature: AVX512EVEX +func (x Int16x16) MaskedNotEqual(y Int16x16, z Mask16x16) Mask16x16 + +// NotEqual compares for inequality. +// Const Immediate = 4. +// +// Asm: VPCMPW, CPU Feature: AVX512EVEX +func (x Int16x32) MaskedNotEqual(y Int16x32, z Mask16x32) Mask16x32 + +// NotEqual compares for inequality. +// Const Immediate = 4. +// +// Asm: VPCMPD, CPU Feature: AVX512EVEX +func (x Int32x4) MaskedNotEqual(y Int32x4, z Mask32x4) Mask32x4 + +// NotEqual compares for inequality. +// Const Immediate = 4. +// +// Asm: VPCMPD, CPU Feature: AVX512EVEX +func (x Int32x8) MaskedNotEqual(y Int32x8, z Mask32x8) Mask32x8 + +// NotEqual compares for inequality. +// Const Immediate = 4. +// +// Asm: VPCMPD, CPU Feature: AVX512EVEX +func (x Int32x16) MaskedNotEqual(y Int32x16, z Mask32x16) Mask32x16 + +// NotEqual compares for inequality. +// Const Immediate = 4. +// +// Asm: VPCMPQ, CPU Feature: AVX512EVEX +func (x Int64x2) MaskedNotEqual(y Int64x2, z Mask64x2) Mask64x2 + +// NotEqual compares for inequality. +// Const Immediate = 4. +// +// Asm: VPCMPQ, CPU Feature: AVX512EVEX +func (x Int64x4) MaskedNotEqual(y Int64x4, z Mask64x4) Mask64x4 + +// NotEqual compares for inequality. +// Const Immediate = 4. +// +// Asm: VPCMPQ, CPU Feature: AVX512EVEX +func (x Int64x8) MaskedNotEqual(y Int64x8, z Mask64x8) Mask64x8 + +// NotEqual compares for inequality. +// Const Immediate = 4. +// +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x16) MaskedNotEqual(y Uint8x16, z Mask8x16) Mask8x16 + +// NotEqual compares for inequality. +// Const Immediate = 4. +// +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x32) MaskedNotEqual(y Uint8x32, z Mask8x32) Mask8x32 + +// NotEqual compares for inequality. +// Const Immediate = 4. +// +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x64) MaskedNotEqual(y Uint8x64, z Mask8x64) Mask8x64 + +// NotEqual compares for inequality. +// Const Immediate = 4. 
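Because MaskedNotEqual both consumes and produces a mask, it composes naturally into lane-filtering pipelines. A hypothetical helper that narrows an existing lane set to the lanes whose values differ:

```go
package example

import "simd" // import path assumed

// changed narrows active to the lanes where a and b differ. The compare runs
// only where active is set; inactive lanes are assumed to report false.
func changed(a, b simd.Int32x8, active simd.Mask32x8) simd.Mask32x8 {
	return a.MaskedNotEqual(b, active) // VPCMPD, imm 4 (NEQ), under mask
}
```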
+// +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x8) MaskedNotEqual(y Uint16x8, z Mask16x8) Mask16x8 + +// NotEqual compares for inequality. +// Const Immediate = 4. +// +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x16) MaskedNotEqual(y Uint16x16, z Mask16x16) Mask16x16 + +// NotEqual compares for inequality. +// Const Immediate = 4. +// +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x32) MaskedNotEqual(y Uint16x32, z Mask16x32) Mask16x32 + +// NotEqual compares for inequality. +// Const Immediate = 4. +// +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x4) MaskedNotEqual(y Uint32x4, z Mask32x4) Mask32x4 + +// NotEqual compares for inequality. +// Const Immediate = 4. +// +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x8) MaskedNotEqual(y Uint32x8, z Mask32x8) Mask32x8 + +// NotEqual compares for inequality. +// Const Immediate = 4. +// +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x16) MaskedNotEqual(y Uint32x16, z Mask32x16) Mask32x16 + +// NotEqual compares for inequality. +// Const Immediate = 4. +// +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x2) MaskedNotEqual(y Uint64x2, z Mask64x2) Mask64x2 + +// NotEqual compares for inequality. +// Const Immediate = 4. +// +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x4) MaskedNotEqual(y Uint64x4, z Mask64x4) Mask64x4 + +// NotEqual compares for inequality. +// Const Immediate = 4. +// +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x8) MaskedNotEqual(y Uint64x8, z Mask64x8) Mask64x8 + +/* MaskedOr */ + +// Or performs a masked bitwise OR operation between two vectors. +// +// Asm: VORPS, CPU Feature: AVX512EVEX +func (x Float32x4) MaskedOr(y Float32x4, z Mask32x4) Float32x4 + +// Or performs a masked bitwise OR operation between two vectors. +// +// Asm: VORPS, CPU Feature: AVX512EVEX +func (x Float32x8) MaskedOr(y Float32x8, z Mask32x8) Float32x8 + +// Or performs a masked bitwise OR operation between two vectors. +// +// Asm: VORPS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedOr(y Float32x16, z Mask32x16) Float32x16 + +// Or performs a masked bitwise OR operation between two vectors. +// +// Asm: VORPD, CPU Feature: AVX512EVEX +func (x Float64x2) MaskedOr(y Float64x2, z Mask64x2) Float64x2 + +// Or performs a masked bitwise OR operation between two vectors. +// +// Asm: VORPD, CPU Feature: AVX512EVEX +func (x Float64x4) MaskedOr(y Float64x4, z Mask64x4) Float64x4 + +// Or performs a masked bitwise OR operation between two vectors. +// +// Asm: VORPD, CPU Feature: AVX512EVEX +func (x Float64x8) MaskedOr(y Float64x8, z Mask64x8) Float64x8 + +// Or performs a masked bitwise OR operation between two vectors. +// +// Asm: VPORD, CPU Feature: AVX512EVEX +func (x Int32x4) MaskedOr(y Int32x4, z Mask32x4) Int32x4 + +// Or performs a masked bitwise OR operation between two vectors. +// +// Asm: VPORD, CPU Feature: AVX512EVEX +func (x Int32x8) MaskedOr(y Int32x8, z Mask32x8) Int32x8 + +// Or performs a masked bitwise OR operation between two vectors. +// +// Asm: VPORD, CPU Feature: AVX512EVEX +func (x Int32x16) MaskedOr(y Int32x16, z Mask32x16) Int32x16 + +// Or performs a masked bitwise OR operation between two vectors. +// +// Asm: VPORQ, CPU Feature: AVX512EVEX +func (x Int64x2) MaskedOr(y Int64x2, z Mask64x2) Int64x2 + +// Or performs a masked bitwise OR operation between two vectors. 
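A short sketch of the masked bitwise form (assumptions as above): VPORD under a mask lets a caller merge flag words only in selected lanes.

```go
package example

import "simd" // import path assumed

// mergeFlags ORs flag words lane-by-lane, but only where m is set; the other
// lanes are assumed zeroed by the masked form.
func mergeFlags(a, b simd.Uint32x16, m simd.Mask32x16) simd.Uint32x16 {
	return a.MaskedOr(b, m) // VPORD under mask
}
```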
+//
+// Asm: VPORQ, CPU Feature: AVX512EVEX
+func (x Int64x4) MaskedOr(y Int64x4, z Mask64x4) Int64x4
+
+// Or performs a masked bitwise OR operation between two vectors.
+//
+// Asm: VPORQ, CPU Feature: AVX512EVEX
+func (x Int64x8) MaskedOr(y Int64x8, z Mask64x8) Int64x8
+
+// Or performs a masked bitwise OR operation between two vectors.
+//
+// Asm: VPORD, CPU Feature: AVX512EVEX
+func (x Uint32x4) MaskedOr(y Uint32x4, z Mask32x4) Uint32x4
+
+// Or performs a masked bitwise OR operation between two vectors.
+//
+// Asm: VPORD, CPU Feature: AVX512EVEX
+func (x Uint32x8) MaskedOr(y Uint32x8, z Mask32x8) Uint32x8
+
+// Or performs a masked bitwise OR operation between two vectors.
+//
+// Asm: VPORD, CPU Feature: AVX512EVEX
+func (x Uint32x16) MaskedOr(y Uint32x16, z Mask32x16) Uint32x16
+
+// Or performs a masked bitwise OR operation between two vectors.
+//
+// Asm: VPORQ, CPU Feature: AVX512EVEX
+func (x Uint64x2) MaskedOr(y Uint64x2, z Mask64x2) Uint64x2
+
+// Or performs a masked bitwise OR operation between two vectors.
+//
+// Asm: VPORQ, CPU Feature: AVX512EVEX
+func (x Uint64x4) MaskedOr(y Uint64x4, z Mask64x4) Uint64x4
+
+// Or performs a masked bitwise OR operation between two vectors.
+//
+// Asm: VPORQ, CPU Feature: AVX512EVEX
+func (x Uint64x8) MaskedOr(y Uint64x8, z Mask64x8) Uint64x8
+
+/* MaskedPairDotProd */
+
+// PairDotProd multiplies the elements and adds the pairs together,
+// yielding a vector of half as many elements with twice the input element size.
+//
+// Asm: VPMADDWD, CPU Feature: AVX512EVEX
+func (x Int16x8) MaskedPairDotProd(y Int16x8, z Mask16x8) Int32x4
+
+// PairDotProd multiplies the elements and adds the pairs together,
+// yielding a vector of half as many elements with twice the input element size.
+//
+// Asm: VPMADDWD, CPU Feature: AVX512EVEX
+func (x Int16x16) MaskedPairDotProd(y Int16x16, z Mask16x16) Int32x8
+
+// PairDotProd multiplies the elements and adds the pairs together,
+// yielding a vector of half as many elements with twice the input element size.
+//
+// Asm: VPMADDWD, CPU Feature: AVX512EVEX
+func (x Int16x32) MaskedPairDotProd(y Int16x32, z Mask16x32) Int32x16
+
+/* MaskedPairDotProdAccumulate */
+
+// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPWSSD, CPU Feature: AVX512EVEX
+func (x Int32x4) MaskedPairDotProdAccumulate(y Int16x8, z Int32x4, u Mask32x4) Int32x4
+
+// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPWSSD, CPU Feature: AVX512EVEX
+func (x Int32x8) MaskedPairDotProdAccumulate(y Int16x16, z Int32x8, u Mask32x8) Int32x8
+
+// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPWSSD, CPU Feature: AVX512EVEX
+func (x Int32x16) MaskedPairDotProdAccumulate(y Int16x32, z Int32x16, u Mask32x16) Int32x16
+
+/* MaskedPopCount */
+
+// PopCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTB, CPU Feature: AVX512EVEX
+func (x Int8x16) MaskedPopCount(y Mask8x16) Int8x16
+
+// PopCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTB, CPU Feature: AVX512EVEX
+func (x Int8x32) MaskedPopCount(y Mask8x32) Int8x32
+
+// PopCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTB, CPU Feature: AVX512EVEX
+func (x Int8x64) MaskedPopCount(y Mask8x64) Int8x64
+
+// PopCount counts the number of set bits in each element.
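One VPDPWSSD step fuses what would otherwise be a MaskedPairDotProd followed by an add. The sketch below keeps the operand types exactly as declared above; note that the second source is the Int32-shaped operand in these signatures.

```go
package example

import "simd" // import path assumed

// dotStep folds one block into a running int32 dot-product accumulator.
// Operand types follow the declarations in this diff verbatim.
func dotStep(acc simd.Int32x16, y simd.Int16x32, z simd.Int32x16, m simd.Mask32x16) simd.Int32x16 {
	return acc.MaskedPairDotProdAccumulate(y, z, m) // VPDPWSSD under mask
}
```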
+// +// Asm: VPOPCNTW, CPU Feature: AVX512EVEX +func (x Int16x8) MaskedPopCount(y Mask16x8) Int16x8 + +// PopCount counts the number of set bits in each element. +// +// Asm: VPOPCNTW, CPU Feature: AVX512EVEX +func (x Int16x16) MaskedPopCount(y Mask16x16) Int16x16 + +// PopCount counts the number of set bits in each element. +// +// Asm: VPOPCNTW, CPU Feature: AVX512EVEX +func (x Int16x32) MaskedPopCount(y Mask16x32) Int16x32 + +// PopCount counts the number of set bits in each element. +// +// Asm: VPOPCNTD, CPU Feature: AVX512EVEX +func (x Int32x4) MaskedPopCount(y Mask32x4) Int32x4 + +// PopCount counts the number of set bits in each element. +// +// Asm: VPOPCNTD, CPU Feature: AVX512EVEX +func (x Int32x8) MaskedPopCount(y Mask32x8) Int32x8 + +// PopCount counts the number of set bits in each element. +// +// Asm: VPOPCNTD, CPU Feature: AVX512EVEX +func (x Int32x16) MaskedPopCount(y Mask32x16) Int32x16 + +// PopCount counts the number of set bits in each element. +// +// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX +func (x Int64x2) MaskedPopCount(y Mask64x2) Int64x2 + +// PopCount counts the number of set bits in each element. +// +// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX +func (x Int64x4) MaskedPopCount(y Mask64x4) Int64x4 + +// PopCount counts the number of set bits in each element. +// +// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX +func (x Int64x8) MaskedPopCount(y Mask64x8) Int64x8 + +// PopCount counts the number of set bits in each element. +// +// Asm: VPOPCNTB, CPU Feature: AVX512EVEX +func (x Uint8x16) MaskedPopCount(y Mask8x16) Uint8x16 + +// PopCount counts the number of set bits in each element. +// +// Asm: VPOPCNTB, CPU Feature: AVX512EVEX +func (x Uint8x32) MaskedPopCount(y Mask8x32) Uint8x32 + +// PopCount counts the number of set bits in each element. +// +// Asm: VPOPCNTB, CPU Feature: AVX512EVEX +func (x Uint8x64) MaskedPopCount(y Mask8x64) Uint8x64 + +// PopCount counts the number of set bits in each element. +// +// Asm: VPOPCNTW, CPU Feature: AVX512EVEX +func (x Uint16x8) MaskedPopCount(y Mask16x8) Uint16x8 + +// PopCount counts the number of set bits in each element. +// +// Asm: VPOPCNTW, CPU Feature: AVX512EVEX +func (x Uint16x16) MaskedPopCount(y Mask16x16) Uint16x16 + +// PopCount counts the number of set bits in each element. +// +// Asm: VPOPCNTW, CPU Feature: AVX512EVEX +func (x Uint16x32) MaskedPopCount(y Mask16x32) Uint16x32 + +// PopCount counts the number of set bits in each element. +// +// Asm: VPOPCNTD, CPU Feature: AVX512EVEX +func (x Uint32x4) MaskedPopCount(y Mask32x4) Uint32x4 + +// PopCount counts the number of set bits in each element. +// +// Asm: VPOPCNTD, CPU Feature: AVX512EVEX +func (x Uint32x8) MaskedPopCount(y Mask32x8) Uint32x8 + +// PopCount counts the number of set bits in each element. +// +// Asm: VPOPCNTD, CPU Feature: AVX512EVEX +func (x Uint32x16) MaskedPopCount(y Mask32x16) Uint32x16 + +// PopCount counts the number of set bits in each element. +// +// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX +func (x Uint64x2) MaskedPopCount(y Mask64x2) Uint64x2 + +// PopCount counts the number of set bits in each element. +// +// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX +func (x Uint64x4) MaskedPopCount(y Mask64x4) Uint64x4 + +// PopCount counts the number of set bits in each element. +// +// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX +func (x Uint64x8) MaskedPopCount(y Mask64x8) Uint64x8 + +/* MaskedRoundSuppressExceptionWithPrecision */ // RoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions. 
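For example, a per-byte population count over a masked lane set (same assumptions on import path and zeroing):

```go
package example

import "simd" // import path assumed

// bitsPerByte counts set bits in each byte lane enabled by m.
func bitsPerByte(v simd.Uint8x64, m simd.Mask8x64) simd.Uint8x64 {
	return v.MaskedPopCount(m) // VPOPCNTB under mask: 0b1011 -> 3
}
```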
// Const Immediate = 8. @@ -8278,6 +6110,12 @@ func (x Float32x4) MaskedRoundSuppressExceptionWithPrecision(imm uint8, y Mask32 // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX func (x Float32x8) MaskedRoundSuppressExceptionWithPrecision(imm uint8, y Mask32x8) Float32x8 +// RoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions. +// Const Immediate = 8. +// +// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedRoundSuppressExceptionWithPrecision(imm uint8, y Mask32x16) Float32x16 + // RoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions. // Const Immediate = 8. // @@ -8296,11 +6134,7 @@ func (x Float64x4) MaskedRoundSuppressExceptionWithPrecision(imm uint8, y Mask64 // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX func (x Float64x8) MaskedRoundSuppressExceptionWithPrecision(imm uint8, y Mask64x8) Float64x8 -// RoundWithPrecision rounds elements with specified precision. -// Const Immediate = 0. -// -// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedRoundWithPrecision(imm uint8, y Mask32x16) Float32x16 +/* MaskedRoundWithPrecision */ // RoundWithPrecision rounds elements with specified precision. // Const Immediate = 0. @@ -8314,6 +6148,12 @@ func (x Float32x4) MaskedRoundWithPrecision(imm uint8, y Mask32x4) Float32x4 // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX func (x Float32x8) MaskedRoundWithPrecision(imm uint8, y Mask32x8) Float32x8 +// RoundWithPrecision rounds elements with specified precision. +// Const Immediate = 0. +// +// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedRoundWithPrecision(imm uint8, y Mask32x16) Float32x16 + // RoundWithPrecision rounds elements with specified precision. // Const Immediate = 0. // @@ -8332,11 +6172,384 @@ func (x Float64x4) MaskedRoundWithPrecision(imm uint8, y Mask64x4) Float64x4 // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX func (x Float64x8) MaskedRoundWithPrecision(imm uint8, y Mask64x8) Float64x8 -// TruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions. -// Const Immediate = 11. +/* MaskedSaturatedAdd */ + +// SaturatedAdd adds corresponding elements of two vectors with saturation. // -// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedTruncSuppressExceptionWithPrecision(imm uint8, y Mask32x16) Float32x16 +// Asm: VPADDSB, CPU Feature: AVX512EVEX +func (x Int8x16) MaskedSaturatedAdd(y Int8x16, z Mask8x16) Int8x16 + +// SaturatedAdd adds corresponding elements of two vectors with saturation. +// +// Asm: VPADDSB, CPU Feature: AVX512EVEX +func (x Int8x32) MaskedSaturatedAdd(y Int8x32, z Mask8x32) Int8x32 + +// SaturatedAdd adds corresponding elements of two vectors with saturation. +// +// Asm: VPADDSB, CPU Feature: AVX512EVEX +func (x Int8x64) MaskedSaturatedAdd(y Int8x64, z Mask8x64) Int8x64 + +// SaturatedAdd adds corresponding elements of two vectors with saturation. +// +// Asm: VPADDSW, CPU Feature: AVX512EVEX +func (x Int16x8) MaskedSaturatedAdd(y Int16x8, z Mask16x8) Int16x8 + +// SaturatedAdd adds corresponding elements of two vectors with saturation. +// +// Asm: VPADDSW, CPU Feature: AVX512EVEX +func (x Int16x16) MaskedSaturatedAdd(y Int16x16, z Mask16x16) Int16x16 + +// SaturatedAdd adds corresponding elements of two vectors with saturation. 
+// +// Asm: VPADDSW, CPU Feature: AVX512EVEX +func (x Int16x32) MaskedSaturatedAdd(y Int16x32, z Mask16x32) Int16x32 + +// SaturatedAdd adds corresponding elements of two vectors with saturation. +// +// Asm: VPADDSB, CPU Feature: AVX512EVEX +func (x Uint8x16) MaskedSaturatedAdd(y Uint8x16, z Mask8x16) Uint8x16 + +// SaturatedAdd adds corresponding elements of two vectors with saturation. +// +// Asm: VPADDSB, CPU Feature: AVX512EVEX +func (x Uint8x32) MaskedSaturatedAdd(y Uint8x32, z Mask8x32) Uint8x32 + +// SaturatedAdd adds corresponding elements of two vectors with saturation. +// +// Asm: VPADDSB, CPU Feature: AVX512EVEX +func (x Uint8x64) MaskedSaturatedAdd(y Uint8x64, z Mask8x64) Uint8x64 + +// SaturatedAdd adds corresponding elements of two vectors with saturation. +// +// Asm: VPADDSW, CPU Feature: AVX512EVEX +func (x Uint16x8) MaskedSaturatedAdd(y Uint16x8, z Mask16x8) Uint16x8 + +// SaturatedAdd adds corresponding elements of two vectors with saturation. +// +// Asm: VPADDSW, CPU Feature: AVX512EVEX +func (x Uint16x16) MaskedSaturatedAdd(y Uint16x16, z Mask16x16) Uint16x16 + +// SaturatedAdd adds corresponding elements of two vectors with saturation. +// +// Asm: VPADDSW, CPU Feature: AVX512EVEX +func (x Uint16x32) MaskedSaturatedAdd(y Uint16x32, z Mask16x32) Uint16x32 + +/* MaskedSaturatedPairDotProdAccumulate */ + +// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. +// +// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX +func (x Int32x4) MaskedSaturatedPairDotProdAccumulate(y Int16x8, z Int32x4, u Mask32x4) Int32x4 + +// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. +// +// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX +func (x Int32x8) MaskedSaturatedPairDotProdAccumulate(y Int16x16, z Int32x8, u Mask32x8) Int32x8 + +// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. +// +// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX +func (x Int32x16) MaskedSaturatedPairDotProdAccumulate(y Int16x32, z Int32x16, u Mask32x16) Int32x16 + +/* MaskedSaturatedSub */ + +// SaturatedSub subtracts corresponding elements of two vectors with saturation. +// +// Asm: VPSUBSB, CPU Feature: AVX512EVEX +func (x Int8x16) MaskedSaturatedSub(y Int8x16, z Mask8x16) Int8x16 + +// SaturatedSub subtracts corresponding elements of two vectors with saturation. +// +// Asm: VPSUBSB, CPU Feature: AVX512EVEX +func (x Int8x32) MaskedSaturatedSub(y Int8x32, z Mask8x32) Int8x32 + +// SaturatedSub subtracts corresponding elements of two vectors with saturation. +// +// Asm: VPSUBSB, CPU Feature: AVX512EVEX +func (x Int8x64) MaskedSaturatedSub(y Int8x64, z Mask8x64) Int8x64 + +// SaturatedSub subtracts corresponding elements of two vectors with saturation. +// +// Asm: VPSUBSW, CPU Feature: AVX512EVEX +func (x Int16x8) MaskedSaturatedSub(y Int16x8, z Mask16x8) Int16x8 + +// SaturatedSub subtracts corresponding elements of two vectors with saturation. +// +// Asm: VPSUBSW, CPU Feature: AVX512EVEX +func (x Int16x16) MaskedSaturatedSub(y Int16x16, z Mask16x16) Int16x16 + +// SaturatedSub subtracts corresponding elements of two vectors with saturation. +// +// Asm: VPSUBSW, CPU Feature: AVX512EVEX +func (x Int16x32) MaskedSaturatedSub(y Int16x32, z Mask16x32) Int16x32 + +// SaturatedSub subtracts corresponding elements of two vectors with saturation. 
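Saturation replaces modular wraparound with clamping, which is usually what sample and pixel arithmetic wants. A sketch with an assumed import path and helper name:

```go
package example

import "simd" // import path assumed

// addClamped sums 16-bit samples without wraparound: 32000+9000 saturates to
// 32767 instead of going negative. Lanes outside m are assumed zeroed.
func addClamped(a, b simd.Int16x16, m simd.Mask16x16) simd.Int16x16 {
	return a.MaskedSaturatedAdd(b, m) // VPADDSW under mask
}
```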
+//
+// Asm: VPSUBSB, CPU Feature: AVX512EVEX
+func (x Uint8x16) MaskedSaturatedSub(y Uint8x16, z Mask8x16) Uint8x16
+
+// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+//
+// Asm: VPSUBSB, CPU Feature: AVX512EVEX
+func (x Uint8x32) MaskedSaturatedSub(y Uint8x32, z Mask8x32) Uint8x32
+
+// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+//
+// Asm: VPSUBSB, CPU Feature: AVX512EVEX
+func (x Uint8x64) MaskedSaturatedSub(y Uint8x64, z Mask8x64) Uint8x64
+
+// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+//
+// Asm: VPSUBSW, CPU Feature: AVX512EVEX
+func (x Uint16x8) MaskedSaturatedSub(y Uint16x8, z Mask16x8) Uint16x8
+
+// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+//
+// Asm: VPSUBSW, CPU Feature: AVX512EVEX
+func (x Uint16x16) MaskedSaturatedSub(y Uint16x16, z Mask16x16) Uint16x16
+
+// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+//
+// Asm: VPSUBSW, CPU Feature: AVX512EVEX
+func (x Uint16x32) MaskedSaturatedSub(y Uint16x32, z Mask16x32) Uint16x32
+
+/* MaskedSaturatedUnsignedSignedPairDotProd */
+
+// SaturatedUnsignedSignedPairDotProd multiplies the elements and adds the pairs together with saturation,
+// yielding a vector of half as many elements with twice the input element size.
+//
+// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
+func (x Uint16x8) MaskedSaturatedUnsignedSignedPairDotProd(y Int16x8, z Mask16x8) Int16x8
+
+// SaturatedUnsignedSignedPairDotProd multiplies the elements and adds the pairs together with saturation,
+// yielding a vector of half as many elements with twice the input element size.
+//
+// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
+func (x Uint16x16) MaskedSaturatedUnsignedSignedPairDotProd(y Int16x16, z Mask16x16) Int16x16
+
+// SaturatedUnsignedSignedPairDotProd multiplies the elements and adds the pairs together with saturation,
+// yielding a vector of half as many elements with twice the input element size.
+//
+// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
+func (x Uint16x32) MaskedSaturatedUnsignedSignedPairDotProd(y Int16x32, z Mask16x32) Int16x32
+
+/* MaskedSaturatedUnsignedSignedQuadDotProdAccumulate */
+
+// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
+func (x Int32x4) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int32x4, u Mask32x4) Int32x4
+
+// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
+func (x Int32x8) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int32x8, u Mask32x8) Int32x8
+
+// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
+func (x Int32x16) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16, u Mask32x16) Int32x16
+
+// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
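The VNNI-style quad form consumes four unsigned bytes against four signed bytes per 32-bit lane, saturating the accumulator instead of wrapping. The sketch mirrors the declared operand types (the z operand is the Int32-shaped source, as declared above):

```go
package example

import "simd" // import path assumed

// quadStep accumulates groups of four u8 x i8 products into 32-bit lanes,
// saturating instead of wrapping on overflow.
func quadStep(acc simd.Int32x4, y simd.Uint8x16, z simd.Int32x4, m simd.Mask32x4) simd.Int32x4 {
	return acc.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y, z, m) // VPDPBUSDS under mask
}
```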
+//
+// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
+func (x Uint32x4) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int32x4, u Mask32x4) Uint32x4
+
+// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
+func (x Uint32x8) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int32x8, u Mask32x8) Uint32x8
+
+// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
+func (x Uint32x16) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16, u Mask32x16) Uint32x16
+
+/* MaskedSqrt */
+
+// Sqrt computes the square root of each element.
+//
+// Asm: VSQRTPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedSqrt(y Mask32x4) Float32x4
+
+// Sqrt computes the square root of each element.
+//
+// Asm: VSQRTPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedSqrt(y Mask32x8) Float32x8
+
+// Sqrt computes the square root of each element.
+//
+// Asm: VSQRTPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedSqrt(y Mask32x16) Float32x16
+
+// Sqrt computes the square root of each element.
+//
+// Asm: VSQRTPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedSqrt(y Mask64x2) Float64x2
+
+// Sqrt computes the square root of each element.
+//
+// Asm: VSQRTPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedSqrt(y Mask64x4) Float64x4
+
+// Sqrt computes the square root of each element.
+//
+// Asm: VSQRTPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedSqrt(y Mask64x8) Float64x8
+
+/* MaskedSub */
+
+// Sub subtracts corresponding elements of two vectors.
+//
+// Asm: VSUBPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedSub(y Float32x4, z Mask32x4) Float32x4
+
+// Sub subtracts corresponding elements of two vectors.
+//
+// Asm: VSUBPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedSub(y Float32x8, z Mask32x8) Float32x8
+
+// Sub subtracts corresponding elements of two vectors.
+//
+// Asm: VSUBPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedSub(y Float32x16, z Mask32x16) Float32x16
+
+// Sub subtracts corresponding elements of two vectors.
+//
+// Asm: VSUBPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedSub(y Float64x2, z Mask64x2) Float64x2
+
+// Sub subtracts corresponding elements of two vectors.
+//
+// Asm: VSUBPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedSub(y Float64x4, z Mask64x4) Float64x4
+
+// Sub subtracts corresponding elements of two vectors.
+//
+// Asm: VSUBPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedSub(y Float64x8, z Mask64x8) Float64x8
+
+// Sub subtracts corresponding elements of two vectors.
+//
+// Asm: VPSUBB, CPU Feature: AVX512EVEX
+func (x Int8x16) MaskedSub(y Int8x16, z Mask8x16) Int8x16
+
+// Sub subtracts corresponding elements of two vectors.
+//
+// Asm: VPSUBB, CPU Feature: AVX512EVEX
+func (x Int8x32) MaskedSub(y Int8x32, z Mask8x32) Int8x32
+
+// Sub subtracts corresponding elements of two vectors.
+//
+// Asm: VPSUBB, CPU Feature: AVX512EVEX
+func (x Int8x64) MaskedSub(y Int8x64, z Mask8x64) Int8x64
+
+// Sub subtracts corresponding elements of two vectors.
+//
+// Asm: VPSUBW, CPU Feature: AVX512EVEX
+func (x Int16x8) MaskedSub(y Int16x8, z Mask16x8) Int16x8
+
+// Sub subtracts corresponding elements of two vectors.
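A masked square root is the natural way to skip lanes that would produce NaN; here the validity mask is assumed to come from an earlier compare.

```go
package example

import "simd" // import path assumed

// sqrtWhere takes square roots only in lanes the caller marked valid (for
// instance, lanes known to be non-negative); other lanes are assumed zeroed.
func sqrtWhere(x simd.Float64x4, valid simd.Mask64x4) simd.Float64x4 {
	return x.MaskedSqrt(valid) // VSQRTPD under mask
}
```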
+// +// Asm: VPSUBW, CPU Feature: AVX512EVEX +func (x Int16x16) MaskedSub(y Int16x16, z Mask16x16) Int16x16 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBW, CPU Feature: AVX512EVEX +func (x Int16x32) MaskedSub(y Int16x32, z Mask16x32) Int16x32 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBD, CPU Feature: AVX512EVEX +func (x Int32x4) MaskedSub(y Int32x4, z Mask32x4) Int32x4 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBD, CPU Feature: AVX512EVEX +func (x Int32x8) MaskedSub(y Int32x8, z Mask32x8) Int32x8 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBD, CPU Feature: AVX512EVEX +func (x Int32x16) MaskedSub(y Int32x16, z Mask32x16) Int32x16 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBQ, CPU Feature: AVX512EVEX +func (x Int64x2) MaskedSub(y Int64x2, z Mask64x2) Int64x2 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBQ, CPU Feature: AVX512EVEX +func (x Int64x4) MaskedSub(y Int64x4, z Mask64x4) Int64x4 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBQ, CPU Feature: AVX512EVEX +func (x Int64x8) MaskedSub(y Int64x8, z Mask64x8) Int64x8 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBB, CPU Feature: AVX512EVEX +func (x Uint8x16) MaskedSub(y Uint8x16, z Mask8x16) Uint8x16 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBB, CPU Feature: AVX512EVEX +func (x Uint8x32) MaskedSub(y Uint8x32, z Mask8x32) Uint8x32 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBB, CPU Feature: AVX512EVEX +func (x Uint8x64) MaskedSub(y Uint8x64, z Mask8x64) Uint8x64 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBW, CPU Feature: AVX512EVEX +func (x Uint16x8) MaskedSub(y Uint16x8, z Mask16x8) Uint16x8 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBW, CPU Feature: AVX512EVEX +func (x Uint16x16) MaskedSub(y Uint16x16, z Mask16x16) Uint16x16 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBW, CPU Feature: AVX512EVEX +func (x Uint16x32) MaskedSub(y Uint16x32, z Mask16x32) Uint16x32 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBD, CPU Feature: AVX512EVEX +func (x Uint32x4) MaskedSub(y Uint32x4, z Mask32x4) Uint32x4 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBD, CPU Feature: AVX512EVEX +func (x Uint32x8) MaskedSub(y Uint32x8, z Mask32x8) Uint32x8 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBD, CPU Feature: AVX512EVEX +func (x Uint32x16) MaskedSub(y Uint32x16, z Mask32x16) Uint32x16 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBQ, CPU Feature: AVX512EVEX +func (x Uint64x2) MaskedSub(y Uint64x2, z Mask64x2) Uint64x2 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBQ, CPU Feature: AVX512EVEX +func (x Uint64x4) MaskedSub(y Uint64x4, z Mask64x4) Uint64x4 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBQ, CPU Feature: AVX512EVEX +func (x Uint64x8) MaskedSub(y Uint64x8, z Mask64x8) Uint64x8 + +/* MaskedTruncSuppressExceptionWithPrecision */ // TruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions. // Const Immediate = 11. 
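Combining MaskedSub with the unmasked Max/Min declared later in this file gives a branch-free absolute difference; the helper name and the composition are illustrative only.

```go
package example

import "simd" // import path assumed

// absDiff computes |a-b| per byte without branching: max(a,b) - min(a,b)
// never underflows, so plain modular subtraction is safe.
func absDiff(a, b simd.Uint8x16, m simd.Mask8x16) simd.Uint8x16 {
	return a.Max(b).MaskedSub(a.Min(b), m) // VPMAXUB, VPMINUB, VPSUBB under mask
}
```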
@@ -8350,6 +6563,12 @@ func (x Float32x4) MaskedTruncSuppressExceptionWithPrecision(imm uint8, y Mask32 // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX func (x Float32x8) MaskedTruncSuppressExceptionWithPrecision(imm uint8, y Mask32x8) Float32x8 +// TruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions. +// Const Immediate = 11. +// +// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedTruncSuppressExceptionWithPrecision(imm uint8, y Mask32x16) Float32x16 + // TruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions. // Const Immediate = 11. // @@ -8368,11 +6587,7 @@ func (x Float64x4) MaskedTruncSuppressExceptionWithPrecision(imm uint8, y Mask64 // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX func (x Float64x8) MaskedTruncSuppressExceptionWithPrecision(imm uint8, y Mask64x8) Float64x8 -// TruncWithPrecision truncates elements with specified precision. -// Const Immediate = 3. -// -// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedTruncWithPrecision(imm uint8, y Mask32x16) Float32x16 +/* MaskedTruncWithPrecision */ // TruncWithPrecision truncates elements with specified precision. // Const Immediate = 3. @@ -8386,6 +6601,12 @@ func (x Float32x4) MaskedTruncWithPrecision(imm uint8, y Mask32x4) Float32x4 // Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX func (x Float32x8) MaskedTruncWithPrecision(imm uint8, y Mask32x8) Float32x8 +// TruncWithPrecision truncates elements with specified precision. +// Const Immediate = 3. +// +// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedTruncWithPrecision(imm uint8, y Mask32x16) Float32x16 + // TruncWithPrecision truncates elements with specified precision. // Const Immediate = 3. // @@ -8404,6 +6625,2083 @@ func (x Float64x4) MaskedTruncWithPrecision(imm uint8, y Mask64x4) Float64x4 // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX func (x Float64x8) MaskedTruncWithPrecision(imm uint8, y Mask64x8) Float64x8 +/* MaskedUnsignedSignedQuadDotProdAccumulate */ + +// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. +// +// Asm: VPDPBUSD, CPU Feature: AVX512EVEX +func (x Int32x4) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int32x4, u Mask32x4) Int32x4 + +// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. +// +// Asm: VPDPBUSD, CPU Feature: AVX512EVEX +func (x Int32x8) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int32x8, u Mask32x8) Int32x8 + +// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. +// +// Asm: VPDPBUSD, CPU Feature: AVX512EVEX +func (x Int32x16) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16, u Mask32x16) Int32x16 + +// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. +// +// Asm: VPDPBUSD, CPU Feature: AVX512EVEX +func (x Uint32x4) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int32x4, u Mask32x4) Uint32x4 + +// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. 
+// +// Asm: VPDPBUSD, CPU Feature: AVX512EVEX +func (x Uint32x8) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int32x8, u Mask32x8) Uint32x8 + +// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. +// +// Asm: VPDPBUSD, CPU Feature: AVX512EVEX +func (x Uint32x16) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16, u Mask32x16) Uint32x16 + +/* MaskedXor */ + +// Xor performs a masked bitwise XOR operation between two vectors. +// +// Asm: VXORPS, CPU Feature: AVX512EVEX +func (x Float32x4) MaskedXor(y Float32x4, z Mask32x4) Float32x4 + +// Xor performs a masked bitwise XOR operation between two vectors. +// +// Asm: VXORPS, CPU Feature: AVX512EVEX +func (x Float32x8) MaskedXor(y Float32x8, z Mask32x8) Float32x8 + +// Xor performs a masked bitwise XOR operation between two vectors. +// +// Asm: VXORPS, CPU Feature: AVX512EVEX +func (x Float32x16) MaskedXor(y Float32x16, z Mask32x16) Float32x16 + +// Xor performs a masked bitwise XOR operation between two vectors. +// +// Asm: VXORPD, CPU Feature: AVX512EVEX +func (x Float64x2) MaskedXor(y Float64x2, z Mask64x2) Float64x2 + +// Xor performs a masked bitwise XOR operation between two vectors. +// +// Asm: VXORPD, CPU Feature: AVX512EVEX +func (x Float64x4) MaskedXor(y Float64x4, z Mask64x4) Float64x4 + +// Xor performs a masked bitwise XOR operation between two vectors. +// +// Asm: VXORPD, CPU Feature: AVX512EVEX +func (x Float64x8) MaskedXor(y Float64x8, z Mask64x8) Float64x8 + +// Xor performs a masked bitwise XOR operation between two vectors. +// +// Asm: VPXORD, CPU Feature: AVX512EVEX +func (x Int32x4) MaskedXor(y Int32x4, z Mask32x4) Int32x4 + +// Xor performs a masked bitwise XOR operation between two vectors. +// +// Asm: VPXORD, CPU Feature: AVX512EVEX +func (x Int32x8) MaskedXor(y Int32x8, z Mask32x8) Int32x8 + +// Xor performs a masked bitwise XOR operation between two vectors. +// +// Asm: VPXORD, CPU Feature: AVX512EVEX +func (x Int32x16) MaskedXor(y Int32x16, z Mask32x16) Int32x16 + +// Xor performs a masked bitwise XOR operation between two vectors. +// +// Asm: VPXORQ, CPU Feature: AVX512EVEX +func (x Int64x2) MaskedXor(y Int64x2, z Mask64x2) Int64x2 + +// Xor performs a masked bitwise XOR operation between two vectors. +// +// Asm: VPXORQ, CPU Feature: AVX512EVEX +func (x Int64x4) MaskedXor(y Int64x4, z Mask64x4) Int64x4 + +// Xor performs a masked bitwise XOR operation between two vectors. +// +// Asm: VPXORQ, CPU Feature: AVX512EVEX +func (x Int64x8) MaskedXor(y Int64x8, z Mask64x8) Int64x8 + +// Xor performs a masked bitwise XOR operation between two vectors. +// +// Asm: VPXORD, CPU Feature: AVX512EVEX +func (x Uint32x4) MaskedXor(y Uint32x4, z Mask32x4) Uint32x4 + +// Xor performs a masked bitwise XOR operation between two vectors. +// +// Asm: VPXORD, CPU Feature: AVX512EVEX +func (x Uint32x8) MaskedXor(y Uint32x8, z Mask32x8) Uint32x8 + +// Xor performs a masked bitwise XOR operation between two vectors. +// +// Asm: VPXORD, CPU Feature: AVX512EVEX +func (x Uint32x16) MaskedXor(y Uint32x16, z Mask32x16) Uint32x16 + +// Xor performs a masked bitwise XOR operation between two vectors. +// +// Asm: VPXORQ, CPU Feature: AVX512EVEX +func (x Uint64x2) MaskedXor(y Uint64x2, z Mask64x2) Uint64x2 + +// Xor performs a masked bitwise XOR operation between two vectors. 
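A masked XOR is a lane-selective bit toggle; the sketch flips chosen bits only where the mask is set (assumptions as above).

```go
package example

import "simd" // import path assumed

// toggle flips the bits of v selected by bits, in the lanes enabled by m.
func toggle(v, bits simd.Uint64x4, m simd.Mask64x4) simd.Uint64x4 {
	return v.MaskedXor(bits, m) // VPXORQ under mask
}
```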
+// +// Asm: VPXORQ, CPU Feature: AVX512EVEX +func (x Uint64x4) MaskedXor(y Uint64x4, z Mask64x4) Uint64x4 + +// Xor performs a masked bitwise XOR operation between two vectors. +// +// Asm: VPXORQ, CPU Feature: AVX512EVEX +func (x Uint64x8) MaskedXor(y Uint64x8, z Mask64x8) Uint64x8 + +/* Max */ + +// Max computes the maximum of corresponding elements. +// +// Asm: VMAXPS, CPU Feature: AVX +func (x Float32x4) Max(y Float32x4) Float32x4 + +// Max computes the maximum of corresponding elements. +// +// Asm: VMAXPS, CPU Feature: AVX +func (x Float32x8) Max(y Float32x8) Float32x8 + +// Max computes the maximum of corresponding elements. +// +// Asm: VMAXPS, CPU Feature: AVX512EVEX +func (x Float32x16) Max(y Float32x16) Float32x16 + +// Max computes the maximum of corresponding elements. +// +// Asm: VMAXPD, CPU Feature: AVX +func (x Float64x2) Max(y Float64x2) Float64x2 + +// Max computes the maximum of corresponding elements. +// +// Asm: VMAXPD, CPU Feature: AVX +func (x Float64x4) Max(y Float64x4) Float64x4 + +// Max computes the maximum of corresponding elements. +// +// Asm: VMAXPD, CPU Feature: AVX512EVEX +func (x Float64x8) Max(y Float64x8) Float64x8 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXSB, CPU Feature: AVX +func (x Int8x16) Max(y Int8x16) Int8x16 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXSB, CPU Feature: AVX2 +func (x Int8x32) Max(y Int8x32) Int8x32 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXSB, CPU Feature: AVX512EVEX +func (x Int8x64) Max(y Int8x64) Int8x64 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXSW, CPU Feature: AVX +func (x Int16x8) Max(y Int16x8) Int16x8 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXSW, CPU Feature: AVX2 +func (x Int16x16) Max(y Int16x16) Int16x16 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXSW, CPU Feature: AVX512EVEX +func (x Int16x32) Max(y Int16x32) Int16x32 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXSD, CPU Feature: AVX +func (x Int32x4) Max(y Int32x4) Int32x4 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXSD, CPU Feature: AVX2 +func (x Int32x8) Max(y Int32x8) Int32x8 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXSD, CPU Feature: AVX512EVEX +func (x Int32x16) Max(y Int32x16) Int32x16 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXSQ, CPU Feature: AVX512EVEX +func (x Int64x2) Max(y Int64x2) Int64x2 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXSQ, CPU Feature: AVX512EVEX +func (x Int64x4) Max(y Int64x4) Int64x4 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXSQ, CPU Feature: AVX512EVEX +func (x Int64x8) Max(y Int64x8) Int64x8 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXUB, CPU Feature: AVX +func (x Uint8x16) Max(y Uint8x16) Uint8x16 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXUB, CPU Feature: AVX2 +func (x Uint8x32) Max(y Uint8x32) Uint8x32 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXUB, CPU Feature: AVX512EVEX +func (x Uint8x64) Max(y Uint8x64) Uint8x64 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXUW, CPU Feature: AVX +func (x Uint16x8) Max(y Uint16x8) Uint16x8 + +// Max computes the maximum of corresponding elements. 
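The classic Max/Min composition is a clamp; the Min declarations for these element types appear just below.

```go
package example

import "simd" // import path assumed

// clamp limits every lane of v to [lo, hi]: Max applies the lower bound and
// Min the upper. NaN propagation in VMAXPS/VMINPS is instruction-defined, so
// callers should keep NaNs out of the bounds.
func clamp(v, lo, hi simd.Float32x8) simd.Float32x8 {
	return v.Max(lo).Min(hi)
}
```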
+// +// Asm: VPMAXUW, CPU Feature: AVX2 +func (x Uint16x16) Max(y Uint16x16) Uint16x16 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXUW, CPU Feature: AVX512EVEX +func (x Uint16x32) Max(y Uint16x32) Uint16x32 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXUD, CPU Feature: AVX +func (x Uint32x4) Max(y Uint32x4) Uint32x4 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXUD, CPU Feature: AVX2 +func (x Uint32x8) Max(y Uint32x8) Uint32x8 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXUD, CPU Feature: AVX512EVEX +func (x Uint32x16) Max(y Uint32x16) Uint32x16 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXUQ, CPU Feature: AVX512EVEX +func (x Uint64x2) Max(y Uint64x2) Uint64x2 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXUQ, CPU Feature: AVX512EVEX +func (x Uint64x4) Max(y Uint64x4) Uint64x4 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXUQ, CPU Feature: AVX512EVEX +func (x Uint64x8) Max(y Uint64x8) Uint64x8 + +/* Min */ + +// Min computes the minimum of corresponding elements. +// +// Asm: VMINPS, CPU Feature: AVX +func (x Float32x4) Min(y Float32x4) Float32x4 + +// Min computes the minimum of corresponding elements. +// +// Asm: VMINPS, CPU Feature: AVX +func (x Float32x8) Min(y Float32x8) Float32x8 + +// Min computes the minimum of corresponding elements. +// +// Asm: VMINPS, CPU Feature: AVX512EVEX +func (x Float32x16) Min(y Float32x16) Float32x16 + +// Min computes the minimum of corresponding elements. +// +// Asm: VMINPD, CPU Feature: AVX +func (x Float64x2) Min(y Float64x2) Float64x2 + +// Min computes the minimum of corresponding elements. +// +// Asm: VMINPD, CPU Feature: AVX +func (x Float64x4) Min(y Float64x4) Float64x4 + +// Min computes the minimum of corresponding elements. +// +// Asm: VMINPD, CPU Feature: AVX512EVEX +func (x Float64x8) Min(y Float64x8) Float64x8 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINSB, CPU Feature: AVX +func (x Int8x16) Min(y Int8x16) Int8x16 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINSB, CPU Feature: AVX2 +func (x Int8x32) Min(y Int8x32) Int8x32 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINSB, CPU Feature: AVX512EVEX +func (x Int8x64) Min(y Int8x64) Int8x64 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINSW, CPU Feature: AVX +func (x Int16x8) Min(y Int16x8) Int16x8 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINSW, CPU Feature: AVX2 +func (x Int16x16) Min(y Int16x16) Int16x16 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINSW, CPU Feature: AVX512EVEX +func (x Int16x32) Min(y Int16x32) Int16x32 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINSD, CPU Feature: AVX +func (x Int32x4) Min(y Int32x4) Int32x4 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINSD, CPU Feature: AVX2 +func (x Int32x8) Min(y Int32x8) Int32x8 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINSD, CPU Feature: AVX512EVEX +func (x Int32x16) Min(y Int32x16) Int32x16 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINSQ, CPU Feature: AVX512EVEX +func (x Int64x2) Min(y Int64x2) Int64x2 + +// Min computes the minimum of corresponding elements. 
+//
+// Asm: VPMINSQ, CPU Feature: AVX512EVEX
+func (x Int64x4) Min(y Int64x4) Int64x4
+
+// Min computes the minimum of corresponding elements.
+//
+// Asm: VPMINSQ, CPU Feature: AVX512EVEX
+func (x Int64x8) Min(y Int64x8) Int64x8
+
+// Min computes the minimum of corresponding elements.
+//
+// Asm: VPMINUB, CPU Feature: AVX
+func (x Uint8x16) Min(y Uint8x16) Uint8x16
+
+// Min computes the minimum of corresponding elements.
+//
+// Asm: VPMINUB, CPU Feature: AVX2
+func (x Uint8x32) Min(y Uint8x32) Uint8x32
+
+// Min computes the minimum of corresponding elements.
+//
+// Asm: VPMINUB, CPU Feature: AVX512EVEX
+func (x Uint8x64) Min(y Uint8x64) Uint8x64
+
+// Min computes the minimum of corresponding elements.
+//
+// Asm: VPMINUW, CPU Feature: AVX
+func (x Uint16x8) Min(y Uint16x8) Uint16x8
+
+// Min computes the minimum of corresponding elements.
+//
+// Asm: VPMINUW, CPU Feature: AVX2
+func (x Uint16x16) Min(y Uint16x16) Uint16x16
+
+// Min computes the minimum of corresponding elements.
+//
+// Asm: VPMINUW, CPU Feature: AVX512EVEX
+func (x Uint16x32) Min(y Uint16x32) Uint16x32
+
+// Min computes the minimum of corresponding elements.
+//
+// Asm: VPMINUD, CPU Feature: AVX
+func (x Uint32x4) Min(y Uint32x4) Uint32x4
+
+// Min computes the minimum of corresponding elements.
+//
+// Asm: VPMINUD, CPU Feature: AVX2
+func (x Uint32x8) Min(y Uint32x8) Uint32x8
+
+// Min computes the minimum of corresponding elements.
+//
+// Asm: VPMINUD, CPU Feature: AVX512EVEX
+func (x Uint32x16) Min(y Uint32x16) Uint32x16
+
+// Min computes the minimum of corresponding elements.
+//
+// Asm: VPMINUQ, CPU Feature: AVX512EVEX
+func (x Uint64x2) Min(y Uint64x2) Uint64x2
+
+// Min computes the minimum of corresponding elements.
+//
+// Asm: VPMINUQ, CPU Feature: AVX512EVEX
+func (x Uint64x4) Min(y Uint64x4) Uint64x4
+
+// Min computes the minimum of corresponding elements.
+//
+// Asm: VPMINUQ, CPU Feature: AVX512EVEX
+func (x Uint64x8) Min(y Uint64x8) Uint64x8
+
+/* Mul */
+
+// Mul multiplies corresponding elements of two vectors.
+//
+// Asm: VMULPS, CPU Feature: AVX
+func (x Float32x4) Mul(y Float32x4) Float32x4
+
+// Mul multiplies corresponding elements of two vectors.
+//
+// Asm: VMULPS, CPU Feature: AVX
+func (x Float32x8) Mul(y Float32x8) Float32x8
+
+// Mul multiplies corresponding elements of two vectors.
+//
+// Asm: VMULPS, CPU Feature: AVX512EVEX
+func (x Float32x16) Mul(y Float32x16) Float32x16
+
+// Mul multiplies corresponding elements of two vectors.
+//
+// Asm: VMULPD, CPU Feature: AVX
+func (x Float64x2) Mul(y Float64x2) Float64x2
+
+// Mul multiplies corresponding elements of two vectors.
+//
+// Asm: VMULPD, CPU Feature: AVX
+func (x Float64x4) Mul(y Float64x4) Float64x4
+
+// Mul multiplies corresponding elements of two vectors.
+//
+// Asm: VMULPD, CPU Feature: AVX512EVEX
+func (x Float64x8) Mul(y Float64x8) Float64x8
+
+/* MulByPowOf2 */
+
+// MulByPowOf2 multiplies elements by a power of 2.
+//
+// Asm: VSCALEFPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MulByPowOf2(y Float32x4) Float32x4
+
+// MulByPowOf2 multiplies elements by a power of 2.
+//
+// Asm: VSCALEFPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MulByPowOf2(y Float32x8) Float32x8
+
+// MulByPowOf2 multiplies elements by a power of 2.
+//
+// Asm: VSCALEFPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MulByPowOf2(y Float32x16) Float32x16
+
+// MulByPowOf2 multiplies elements by a power of 2.
+//
+// Asm: VSCALEFPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MulByPowOf2(y Float64x2) Float64x2
+
+// MulByPowOf2 multiplies elements by a power of 2.
+//
+// Asm: VSCALEFPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MulByPowOf2(y Float64x4) Float64x4
+
+// MulByPowOf2 multiplies elements by a power of 2.
+//
+// Asm: VSCALEFPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MulByPowOf2(y Float64x8) Float64x8
+
+/* MulEvenWiden */
+
+// MulEvenWiden multiplies even-indexed elements, widening the result.
+// Result[i] = v1.Even[i] * v2.Even[i].
+//
+// Asm: VPMULDQ, CPU Feature: AVX
+func (x Int32x4) MulEvenWiden(y Int32x4) Int64x2
+
+// MulEvenWiden multiplies even-indexed elements, widening the result.
+// Result[i] = v1.Even[i] * v2.Even[i].
+//
+// Asm: VPMULDQ, CPU Feature: AVX2
+func (x Int32x8) MulEvenWiden(y Int32x8) Int64x4
+
+// MulEvenWiden multiplies even-indexed elements, widening the result.
+// Result[i] = v1.Even[i] * v2.Even[i].
+//
+// Asm: VPMULDQ, CPU Feature: AVX512EVEX
+func (x Int64x2) MulEvenWiden(y Int64x2) Int64x2
+
+// MulEvenWiden multiplies even-indexed elements, widening the result.
+// Result[i] = v1.Even[i] * v2.Even[i].
+//
+// Asm: VPMULDQ, CPU Feature: AVX512EVEX
+func (x Int64x4) MulEvenWiden(y Int64x4) Int64x4
+
+// MulEvenWiden multiplies even-indexed elements, widening the result.
+// Result[i] = v1.Even[i] * v2.Even[i].
+//
+// Asm: VPMULDQ, CPU Feature: AVX512EVEX
+func (x Int64x8) MulEvenWiden(y Int64x8) Int64x8
+
+// MulEvenWiden multiplies even-indexed elements, widening the result.
+// Result[i] = v1.Even[i] * v2.Even[i].
+//
+// Asm: VPMULUDQ, CPU Feature: AVX
+func (x Uint32x4) MulEvenWiden(y Uint32x4) Uint64x2
+
+// MulEvenWiden multiplies even-indexed elements, widening the result.
+// Result[i] = v1.Even[i] * v2.Even[i].
+//
+// Asm: VPMULUDQ, CPU Feature: AVX2
+func (x Uint32x8) MulEvenWiden(y Uint32x8) Uint64x4
+
+// MulEvenWiden multiplies even-indexed elements, widening the result.
+// Result[i] = v1.Even[i] * v2.Even[i].
+//
+// Asm: VPMULUDQ, CPU Feature: AVX512EVEX
+func (x Uint64x2) MulEvenWiden(y Uint64x2) Uint64x2
+
+// MulEvenWiden multiplies even-indexed elements, widening the result.
+// Result[i] = v1.Even[i] * v2.Even[i].
+//
+// Asm: VPMULUDQ, CPU Feature: AVX512EVEX
+func (x Uint64x4) MulEvenWiden(y Uint64x4) Uint64x4
+
+// MulEvenWiden multiplies even-indexed elements, widening the result.
+// Result[i] = v1.Even[i] * v2.Even[i].
+//
+// Asm: VPMULUDQ, CPU Feature: AVX512EVEX
+func (x Uint64x8) MulEvenWiden(y Uint64x8) Uint64x8
+
+/* MulHigh */
+
+// MulHigh multiplies elements and stores the high part of the result.
+//
+// Asm: VPMULHW, CPU Feature: AVX
+func (x Int16x8) MulHigh(y Int16x8) Int16x8
+
+// MulHigh multiplies elements and stores the high part of the result.
+//
+// Asm: VPMULHW, CPU Feature: AVX2
+func (x Int16x16) MulHigh(y Int16x16) Int16x16
+
+// MulHigh multiplies elements and stores the high part of the result.
+//
+// Asm: VPMULHW, CPU Feature: AVX512EVEX
+func (x Int16x32) MulHigh(y Int16x32) Int16x32
+
+// MulHigh multiplies elements and stores the high part of the result.
+//
+// Asm: VPMULHUW, CPU Feature: AVX
+func (x Uint16x8) MulHigh(y Uint16x8) Uint16x8
+
+// MulHigh multiplies elements and stores the high part of the result.
+//
+// Asm: VPMULHUW, CPU Feature: AVX2
+func (x Uint16x16) MulHigh(y Uint16x16) Uint16x16
+
+// MulHigh multiplies elements and stores the high part of the result.
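Widening multiplies sidestep overflow by producing full 64-bit products from the even 32-bit lanes. A sketch using the declared Uint32x4 form:

```go
package example

import "simd" // import path assumed

// evenProducts multiplies lanes 0 and 2 of each operand, yielding two full
// 64-bit products (VPMULUDQ ignores the odd lanes).
func evenProducts(a, b simd.Uint32x4) simd.Uint64x2 {
	return a.MulEvenWiden(b)
}
```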
+//
+// Asm: VPMULHUW, CPU Feature: AVX512EVEX
+func (x Uint16x32) MulHigh(y Uint16x32) Uint16x32
+
+/* MulLow */
+
+// MulLow multiplies elements and stores the low part of the result.
+//
+// Asm: VPMULLW, CPU Feature: AVX
+func (x Int16x8) MulLow(y Int16x8) Int16x8
+
+// MulLow multiplies elements and stores the low part of the result.
+//
+// Asm: VPMULLW, CPU Feature: AVX2
+func (x Int16x16) MulLow(y Int16x16) Int16x16
+
+// MulLow multiplies elements and stores the low part of the result.
+//
+// Asm: VPMULLW, CPU Feature: AVX512EVEX
+func (x Int16x32) MulLow(y Int16x32) Int16x32
+
+// MulLow multiplies elements and stores the low part of the result.
+//
+// Asm: VPMULLD, CPU Feature: AVX
+func (x Int32x4) MulLow(y Int32x4) Int32x4
+
+// MulLow multiplies elements and stores the low part of the result.
+//
+// Asm: VPMULLD, CPU Feature: AVX2
+func (x Int32x8) MulLow(y Int32x8) Int32x8
+
+// MulLow multiplies elements and stores the low part of the result.
+//
+// Asm: VPMULLD, CPU Feature: AVX512EVEX
+func (x Int32x16) MulLow(y Int32x16) Int32x16
+
+// MulLow multiplies elements and stores the low part of the result.
+//
+// Asm: VPMULLQ, CPU Feature: AVX512EVEX
+func (x Int64x2) MulLow(y Int64x2) Int64x2
+
+// MulLow multiplies elements and stores the low part of the result.
+//
+// Asm: VPMULLQ, CPU Feature: AVX512EVEX
+func (x Int64x4) MulLow(y Int64x4) Int64x4
+
+// MulLow multiplies elements and stores the low part of the result.
+//
+// Asm: VPMULLQ, CPU Feature: AVX512EVEX
+func (x Int64x8) MulLow(y Int64x8) Int64x8
+
+/* NotEqual */
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VCMPPS, CPU Feature: AVX
+func (x Float32x4) NotEqual(y Float32x4) Mask32x4
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VCMPPS, CPU Feature: AVX
+func (x Float32x8) NotEqual(y Float32x8) Mask32x8
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VCMPPS, CPU Feature: AVX512EVEX
+func (x Float32x16) NotEqual(y Float32x16) Mask32x16
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VCMPPD, CPU Feature: AVX
+func (x Float64x2) NotEqual(y Float64x2) Mask64x2
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VCMPPD, CPU Feature: AVX
+func (x Float64x4) NotEqual(y Float64x4) Mask64x4
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VCMPPD, CPU Feature: AVX512EVEX
+func (x Float64x8) NotEqual(y Float64x8) Mask64x8
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VPCMPB, CPU Feature: AVX512EVEX
+func (x Int8x16) NotEqual(y Int8x16) Mask8x16
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VPCMPB, CPU Feature: AVX512EVEX
+func (x Int8x32) NotEqual(y Int8x32) Mask8x32
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VPCMPB, CPU Feature: AVX512EVEX
+func (x Int8x64) NotEqual(y Int8x64) Mask8x64
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VPCMPW, CPU Feature: AVX512EVEX
+func (x Int16x8) NotEqual(y Int16x8) Mask16x8
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VPCMPW, CPU Feature: AVX512EVEX
+func (x Int16x16) NotEqual(y Int16x16) Mask16x16
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VPCMPW, CPU Feature: AVX512EVEX
+func (x Int16x32) NotEqual(y Int16x32) Mask16x32
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VPCMPD, CPU Feature: AVX512EVEX
+func (x Int32x4) NotEqual(y Int32x4) Mask32x4
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VPCMPD, CPU Feature: AVX512EVEX
+func (x Int32x8) NotEqual(y Int32x8) Mask32x8
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VPCMPD, CPU Feature: AVX512EVEX
+func (x Int32x16) NotEqual(y Int32x16) Mask32x16
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+func (x Int64x2) NotEqual(y Int64x2) Mask64x2
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+func (x Int64x4) NotEqual(y Int64x4) Mask64x4
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VPCMPQ, CPU Feature: AVX512EVEX
+func (x Int64x8) NotEqual(y Int64x8) Mask64x8
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+func (x Uint8x16) NotEqual(y Uint8x16) Mask8x16
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+func (x Uint8x32) NotEqual(y Uint8x32) Mask8x32
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VPCMPUB, CPU Feature: AVX512EVEX
+func (x Uint8x64) NotEqual(y Uint8x64) Mask8x64
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+func (x Uint16x8) NotEqual(y Uint16x8) Mask16x8
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+func (x Uint16x16) NotEqual(y Uint16x16) Mask16x16
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VPCMPUW, CPU Feature: AVX512EVEX
+func (x Uint16x32) NotEqual(y Uint16x32) Mask16x32
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+func (x Uint32x4) NotEqual(y Uint32x4) Mask32x4
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+func (x Uint32x8) NotEqual(y Uint32x8) Mask32x8
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VPCMPUD, CPU Feature: AVX512EVEX
+func (x Uint32x16) NotEqual(y Uint32x16) Mask32x16
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+func (x Uint64x2) NotEqual(y Uint64x2) Mask64x2
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+func (x Uint64x4) NotEqual(y Uint64x4) Mask64x4
+
+// NotEqual compares for inequality.
+// Const Immediate = 4.
+//
+// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
+func (x Uint64x8) NotEqual(y Uint64x8) Mask64x8
+
+/* Or */
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VORPS, CPU Feature: AVX
+func (x Float32x4) Or(y Float32x4) Float32x4
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VORPS, CPU Feature: AVX
+func (x Float32x8) Or(y Float32x8) Float32x8
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VORPS, CPU Feature: AVX512EVEX
+func (x Float32x16) Or(y Float32x16) Float32x16
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VORPD, CPU Feature: AVX
+func (x Float64x2) Or(y Float64x2) Float64x2
+
+// Or performs a bitwise OR operation between two vectors.
+/* Or */
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VORPS, CPU Feature: AVX
+func (x Float32x4) Or(y Float32x4) Float32x4
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VORPS, CPU Feature: AVX
+func (x Float32x8) Or(y Float32x8) Float32x8
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VORPS, CPU Feature: AVX512EVEX
+func (x Float32x16) Or(y Float32x16) Float32x16
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VORPD, CPU Feature: AVX
+func (x Float64x2) Or(y Float64x2) Float64x2
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VORPD, CPU Feature: AVX
+func (x Float64x4) Or(y Float64x4) Float64x4
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VORPD, CPU Feature: AVX512EVEX
+func (x Float64x8) Or(y Float64x8) Float64x8
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VPOR, CPU Feature: AVX
+func (x Int8x16) Or(y Int8x16) Int8x16
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VPOR, CPU Feature: AVX2
+func (x Int8x32) Or(y Int8x32) Int8x32
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VPOR, CPU Feature: AVX
+func (x Int16x8) Or(y Int16x8) Int16x8
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VPOR, CPU Feature: AVX2
+func (x Int16x16) Or(y Int16x16) Int16x16
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VPOR, CPU Feature: AVX
+func (x Int32x4) Or(y Int32x4) Int32x4
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VPOR, CPU Feature: AVX2
+func (x Int32x8) Or(y Int32x8) Int32x8
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VPORD, CPU Feature: AVX512EVEX
+func (x Int32x16) Or(y Int32x16) Int32x16
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VPOR, CPU Feature: AVX
+func (x Int64x2) Or(y Int64x2) Int64x2
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VPOR, CPU Feature: AVX2
+func (x Int64x4) Or(y Int64x4) Int64x4
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VPORQ, CPU Feature: AVX512EVEX
+func (x Int64x8) Or(y Int64x8) Int64x8
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VPOR, CPU Feature: AVX
+func (x Uint8x16) Or(y Uint8x16) Uint8x16
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VPOR, CPU Feature: AVX2
+func (x Uint8x32) Or(y Uint8x32) Uint8x32
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VPOR, CPU Feature: AVX
+func (x Uint16x8) Or(y Uint16x8) Uint16x8
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VPOR, CPU Feature: AVX2
+func (x Uint16x16) Or(y Uint16x16) Uint16x16
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VPOR, CPU Feature: AVX
+func (x Uint32x4) Or(y Uint32x4) Uint32x4
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VPOR, CPU Feature: AVX2
+func (x Uint32x8) Or(y Uint32x8) Uint32x8
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VPORD, CPU Feature: AVX512EVEX
+func (x Uint32x16) Or(y Uint32x16) Uint32x16
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VPOR, CPU Feature: AVX
+func (x Uint64x2) Or(y Uint64x2) Uint64x2
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VPOR, CPU Feature: AVX2
+func (x Uint64x4) Or(y Uint64x4) Uint64x4
+
+// Or performs a bitwise OR operation between two vectors.
+//
+// Asm: VPORQ, CPU Feature: AVX512EVEX
+func (x Uint64x8) Or(y Uint64x8) Uint64x8
+
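On the float shapes, Or (VORPS/VORPD) operates on the raw IEEE-754 bit patterns rather than on numeric values. A one-lane scalar model using the standard library (illustrative only):

	import "math"

	// orFloat32Lane models one lane of Float32x4.Or: the OR applies to
	// the 32-bit encoding of each lane, not to the float value.
	func orFloat32Lane(a, b float32) float32 {
		return math.Float32frombits(math.Float32bits(a) | math.Float32bits(b))
	}
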
+/* PairDotProd */
+
+// PairDotProd multiplies the elements and adds the pairs together,
+// yielding a vector of half as many elements with twice the input element size.
+//
+// Asm: VPMADDWD, CPU Feature: AVX
+func (x Int16x8) PairDotProd(y Int16x8) Int32x4
+
+// PairDotProd multiplies the elements and adds the pairs together,
+// yielding a vector of half as many elements with twice the input element size.
+//
+// Asm: VPMADDWD, CPU Feature: AVX2
+func (x Int16x16) PairDotProd(y Int16x16) Int32x8
+
+// PairDotProd multiplies the elements and adds the pairs together,
+// yielding a vector of half as many elements with twice the input element size.
+//
+// Asm: VPMADDWD, CPU Feature: AVX512EVEX
+func (x Int16x32) PairDotProd(y Int16x32) Int32x16
+
+/* PairDotProdAccumulate */
+
+// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPWSSD, CPU Feature: AVX_VNNI
+func (x Int32x4) PairDotProdAccumulate(y Int32x4, z Int32x4) Int32x4
+
+// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPWSSD, CPU Feature: AVX_VNNI
+func (x Int32x8) PairDotProdAccumulate(y Int32x8, z Int32x8) Int32x8
+
+// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPWSSD, CPU Feature: AVX512EVEX
+func (x Int32x16) PairDotProdAccumulate(y Int16x32, z Int32x16) Int32x16
+
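Each destination lane of PairDotProd combines two adjacent source lanes; the 16-bit products are widened to 32 bits before summing, which is why eight int16 lanes yield four int32 lanes. A scalar sketch of one destination lane (hypothetical helper):

	// pairDotProdLane widens before summing, so the 16-bit products
	// cannot overflow the intermediate.
	func pairDotProdLane(x0, x1, y0, y1 int16) int32 {
		return int32(x0)*int32(y0) + int32(x1)*int32(y1)
	}

PairDotProdAccumulate additionally adds this per-lane sum into the corresponding accumulator lane of x.
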
+/* PairwiseAdd */
+
+// PairwiseAdd horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+//
+// Asm: VHADDPS, CPU Feature: AVX
+func (x Float32x4) PairwiseAdd(y Float32x4) Float32x4
+
+// PairwiseAdd horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+//
+// Asm: VHADDPS, CPU Feature: AVX
+func (x Float32x8) PairwiseAdd(y Float32x8) Float32x8
+
+// PairwiseAdd horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+//
+// Asm: VHADDPD, CPU Feature: AVX
+func (x Float64x2) PairwiseAdd(y Float64x2) Float64x2
+
+// PairwiseAdd horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+//
+// Asm: VHADDPD, CPU Feature: AVX
+func (x Float64x4) PairwiseAdd(y Float64x4) Float64x4
+
+// PairwiseAdd horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+//
+// Asm: VPHADDW, CPU Feature: AVX
+func (x Int16x8) PairwiseAdd(y Int16x8) Int16x8
+
+// PairwiseAdd horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+//
+// Asm: VPHADDW, CPU Feature: AVX2
+func (x Int16x16) PairwiseAdd(y Int16x16) Int16x16
+
+// PairwiseAdd horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+//
+// Asm: VPHADDD, CPU Feature: AVX
+func (x Int32x4) PairwiseAdd(y Int32x4) Int32x4
+
+// PairwiseAdd horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+//
+// Asm: VPHADDD, CPU Feature: AVX2
+func (x Int32x8) PairwiseAdd(y Int32x8) Int32x8
+
+// PairwiseAdd horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+//
+// Asm: VPHADDW, CPU Feature: AVX
+func (x Uint16x8) PairwiseAdd(y Uint16x8) Uint16x8
+
+// PairwiseAdd horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+//
+// Asm: VPHADDW, CPU Feature: AVX2
+func (x Uint16x16) PairwiseAdd(y Uint16x16) Uint16x16
+
+// PairwiseAdd horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+//
+// Asm: VPHADDD, CPU Feature: AVX
+func (x Uint32x4) PairwiseAdd(y Uint32x4) Uint32x4
+
+// PairwiseAdd horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+//
+// Asm: VPHADDD, CPU Feature: AVX2
+func (x Uint32x8) PairwiseAdd(y Uint32x8) Uint32x8
+
+/* PairwiseSub */
+
+// PairwiseSub horizontally subtracts adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+//
+// Asm: VHSUBPS, CPU Feature: AVX
+func (x Float32x4) PairwiseSub(y Float32x4) Float32x4
+
+// PairwiseSub horizontally subtracts adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+//
+// Asm: VHSUBPS, CPU Feature: AVX
+func (x Float32x8) PairwiseSub(y Float32x8) Float32x8
+
+// PairwiseSub horizontally subtracts adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+//
+// Asm: VHSUBPD, CPU Feature: AVX
+func (x Float64x2) PairwiseSub(y Float64x2) Float64x2
+
+// PairwiseSub horizontally subtracts adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+//
+// Asm: VHSUBPD, CPU Feature: AVX
+func (x Float64x4) PairwiseSub(y Float64x4) Float64x4
+
+// PairwiseSub horizontally subtracts adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+//
+// Asm: VPHSUBW, CPU Feature: AVX
+func (x Int16x8) PairwiseSub(y Int16x8) Int16x8
+
+// PairwiseSub horizontally subtracts adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+//
+// Asm: VPHSUBW, CPU Feature: AVX2
+func (x Int16x16) PairwiseSub(y Int16x16) Int16x16
+
+// PairwiseSub horizontally subtracts adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+//
+// Asm: VPHSUBD, CPU Feature: AVX
+func (x Int32x4) PairwiseSub(y Int32x4) Int32x4
+
+// PairwiseSub horizontally subtracts adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+//
+// Asm: VPHSUBD, CPU Feature: AVX2
+func (x Int32x8) PairwiseSub(y Int32x8) Int32x8
+
+// PairwiseSub horizontally subtracts adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+//
+// Asm: VPHSUBW, CPU Feature: AVX
+func (x Uint16x8) PairwiseSub(y Uint16x8) Uint16x8
+
+// PairwiseSub horizontally subtracts adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+//
+// Asm: VPHSUBW, CPU Feature: AVX2
+func (x Uint16x16) PairwiseSub(y Uint16x16) Uint16x16
+
+// PairwiseSub horizontally subtracts adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+//
+// Asm: VPHSUBD, CPU Feature: AVX
+func (x Uint32x4) PairwiseSub(y Uint32x4) Uint32x4
+
+// PairwiseSub horizontally subtracts adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+//
+// Asm: VPHSUBD, CPU Feature: AVX2
+func (x Uint32x8) PairwiseSub(y Uint32x8) Uint32x8
+
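Per the ordering documented above, the y pairs fill the low half of the result and the x pairs the high half. A sketch of the 128-bit float shape with concrete (illustrative) values:

	// With x = [1, 2, 3, 4] and y = [10, 20, 30, 40]:
	//   x.PairwiseAdd(y) = [30, 70, 3, 7]
	//   x.PairwiseSub(y) = [-10, -10, -1, -1]
	func pairwiseAdd4(x, y [4]float32) [4]float32 {
		return [4]float32{y[0] + y[1], y[2] + y[3], x[0] + x[1], x[2] + x[3]}
	}
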
+/* PopCount */
+
+// PopCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTB, CPU Feature: AVX512EVEX
+func (x Int8x16) PopCount() Int8x16
+
+// PopCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTB, CPU Feature: AVX512EVEX
+func (x Int8x32) PopCount() Int8x32
+
+// PopCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTB, CPU Feature: AVX512EVEX
+func (x Int8x64) PopCount() Int8x64
+
+// PopCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTW, CPU Feature: AVX512EVEX
+func (x Int16x8) PopCount() Int16x8
+
+// PopCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTW, CPU Feature: AVX512EVEX
+func (x Int16x16) PopCount() Int16x16
+
+// PopCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTW, CPU Feature: AVX512EVEX
+func (x Int16x32) PopCount() Int16x32
+
+// PopCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTD, CPU Feature: AVX512EVEX
+func (x Int32x4) PopCount() Int32x4
+
+// PopCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTD, CPU Feature: AVX512EVEX
+func (x Int32x8) PopCount() Int32x8
+
+// PopCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTD, CPU Feature: AVX512EVEX
+func (x Int32x16) PopCount() Int32x16
+
+// PopCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX
+func (x Int64x2) PopCount() Int64x2
+
+// PopCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX
+func (x Int64x4) PopCount() Int64x4
+
+// PopCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX
+func (x Int64x8) PopCount() Int64x8
+
+// PopCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTB, CPU Feature: AVX512EVEX
+func (x Uint8x16) PopCount() Uint8x16
+
+// PopCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTB, CPU Feature: AVX512EVEX
+func (x Uint8x32) PopCount() Uint8x32
+
+// PopCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTB, CPU Feature: AVX512EVEX
+func (x Uint8x64) PopCount() Uint8x64
+
+// PopCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTW, CPU Feature: AVX512EVEX
+func (x Uint16x8) PopCount() Uint16x8
+
+// PopCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTW, CPU Feature: AVX512EVEX
+func (x Uint16x16) PopCount() Uint16x16
+
+// PopCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTW, CPU Feature: AVX512EVEX
+func (x Uint16x32) PopCount() Uint16x32
+
+// PopCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTD, CPU Feature: AVX512EVEX
+func (x Uint32x4) PopCount() Uint32x4
+
+// PopCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTD, CPU Feature: AVX512EVEX
+func (x Uint32x8) PopCount() Uint32x8
+
+// PopCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTD, CPU Feature: AVX512EVEX
+func (x Uint32x16) PopCount() Uint32x16
+
+// PopCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX
+func (x Uint64x2) PopCount() Uint64x2
+
+// PopCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX
+func (x Uint64x4) PopCount() Uint64x4
+
+// PopCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX
+func (x Uint64x8) PopCount() Uint64x8
+
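PopCount is lane-wise; the scalar equivalent lives in math/bits. A one-lane model (illustrative helper):

	import "math/bits"

	// popCountLane models one lane of Uint8x16.PopCount.
	func popCountLane(v uint8) uint8 {
		return uint8(bits.OnesCount8(v)) // e.g. 0b10110010 -> 4
	}
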
+/* Round */
+
+// Round rounds elements to the nearest integer.
+// Const Immediate = 0.
+//
+// Asm: VROUNDPS, CPU Feature: AVX
+func (x Float32x4) Round() Float32x4
+
+// Round rounds elements to the nearest integer.
+// Const Immediate = 0.
+//
+// Asm: VROUNDPS, CPU Feature: AVX
+func (x Float32x8) Round() Float32x8
+
+// Round rounds elements to the nearest integer.
+// Const Immediate = 0.
+//
+// Asm: VROUNDPD, CPU Feature: AVX
+func (x Float64x2) Round() Float64x2
+
+// Round rounds elements to the nearest integer.
+// Const Immediate = 0.
+//
+// Asm: VROUNDPD, CPU Feature: AVX
+func (x Float64x4) Round() Float64x4
+
+/* RoundSuppressExceptionWithPrecision */
+
+// RoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions.
+// Const Immediate = 8.
+//
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) RoundSuppressExceptionWithPrecision(imm8 uint8) Float32x4
+
+// RoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions.
+// Const Immediate = 8.
+//
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) RoundSuppressExceptionWithPrecision(imm8 uint8) Float32x8
+
+// RoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions.
+// Const Immediate = 8.
+//
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) RoundSuppressExceptionWithPrecision(imm8 uint8) Float32x16
+
+// RoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions.
+// Const Immediate = 8.
+//
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) RoundSuppressExceptionWithPrecision(imm8 uint8) Float64x2
+
+// RoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions.
+// Const Immediate = 8.
+//
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) RoundSuppressExceptionWithPrecision(imm8 uint8) Float64x4
+
+// RoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions.
+// Const Immediate = 8.
+//
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) RoundSuppressExceptionWithPrecision(imm8 uint8) Float64x8
+
+/* RoundWithPrecision */
+
+// RoundWithPrecision rounds elements with specified precision.
+// Const Immediate = 0.
+//
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) RoundWithPrecision(imm8 uint8) Float32x4
+
+// RoundWithPrecision rounds elements with specified precision.
+// Const Immediate = 0.
+//
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) RoundWithPrecision(imm8 uint8) Float32x8
+
+// RoundWithPrecision rounds elements with specified precision.
+// Const Immediate = 0.
+//
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) RoundWithPrecision(imm8 uint8) Float32x16
+
+// RoundWithPrecision rounds elements with specified precision.
+// Const Immediate = 0.
+//
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) RoundWithPrecision(imm8 uint8) Float64x2
+
+// RoundWithPrecision rounds elements with specified precision.
+// Const Immediate = 0.
+//
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) RoundWithPrecision(imm8 uint8) Float64x4
+
+// RoundWithPrecision rounds elements with specified precision.
+// Const Immediate = 0.
+//
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) RoundWithPrecision(imm8 uint8) Float64x8
+
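In the VRNDSCALE encoding the immediate's upper bits select how many binary fraction bits survive, so "precision" here means rounding to a multiple of 2^-imm8, while the documented base immediate (0, or 8 with exception suppression) supplies the rounding-mode bits. A scalar model of one lane under that reading (an assumption about the generated immediate, not a statement of this API's contract):

	import "math"

	// roundWithPrecisionLane rounds v to the nearest multiple of
	// 2^-imm8, ties to even; e.g. imm8 = 2 rounds 1.30 to 1.25.
	func roundWithPrecisionLane(v float64, imm8 uint8) float64 {
		scale := math.Ldexp(1, int(imm8)) // 2^imm8
		return math.RoundToEven(v*scale) / scale
	}
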
+/* SaturatedAdd */
+
+// SaturatedAdd adds corresponding elements of two vectors with saturation.
+//
+// Asm: VPADDSB, CPU Feature: AVX
+func (x Int8x16) SaturatedAdd(y Int8x16) Int8x16
+
+// SaturatedAdd adds corresponding elements of two vectors with saturation.
+//
+// Asm: VPADDSB, CPU Feature: AVX2
+func (x Int8x32) SaturatedAdd(y Int8x32) Int8x32
+
+// SaturatedAdd adds corresponding elements of two vectors with saturation.
+//
+// Asm: VPADDSB, CPU Feature: AVX512EVEX
+func (x Int8x64) SaturatedAdd(y Int8x64) Int8x64
+
+// SaturatedAdd adds corresponding elements of two vectors with saturation.
+//
+// Asm: VPADDSW, CPU Feature: AVX
+func (x Int16x8) SaturatedAdd(y Int16x8) Int16x8
+
+// SaturatedAdd adds corresponding elements of two vectors with saturation.
+//
+// Asm: VPADDSW, CPU Feature: AVX2
+func (x Int16x16) SaturatedAdd(y Int16x16) Int16x16
+
+// SaturatedAdd adds corresponding elements of two vectors with saturation.
+//
+// Asm: VPADDSW, CPU Feature: AVX512EVEX
+func (x Int16x32) SaturatedAdd(y Int16x32) Int16x32
+
+// SaturatedAdd adds corresponding elements of two vectors with saturation.
+//
+// Asm: VPADDSB, CPU Feature: AVX
+func (x Uint8x16) SaturatedAdd(y Uint8x16) Uint8x16
+
+// SaturatedAdd adds corresponding elements of two vectors with saturation.
+//
+// Asm: VPADDSB, CPU Feature: AVX2
+func (x Uint8x32) SaturatedAdd(y Uint8x32) Uint8x32
+
+// SaturatedAdd adds corresponding elements of two vectors with saturation.
+//
+// Asm: VPADDSB, CPU Feature: AVX512EVEX
+func (x Uint8x64) SaturatedAdd(y Uint8x64) Uint8x64
+
+// SaturatedAdd adds corresponding elements of two vectors with saturation.
+//
+// Asm: VPADDSW, CPU Feature: AVX
+func (x Uint16x8) SaturatedAdd(y Uint16x8) Uint16x8
+
+// SaturatedAdd adds corresponding elements of two vectors with saturation.
+//
+// Asm: VPADDSW, CPU Feature: AVX2
+func (x Uint16x16) SaturatedAdd(y Uint16x16) Uint16x16
+
+// SaturatedAdd adds corresponding elements of two vectors with saturation.
+//
+// Asm: VPADDSW, CPU Feature: AVX512EVEX
+func (x Uint16x32) SaturatedAdd(y Uint16x32) Uint16x32
+
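Saturating arithmetic clamps to the element type's range instead of wrapping. One signed int8 lane as a scalar sketch (illustrative helper):

	// saturatedAddInt8Lane models one lane of Int8x16.SaturatedAdd:
	// 100 + 100 yields 127, not the wrapped -56.
	func saturatedAddInt8Lane(a, b int8) int8 {
		s := int16(a) + int16(b)
		if s > 127 {
			return 127
		}
		if s < -128 {
			return -128
		}
		return int8(s)
	}
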
+/* SaturatedPairDotProdAccumulate */
+
+// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPWSSDS, CPU Feature: AVX_VNNI
+func (x Int32x4) SaturatedPairDotProdAccumulate(y Int32x4, z Int32x4) Int32x4
+
+// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPWSSDS, CPU Feature: AVX_VNNI
+func (x Int32x8) SaturatedPairDotProdAccumulate(y Int32x8, z Int32x8) Int32x8
+
+// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX
+func (x Int32x16) SaturatedPairDotProdAccumulate(y Int16x32, z Int32x16) Int32x16
+
+/* SaturatedPairwiseAdd */
+
+// SaturatedPairwiseAdd horizontally adds adjacent pairs of elements with saturation.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+//
+// Asm: VPHADDSW, CPU Feature: AVX
+func (x Int16x8) SaturatedPairwiseAdd(y Int16x8) Int16x8
+
+// SaturatedPairwiseAdd horizontally adds adjacent pairs of elements with saturation.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+//
+// Asm: VPHADDSW, CPU Feature: AVX2
+func (x Int16x16) SaturatedPairwiseAdd(y Int16x16) Int16x16
+
+/* SaturatedPairwiseSub */
+
+// SaturatedPairwiseSub horizontally subtracts adjacent pairs of elements with saturation.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+//
+// Asm: VPHSUBSW, CPU Feature: AVX
+func (x Int16x8) SaturatedPairwiseSub(y Int16x8) Int16x8
+
+// SaturatedPairwiseSub horizontally subtracts adjacent pairs of elements with saturation.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+//
+// Asm: VPHSUBSW, CPU Feature: AVX2
+func (x Int16x16) SaturatedPairwiseSub(y Int16x16) Int16x16
+
+/* SaturatedSub */
+
+// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+//
+// Asm: VPSUBSB, CPU Feature: AVX
+func (x Int8x16) SaturatedSub(y Int8x16) Int8x16
+
+// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+//
+// Asm: VPSUBSB, CPU Feature: AVX2
+func (x Int8x32) SaturatedSub(y Int8x32) Int8x32
+
+// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+//
+// Asm: VPSUBSB, CPU Feature: AVX512EVEX
+func (x Int8x64) SaturatedSub(y Int8x64) Int8x64
+
+// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+//
+// Asm: VPSUBSW, CPU Feature: AVX
+func (x Int16x8) SaturatedSub(y Int16x8) Int16x8
+
+// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+//
+// Asm: VPSUBSW, CPU Feature: AVX2
+func (x Int16x16) SaturatedSub(y Int16x16) Int16x16
+
+// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+//
+// Asm: VPSUBSW, CPU Feature: AVX512EVEX
+func (x Int16x32) SaturatedSub(y Int16x32) Int16x32
+
+// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+//
+// Asm: VPSUBSB, CPU Feature: AVX
+func (x Uint8x16) SaturatedSub(y Uint8x16) Uint8x16
+
+// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+//
+// Asm: VPSUBSB, CPU Feature: AVX2
+func (x Uint8x32) SaturatedSub(y Uint8x32) Uint8x32
+
+// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+//
+// Asm: VPSUBSB, CPU Feature: AVX512EVEX
+func (x Uint8x64) SaturatedSub(y Uint8x64) Uint8x64
+
+// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+//
+// Asm: VPSUBSW, CPU Feature: AVX
+func (x Uint16x8) SaturatedSub(y Uint16x8) Uint16x8
+
+// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+//
+// Asm: VPSUBSW, CPU Feature: AVX2
+func (x Uint16x16) SaturatedSub(y Uint16x16) Uint16x16
+
+// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+//
+// Asm: VPSUBSW, CPU Feature: AVX512EVEX
+func (x Uint16x32) SaturatedSub(y Uint16x32) Uint16x32
+
+/* SaturatedUnsignedSignedPairDotProd */
+
+// SaturatedUnsignedSignedPairDotProd multiplies the elements and adds the pairs together with saturation,
+// yielding a vector of half as many elements with twice the input element size.
+//
+// Asm: VPMADDUBSW, CPU Feature: AVX
+func (x Uint8x16) SaturatedUnsignedSignedPairDotProd(y Int8x16) Int16x8
+
+// SaturatedUnsignedSignedPairDotProd multiplies the elements and adds the pairs together with saturation,
+// yielding a vector of half as many elements with twice the input element size.
+//
+// Asm: VPMADDUBSW, CPU Feature: AVX2
+func (x Uint8x32) SaturatedUnsignedSignedPairDotProd(y Int8x32) Int16x16
+
+// SaturatedUnsignedSignedPairDotProd multiplies the elements and adds the pairs together with saturation,
+// yielding a vector of half as many elements with twice the input element size.
+//
+// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
+func (x Uint16x8) SaturatedUnsignedSignedPairDotProd(y Int16x8) Int16x8
+
+// SaturatedUnsignedSignedPairDotProd multiplies the elements and adds the pairs together with saturation,
+// yielding a vector of half as many elements with twice the input element size.
+//
+// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
+func (x Uint16x16) SaturatedUnsignedSignedPairDotProd(y Int16x16) Int16x16
+
+// SaturatedUnsignedSignedPairDotProd multiplies the elements and adds the pairs together with saturation,
+// yielding a vector of half as many elements with twice the input element size.
+//
+// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
+func (x Uint16x32) SaturatedUnsignedSignedPairDotProd(y Int16x32) Int16x32
+
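VPMADDUBSW treats the receiver's elements as unsigned and the argument's as signed, and each pairwise sum saturates to int16. A scalar sketch of one destination lane, following the instruction's byte-wise semantics (hypothetical helper):

	// saturatedUSPairDotLane sums two unsigned-by-signed byte products
	// with signed 16-bit saturation.
	func saturatedUSPairDotLane(x0, x1 uint8, y0, y1 int8) int16 {
		s := int32(x0)*int32(y0) + int32(x1)*int32(y1)
		if s > 32767 {
			return 32767
		}
		if s < -32768 {
			return -32768
		}
		return int16(s)
	}
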
+/* SaturatedUnsignedSignedQuadDotProdAccumulate */
+
+// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSDS, CPU Feature: AVX_VNNI
+func (x Int32x4) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint32x4, z Int32x4) Int32x4
+
+// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSDS, CPU Feature: AVX_VNNI
+func (x Int32x8) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint32x8, z Int32x8) Int32x8
+
+// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
+func (x Int32x16) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16) Int32x16
+
+// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSDS, CPU Feature: AVX_VNNI
+func (x Uint32x4) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint32x4, z Int32x4) Uint32x4
+
+// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSDS, CPU Feature: AVX_VNNI
+func (x Uint32x8) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint32x8, z Int32x8) Uint32x8
+
+// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
+func (x Uint32x16) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16) Uint32x16
+
+/* Sign */
+
+// Sign returns the product of the first operand with -1, 0, or 1,
+// whichever constant is nearest to the value of the second operand.
+//
+// Asm: VPSIGNB, CPU Feature: AVX
+func (x Int8x16) Sign(y Int8x16) Int8x16
+
+// Sign returns the product of the first operand with -1, 0, or 1,
+// whichever constant is nearest to the value of the second operand.
+//
+// Asm: VPSIGNB, CPU Feature: AVX2
+func (x Int8x32) Sign(y Int8x32) Int8x32
+
+// Sign returns the product of the first operand with -1, 0, or 1,
+// whichever constant is nearest to the value of the second operand.
+//
+// Asm: VPSIGNW, CPU Feature: AVX
+func (x Int16x8) Sign(y Int16x8) Int16x8
+
+// Sign returns the product of the first operand with -1, 0, or 1,
+// whichever constant is nearest to the value of the second operand.
+//
+// Asm: VPSIGNW, CPU Feature: AVX2
+func (x Int16x16) Sign(y Int16x16) Int16x16
+
+// Sign returns the product of the first operand with -1, 0, or 1,
+// whichever constant is nearest to the value of the second operand.
+//
+// Asm: VPSIGND, CPU Feature: AVX
+func (x Int32x4) Sign(y Int32x4) Int32x4
+
+// Sign returns the product of the first operand with -1, 0, or 1,
+// whichever constant is nearest to the value of the second operand.
+//
+// Asm: VPSIGND, CPU Feature: AVX2
+func (x Int32x8) Sign(y Int32x8) Int32x8
+
+/* Sqrt */
+
+// Sqrt computes the square root of each element.
+//
+// Asm: VSQRTPS, CPU Feature: AVX
+func (x Float32x4) Sqrt() Float32x4
+
+// Sqrt computes the square root of each element.
+//
+// Asm: VSQRTPS, CPU Feature: AVX
+func (x Float32x8) Sqrt() Float32x8
+
+// Sqrt computes the square root of each element.
+//
+// Asm: VSQRTPS, CPU Feature: AVX512EVEX
+func (x Float32x16) Sqrt() Float32x16
+
+// Sqrt computes the square root of each element.
+//
+// Asm: VSQRTPD, CPU Feature: AVX
+func (x Float64x2) Sqrt() Float64x2
+
+// Sqrt computes the square root of each element.
+//
+// Asm: VSQRTPD, CPU Feature: AVX
+func (x Float64x4) Sqrt() Float64x4
+
+// Sqrt computes the square root of each element.
+//
+// Asm: VSQRTPD, CPU Feature: AVX512EVEX
+func (x Float64x8) Sqrt() Float64x8
+
+/* Sub */
+
+// Sub subtracts corresponding elements of two vectors.
+//
+// Asm: VSUBPS, CPU Feature: AVX
+func (x Float32x4) Sub(y Float32x4) Float32x4
+
+// Sub subtracts corresponding elements of two vectors.
+//
+// Asm: VSUBPS, CPU Feature: AVX
+func (x Float32x8) Sub(y Float32x8) Float32x8
+
+// Sub subtracts corresponding elements of two vectors.
+//
+// Asm: VSUBPS, CPU Feature: AVX512EVEX
+func (x Float32x16) Sub(y Float32x16) Float32x16
+
+// Sub subtracts corresponding elements of two vectors.
+//
+// Asm: VSUBPD, CPU Feature: AVX
+func (x Float64x2) Sub(y Float64x2) Float64x2
+
+// Sub subtracts corresponding elements of two vectors.
+//
+// Asm: VSUBPD, CPU Feature: AVX
+func (x Float64x4) Sub(y Float64x4) Float64x4
+
+// Sub subtracts corresponding elements of two vectors.
+//
+// Asm: VSUBPD, CPU Feature: AVX512EVEX
+func (x Float64x8) Sub(y Float64x8) Float64x8
+
+// Sub subtracts corresponding elements of two vectors.
+// +// Asm: VPSUBB, CPU Feature: AVX +func (x Int8x16) Sub(y Int8x16) Int8x16 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBB, CPU Feature: AVX2 +func (x Int8x32) Sub(y Int8x32) Int8x32 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBB, CPU Feature: AVX512EVEX +func (x Int8x64) Sub(y Int8x64) Int8x64 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBW, CPU Feature: AVX +func (x Int16x8) Sub(y Int16x8) Int16x8 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBW, CPU Feature: AVX2 +func (x Int16x16) Sub(y Int16x16) Int16x16 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBW, CPU Feature: AVX512EVEX +func (x Int16x32) Sub(y Int16x32) Int16x32 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBD, CPU Feature: AVX +func (x Int32x4) Sub(y Int32x4) Int32x4 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBD, CPU Feature: AVX2 +func (x Int32x8) Sub(y Int32x8) Int32x8 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBD, CPU Feature: AVX512EVEX +func (x Int32x16) Sub(y Int32x16) Int32x16 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBQ, CPU Feature: AVX +func (x Int64x2) Sub(y Int64x2) Int64x2 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBQ, CPU Feature: AVX2 +func (x Int64x4) Sub(y Int64x4) Int64x4 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBQ, CPU Feature: AVX512EVEX +func (x Int64x8) Sub(y Int64x8) Int64x8 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBB, CPU Feature: AVX +func (x Uint8x16) Sub(y Uint8x16) Uint8x16 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBB, CPU Feature: AVX2 +func (x Uint8x32) Sub(y Uint8x32) Uint8x32 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBB, CPU Feature: AVX512EVEX +func (x Uint8x64) Sub(y Uint8x64) Uint8x64 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBW, CPU Feature: AVX +func (x Uint16x8) Sub(y Uint16x8) Uint16x8 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBW, CPU Feature: AVX2 +func (x Uint16x16) Sub(y Uint16x16) Uint16x16 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBW, CPU Feature: AVX512EVEX +func (x Uint16x32) Sub(y Uint16x32) Uint16x32 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBD, CPU Feature: AVX +func (x Uint32x4) Sub(y Uint32x4) Uint32x4 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBD, CPU Feature: AVX2 +func (x Uint32x8) Sub(y Uint32x8) Uint32x8 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBD, CPU Feature: AVX512EVEX +func (x Uint32x16) Sub(y Uint32x16) Uint32x16 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBQ, CPU Feature: AVX +func (x Uint64x2) Sub(y Uint64x2) Uint64x2 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBQ, CPU Feature: AVX2 +func (x Uint64x4) Sub(y Uint64x4) Uint64x4 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBQ, CPU Feature: AVX512EVEX +func (x Uint64x8) Sub(y Uint64x8) Uint64x8 + +/* Trunc */ + +// Trunc truncates elements towards zero. +// Const Immediate = 3. 
+// +// Asm: VROUNDPS, CPU Feature: AVX +func (x Float32x4) Trunc() Float32x4 + +// Trunc truncates elements towards zero. +// Const Immediate = 3. +// +// Asm: VROUNDPS, CPU Feature: AVX +func (x Float32x8) Trunc() Float32x8 + +// Trunc truncates elements towards zero. +// Const Immediate = 3. +// +// Asm: VROUNDPD, CPU Feature: AVX +func (x Float64x2) Trunc() Float64x2 + +// Trunc truncates elements towards zero. +// Const Immediate = 3. +// +// Asm: VROUNDPD, CPU Feature: AVX +func (x Float64x4) Trunc() Float64x4 + +/* TruncSuppressExceptionWithPrecision */ + +// TruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions. +// Const Immediate = 11. +// +// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX +func (x Float32x4) TruncSuppressExceptionWithPrecision(imm8 uint8) Float32x4 + +// TruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions. +// Const Immediate = 11. +// +// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX +func (x Float32x8) TruncSuppressExceptionWithPrecision(imm8 uint8) Float32x8 + +// TruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions. +// Const Immediate = 11. +// +// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX +func (x Float32x16) TruncSuppressExceptionWithPrecision(imm8 uint8) Float32x16 + +// TruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions. +// Const Immediate = 11. +// +// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX +func (x Float64x2) TruncSuppressExceptionWithPrecision(imm8 uint8) Float64x2 + +// TruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions. +// Const Immediate = 11. +// +// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX +func (x Float64x4) TruncSuppressExceptionWithPrecision(imm8 uint8) Float64x4 + +// TruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions. +// Const Immediate = 11. +// +// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX +func (x Float64x8) TruncSuppressExceptionWithPrecision(imm8 uint8) Float64x8 + +/* TruncWithPrecision */ + +// TruncWithPrecision truncates elements with specified precision. +// Const Immediate = 3. +// +// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX +func (x Float32x4) TruncWithPrecision(imm8 uint8) Float32x4 + +// TruncWithPrecision truncates elements with specified precision. +// Const Immediate = 3. +// +// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX +func (x Float32x8) TruncWithPrecision(imm8 uint8) Float32x8 + +// TruncWithPrecision truncates elements with specified precision. +// Const Immediate = 3. +// +// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX +func (x Float32x16) TruncWithPrecision(imm8 uint8) Float32x16 + +// TruncWithPrecision truncates elements with specified precision. +// Const Immediate = 3. +// +// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX +func (x Float64x2) TruncWithPrecision(imm8 uint8) Float64x2 + +// TruncWithPrecision truncates elements with specified precision. +// Const Immediate = 3. +// +// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX +func (x Float64x4) TruncWithPrecision(imm8 uint8) Float64x4 + +// TruncWithPrecision truncates elements with specified precision. +// Const Immediate = 3. 
+//
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) TruncWithPrecision(imm8 uint8) Float64x8
+
+/* UnsignedSignedQuadDotProdAccumulate */
+
+// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSD, CPU Feature: AVX_VNNI
+func (x Int32x4) UnsignedSignedQuadDotProdAccumulate(y Uint32x4, z Int32x4) Int32x4
+
+// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSD, CPU Feature: AVX_VNNI
+func (x Int32x8) UnsignedSignedQuadDotProdAccumulate(y Uint32x8, z Int32x8) Int32x8
+
+// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
+func (x Int32x16) UnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16) Int32x16
+
+// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSD, CPU Feature: AVX_VNNI
+func (x Uint32x4) UnsignedSignedQuadDotProdAccumulate(y Uint32x4, z Int32x4) Uint32x4
+
+// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSD, CPU Feature: AVX_VNNI
+func (x Uint32x8) UnsignedSignedQuadDotProdAccumulate(y Uint32x8, z Int32x8) Uint32x8
+
+// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
+func (x Uint32x16) UnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16) Uint32x16
+
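VPDPBUSD is the non-saturating counterpart of VPDPBUSDS above: each int32 lane accumulates four unsigned-byte by signed-byte products. A one-lane scalar sketch (hypothetical helper):

	// quadDotProdAccLane adds the four byte products of one 32-bit
	// group to the accumulator lane, with ordinary wrapping addition.
	func quadDotProdAccLane(acc int32, y [4]uint8, z [4]int8) int32 {
		for i := range y {
			acc += int32(y[i]) * int32(z[i])
		}
		return acc
	}
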
+/* Xor */
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VXORPS, CPU Feature: AVX
+func (x Float32x4) Xor(y Float32x4) Float32x4
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VXORPS, CPU Feature: AVX
+func (x Float32x8) Xor(y Float32x8) Float32x8
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VXORPS, CPU Feature: AVX512EVEX
+func (x Float32x16) Xor(y Float32x16) Float32x16
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VXORPD, CPU Feature: AVX
+func (x Float64x2) Xor(y Float64x2) Float64x2
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VXORPD, CPU Feature: AVX
+func (x Float64x4) Xor(y Float64x4) Float64x4
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VXORPD, CPU Feature: AVX512EVEX
+func (x Float64x8) Xor(y Float64x8) Float64x8
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VPXOR, CPU Feature: AVX
+func (x Int8x16) Xor(y Int8x16) Int8x16
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VPXOR, CPU Feature: AVX2
+func (x Int8x32) Xor(y Int8x32) Int8x32
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VPXOR, CPU Feature: AVX
+func (x Int16x8) Xor(y Int16x8) Int16x8
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VPXOR, CPU Feature: AVX2
+func (x Int16x16) Xor(y Int16x16) Int16x16
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VPXOR, CPU Feature: AVX
+func (x Int32x4) Xor(y Int32x4) Int32x4
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VPXOR, CPU Feature: AVX2
+func (x Int32x8) Xor(y Int32x8) Int32x8
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VPXORD, CPU Feature: AVX512EVEX
+func (x Int32x16) Xor(y Int32x16) Int32x16
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VPXOR, CPU Feature: AVX
+func (x Int64x2) Xor(y Int64x2) Int64x2
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VPXOR, CPU Feature: AVX2
+func (x Int64x4) Xor(y Int64x4) Int64x4
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VPXORQ, CPU Feature: AVX512EVEX
+func (x Int64x8) Xor(y Int64x8) Int64x8
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VPXOR, CPU Feature: AVX
+func (x Uint8x16) Xor(y Uint8x16) Uint8x16
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VPXOR, CPU Feature: AVX2
+func (x Uint8x32) Xor(y Uint8x32) Uint8x32
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VPXOR, CPU Feature: AVX
+func (x Uint16x8) Xor(y Uint16x8) Uint16x8
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VPXOR, CPU Feature: AVX2
+func (x Uint16x16) Xor(y Uint16x16) Uint16x16
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VPXOR, CPU Feature: AVX
+func (x Uint32x4) Xor(y Uint32x4) Uint32x4
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VPXOR, CPU Feature: AVX2
+func (x Uint32x8) Xor(y Uint32x8) Uint32x8
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VPXORD, CPU Feature: AVX512EVEX
+func (x Uint32x16) Xor(y Uint32x16) Uint32x16
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VPXOR, CPU Feature: AVX
+func (x Uint64x2) Xor(y Uint64x2) Uint64x2
+
+// Xor performs a bitwise XOR operation between two vectors.
+//
+// Asm: VPXOR, CPU Feature: AVX2
+func (x Uint64x4) Xor(y Uint64x4) Uint64x4
+
+// Xor performs a bitwise XOR operation between two vectors.
+// +// Asm: VPXORQ, CPU Feature: AVX512EVEX +func (x Uint64x8) Xor(y Uint64x8) Uint64x8 + // Float64x8 converts from Float32x16 to Float64x8 func (from Float32x16) AsFloat64x8() (to Float64x8) diff --git a/src/simd/types_amd64.go b/src/simd/types_amd64.go index ab0f15a89e2..67f4d297024 100644 --- a/src/simd/types_amd64.go +++ b/src/simd/types_amd64.go @@ -9,6 +9,25 @@ type v128 struct { _128 struct{} } +// Int8x16 is a 128-bit SIMD vector of 16 int8 +type Int8x16 struct { + int8x16 v128 + vals [16]int8 +} + +// Len returns the number of elements in a Int8x16 +func (x Int8x16) Len() int { return 16 } + +// LoadInt8x16 loads a Int8x16 from an array +// +//go:noescape +func LoadInt8x16(y *[16]int8) Int8x16 + +// Store stores a Int8x16 to an array +// +//go:noescape +func (x Int8x16) Store(y *[16]int8) + // Int16x8 is a 128-bit SIMD vector of 8 int16 type Int16x8 struct { int16x8 v128 @@ -47,25 +66,6 @@ func LoadInt32x4(y *[4]int32) Int32x4 //go:noescape func (x Int32x4) Store(y *[4]int32) -// Int8x16 is a 128-bit SIMD vector of 16 int8 -type Int8x16 struct { - int8x16 v128 - vals [16]int8 -} - -// Len returns the number of elements in a Int8x16 -func (x Int8x16) Len() int { return 16 } - -// LoadInt8x16 loads a Int8x16 from an array -// -//go:noescape -func LoadInt8x16(y *[16]int8) Int8x16 - -// Store stores a Int8x16 to an array -// -//go:noescape -func (x Int8x16) Store(y *[16]int8) - // Int64x2 is a 128-bit SIMD vector of 2 int64 type Int64x2 struct { int64x2 v128 @@ -129,6 +129,25 @@ func LoadFloat64x2(y *[2]float64) Float64x2 //go:noescape func (x Float64x2) Store(y *[2]float64) +// Uint8x16 is a 128-bit SIMD vector of 16 uint8 +type Uint8x16 struct { + uint8x16 v128 + vals [16]uint8 +} + +// Len returns the number of elements in a Uint8x16 +func (x Uint8x16) Len() int { return 16 } + +// LoadUint8x16 loads a Uint8x16 from an array +// +//go:noescape +func LoadUint8x16(y *[16]uint8) Uint8x16 + +// Store stores a Uint8x16 to an array +// +//go:noescape +func (x Uint8x16) Store(y *[16]uint8) + // Uint16x8 is a 128-bit SIMD vector of 8 uint16 type Uint16x8 struct { uint16x8 v128 @@ -186,48 +205,48 @@ func LoadUint64x2(y *[2]uint64) Uint64x2 //go:noescape func (x Uint64x2) Store(y *[2]uint64) -// Uint8x16 is a 128-bit SIMD vector of 16 uint8 -type Uint8x16 struct { - uint8x16 v128 - vals [16]uint8 -} - -// Len returns the number of elements in a Uint8x16 -func (x Uint8x16) Len() int { return 16 } - -// LoadUint8x16 loads a Uint8x16 from an array -// -//go:noescape -func LoadUint8x16(y *[16]uint8) Uint8x16 - -// Store stores a Uint8x16 to an array -// -//go:noescape -func (x Uint8x16) Store(y *[16]uint8) - // Mask32x4 is a 128-bit SIMD vector of 4 int32 type Mask32x4 struct { int32x4 v128 vals [4]int32 } -// Mask16x8 is a 128-bit SIMD vector of 8 int16 -type Mask16x8 struct { - int16x8 v128 - vals [8]int16 -} - // Mask8x16 is a 128-bit SIMD vector of 16 int8 type Mask8x16 struct { int8x16 v128 vals [16]int8 } +// Mask16x8 is a 128-bit SIMD vector of 8 int16 +type Mask16x8 struct { + int16x8 v128 + vals [8]int16 +} + // v256 is a tag type that tells the compiler that this is really 256-bit SIMD type v256 struct { _256 struct{} } +// Int8x32 is a 256-bit SIMD vector of 32 int8 +type Int8x32 struct { + int8x32 v256 + vals [32]int8 +} + +// Len returns the number of elements in a Int8x32 +func (x Int8x32) Len() int { return 32 } + +// LoadInt8x32 loads a Int8x32 from an array +// +//go:noescape +func LoadInt8x32(y *[32]int8) Int8x32 + +// Store stores a Int8x32 to an array +// +//go:noescape 
+func (x Int8x32) Store(y *[32]int8) + // Int16x16 is a 256-bit SIMD vector of 16 int16 type Int16x16 struct { int16x16 v256 @@ -266,25 +285,6 @@ func LoadInt32x8(y *[8]int32) Int32x8 //go:noescape func (x Int32x8) Store(y *[8]int32) -// Int8x32 is a 256-bit SIMD vector of 32 int8 -type Int8x32 struct { - int8x32 v256 - vals [32]int8 -} - -// Len returns the number of elements in a Int8x32 -func (x Int8x32) Len() int { return 32 } - -// LoadInt8x32 loads a Int8x32 from an array -// -//go:noescape -func LoadInt8x32(y *[32]int8) Int8x32 - -// Store stores a Int8x32 to an array -// -//go:noescape -func (x Int8x32) Store(y *[32]int8) - // Int64x4 is a 256-bit SIMD vector of 4 int64 type Int64x4 struct { int64x4 v256 @@ -348,6 +348,25 @@ func LoadFloat64x4(y *[4]float64) Float64x4 //go:noescape func (x Float64x4) Store(y *[4]float64) +// Uint8x32 is a 256-bit SIMD vector of 32 uint8 +type Uint8x32 struct { + uint8x32 v256 + vals [32]uint8 +} + +// Len returns the number of elements in a Uint8x32 +func (x Uint8x32) Len() int { return 32 } + +// LoadUint8x32 loads a Uint8x32 from an array +// +//go:noescape +func LoadUint8x32(y *[32]uint8) Uint8x32 + +// Store stores a Uint8x32 to an array +// +//go:noescape +func (x Uint8x32) Store(y *[32]uint8) + // Uint16x16 is a 256-bit SIMD vector of 16 uint16 type Uint16x16 struct { uint16x16 v256 @@ -405,48 +424,54 @@ func LoadUint64x4(y *[4]uint64) Uint64x4 //go:noescape func (x Uint64x4) Store(y *[4]uint64) -// Uint8x32 is a 256-bit SIMD vector of 32 uint8 -type Uint8x32 struct { - uint8x32 v256 - vals [32]uint8 -} - -// Len returns the number of elements in a Uint8x32 -func (x Uint8x32) Len() int { return 32 } - -// LoadUint8x32 loads a Uint8x32 from an array -// -//go:noescape -func LoadUint8x32(y *[32]uint8) Uint8x32 - -// Store stores a Uint8x32 to an array -// -//go:noescape -func (x Uint8x32) Store(y *[32]uint8) - // Mask32x8 is a 256-bit SIMD vector of 8 int32 type Mask32x8 struct { int32x8 v256 vals [8]int32 } -// Mask16x16 is a 256-bit SIMD vector of 16 int16 -type Mask16x16 struct { - int16x16 v256 - vals [16]int16 -} - // Mask8x32 is a 256-bit SIMD vector of 32 int8 type Mask8x32 struct { int8x32 v256 vals [32]int8 } +// Mask16x16 is a 256-bit SIMD vector of 16 int16 +type Mask16x16 struct { + int16x16 v256 + vals [16]int16 +} + // v512 is a tag type that tells the compiler that this is really 512-bit SIMD type v512 struct { _512 struct{} } +// Int8x64 is a 512-bit SIMD vector of 64 int8 +type Int8x64 struct { + int8x64 v512 + vals [64]int8 +} + +// Len returns the number of elements in a Int8x64 +func (x Int8x64) Len() int { return 64 } + +// LoadInt8x64 loads a Int8x64 from an array +// +//go:noescape +func LoadInt8x64(y *[64]int8) Int8x64 + +// Store stores a Int8x64 to an array +// +//go:noescape +func (x Int8x64) Store(y *[64]int8) + +// Mask8x64 is a 512-bit SIMD vector of 64 int8 +type Mask8x64 struct { + int8x64 v512 + vals [64]int8 +} + // Int16x32 is a 512-bit SIMD vector of 32 int16 type Int16x32 struct { int16x32 v512 @@ -522,31 +547,6 @@ type Mask64x8 struct { vals [8]int64 } -// Int8x64 is a 512-bit SIMD vector of 64 int8 -type Int8x64 struct { - int8x64 v512 - vals [64]int8 -} - -// Len returns the number of elements in a Int8x64 -func (x Int8x64) Len() int { return 64 } - -// LoadInt8x64 loads a Int8x64 from an array -// -//go:noescape -func LoadInt8x64(y *[64]int8) Int8x64 - -// Store stores a Int8x64 to an array -// -//go:noescape -func (x Int8x64) Store(y *[64]int8) - -// Mask8x64 is a 512-bit SIMD vector of 64 int8 -type 
Mask8x64 struct { - int8x64 v512 - vals [64]int8 -} - // Float32x16 is a 512-bit SIMD vector of 16 float32 type Float32x16 struct { float32x16 v512 @@ -585,6 +585,25 @@ func LoadFloat64x8(y *[8]float64) Float64x8 //go:noescape func (x Float64x8) Store(y *[8]float64) +// Uint8x64 is a 512-bit SIMD vector of 64 uint8 +type Uint8x64 struct { + uint8x64 v512 + vals [64]uint8 +} + +// Len returns the number of elements in a Uint8x64 +func (x Uint8x64) Len() int { return 64 } + +// LoadUint8x64 loads a Uint8x64 from an array +// +//go:noescape +func LoadUint8x64(y *[64]uint8) Uint8x64 + +// Store stores a Uint8x64 to an array +// +//go:noescape +func (x Uint8x64) Store(y *[64]uint8) + // Uint16x32 is a 512-bit SIMD vector of 32 uint16 type Uint16x32 struct { uint16x32 v512 @@ -641,22 +660,3 @@ func LoadUint64x8(y *[8]uint64) Uint64x8 // //go:noescape func (x Uint64x8) Store(y *[8]uint64) - -// Uint8x64 is a 512-bit SIMD vector of 64 uint8 -type Uint8x64 struct { - uint8x64 v512 - vals [64]uint8 -} - -// Len returns the number of elements in a Uint8x64 -func (x Uint8x64) Len() int { return 64 } - -// LoadUint8x64 loads a Uint8x64 from an array -// -//go:noescape -func LoadUint8x64(y *[64]uint8) Uint8x64 - -// Store stores a Uint8x64 to an array -// -//go:noescape -func (x Uint8x64) Store(y *[64]uint8)