[dev.simd] cmd/compile: add masked merging ops and optimizations

This CL generates optimizations for the masked variants of AVX512
instructions, for patterns of the form:

x.Op(y).Merge(z, mask) => OpMasked(z, x, y, mask), where OpMasked is
resultInArg0.

Change-Id: Ife7ccc9ddbf76ae921a085bd6a42b965da9bc179
Reviewed-on: https://go-review.googlesource.com/c/go/+/718160
Reviewed-by: David Chase <drchase@google.com>
TryBot-Bypass: Junyang Shao <shaojunyang@google.com>
This commit is contained in:
Junyang Shao 2025-11-05 19:25:00 +00:00
parent 771a1dc216
commit 86b4fe31d9
15 changed files with 17367 additions and 627 deletions

View file

@ -914,12 +914,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VSQRTPDMasked128,
ssa.OpAMD64VSQRTPDMasked256,
ssa.OpAMD64VSQRTPDMasked512,
ssa.OpAMD64VMOVUPSMasked128,
ssa.OpAMD64VMOVUPSMasked256,
ssa.OpAMD64VMOVUPSMasked512,
ssa.OpAMD64VMOVUPDMasked128,
ssa.OpAMD64VMOVUPDMasked256,
ssa.OpAMD64VMOVUPDMasked512,
ssa.OpAMD64VMOVDQU8Masked128,
ssa.OpAMD64VMOVDQU8Masked256,
ssa.OpAMD64VMOVDQU8Masked512,
@ -1225,6 +1219,129 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPDPBUSDSMasked128,
ssa.OpAMD64VPDPBUSDSMasked256,
ssa.OpAMD64VPDPBUSDSMasked512,
ssa.OpAMD64VADDPSMasked128Merging,
ssa.OpAMD64VADDPSMasked256Merging,
ssa.OpAMD64VADDPSMasked512Merging,
ssa.OpAMD64VADDPDMasked128Merging,
ssa.OpAMD64VADDPDMasked256Merging,
ssa.OpAMD64VADDPDMasked512Merging,
ssa.OpAMD64VPADDBMasked128Merging,
ssa.OpAMD64VPADDBMasked256Merging,
ssa.OpAMD64VPADDBMasked512Merging,
ssa.OpAMD64VPADDWMasked128Merging,
ssa.OpAMD64VPADDWMasked256Merging,
ssa.OpAMD64VPADDWMasked512Merging,
ssa.OpAMD64VPADDDMasked128Merging,
ssa.OpAMD64VPADDDMasked256Merging,
ssa.OpAMD64VPADDDMasked512Merging,
ssa.OpAMD64VPADDQMasked128Merging,
ssa.OpAMD64VPADDQMasked256Merging,
ssa.OpAMD64VPADDQMasked512Merging,
ssa.OpAMD64VPADDSBMasked128Merging,
ssa.OpAMD64VPADDSBMasked256Merging,
ssa.OpAMD64VPADDSBMasked512Merging,
ssa.OpAMD64VPADDSWMasked128Merging,
ssa.OpAMD64VPADDSWMasked256Merging,
ssa.OpAMD64VPADDSWMasked512Merging,
ssa.OpAMD64VPADDUSBMasked128Merging,
ssa.OpAMD64VPADDUSBMasked256Merging,
ssa.OpAMD64VPADDUSBMasked512Merging,
ssa.OpAMD64VPADDUSWMasked128Merging,
ssa.OpAMD64VPADDUSWMasked256Merging,
ssa.OpAMD64VPADDUSWMasked512Merging,
ssa.OpAMD64VPANDDMasked128Merging,
ssa.OpAMD64VPANDDMasked256Merging,
ssa.OpAMD64VPANDDMasked512Merging,
ssa.OpAMD64VPANDQMasked128Merging,
ssa.OpAMD64VPANDQMasked256Merging,
ssa.OpAMD64VPANDQMasked512Merging,
ssa.OpAMD64VPAVGBMasked128Merging,
ssa.OpAMD64VPAVGBMasked256Merging,
ssa.OpAMD64VPAVGBMasked512Merging,
ssa.OpAMD64VPAVGWMasked128Merging,
ssa.OpAMD64VPAVGWMasked256Merging,
ssa.OpAMD64VPAVGWMasked512Merging,
ssa.OpAMD64VPACKSSDWMasked128Merging,
ssa.OpAMD64VPACKSSDWMasked256Merging,
ssa.OpAMD64VPACKSSDWMasked512Merging,
ssa.OpAMD64VPACKUSDWMasked128Merging,
ssa.OpAMD64VPACKUSDWMasked256Merging,
ssa.OpAMD64VPACKUSDWMasked512Merging,
ssa.OpAMD64VDIVPSMasked128Merging,
ssa.OpAMD64VDIVPSMasked256Merging,
ssa.OpAMD64VDIVPSMasked512Merging,
ssa.OpAMD64VDIVPDMasked128Merging,
ssa.OpAMD64VDIVPDMasked256Merging,
ssa.OpAMD64VDIVPDMasked512Merging,
ssa.OpAMD64VPMADDWDMasked128Merging,
ssa.OpAMD64VPMADDWDMasked256Merging,
ssa.OpAMD64VPMADDWDMasked512Merging,
ssa.OpAMD64VPMADDUBSWMasked128Merging,
ssa.OpAMD64VPMADDUBSWMasked256Merging,
ssa.OpAMD64VPMADDUBSWMasked512Merging,
ssa.OpAMD64VGF2P8MULBMasked128Merging,
ssa.OpAMD64VGF2P8MULBMasked256Merging,
ssa.OpAMD64VGF2P8MULBMasked512Merging,
ssa.OpAMD64VMAXPSMasked128Merging,
ssa.OpAMD64VMAXPSMasked256Merging,
ssa.OpAMD64VMAXPSMasked512Merging,
ssa.OpAMD64VMAXPDMasked128Merging,
ssa.OpAMD64VMAXPDMasked256Merging,
ssa.OpAMD64VMAXPDMasked512Merging,
ssa.OpAMD64VPMAXSBMasked128Merging,
ssa.OpAMD64VPMAXSBMasked256Merging,
ssa.OpAMD64VPMAXSBMasked512Merging,
ssa.OpAMD64VPMAXSWMasked128Merging,
ssa.OpAMD64VPMAXSWMasked256Merging,
ssa.OpAMD64VPMAXSWMasked512Merging,
ssa.OpAMD64VPMAXSDMasked128Merging,
ssa.OpAMD64VPMAXSDMasked256Merging,
ssa.OpAMD64VPMAXSDMasked512Merging,
ssa.OpAMD64VPMAXSQMasked128Merging,
ssa.OpAMD64VPMAXSQMasked256Merging,
ssa.OpAMD64VPMAXSQMasked512Merging,
ssa.OpAMD64VPMAXUBMasked128Merging,
ssa.OpAMD64VPMAXUBMasked256Merging,
ssa.OpAMD64VPMAXUBMasked512Merging,
ssa.OpAMD64VPMAXUWMasked128Merging,
ssa.OpAMD64VPMAXUWMasked256Merging,
ssa.OpAMD64VPMAXUWMasked512Merging,
ssa.OpAMD64VPMAXUDMasked128Merging,
ssa.OpAMD64VPMAXUDMasked256Merging,
ssa.OpAMD64VPMAXUDMasked512Merging,
ssa.OpAMD64VPMAXUQMasked128Merging,
ssa.OpAMD64VPMAXUQMasked256Merging,
ssa.OpAMD64VPMAXUQMasked512Merging,
ssa.OpAMD64VMINPSMasked128Merging,
ssa.OpAMD64VMINPSMasked256Merging,
ssa.OpAMD64VMINPSMasked512Merging,
ssa.OpAMD64VMINPDMasked128Merging,
ssa.OpAMD64VMINPDMasked256Merging,
ssa.OpAMD64VMINPDMasked512Merging,
ssa.OpAMD64VPMINSBMasked128Merging,
ssa.OpAMD64VPMINSBMasked256Merging,
ssa.OpAMD64VPMINSBMasked512Merging,
ssa.OpAMD64VPMINSWMasked128Merging,
ssa.OpAMD64VPMINSWMasked256Merging,
ssa.OpAMD64VPMINSWMasked512Merging,
ssa.OpAMD64VPMINSDMasked128Merging,
ssa.OpAMD64VPMINSDMasked256Merging,
ssa.OpAMD64VPMINSDMasked512Merging,
ssa.OpAMD64VPMINSQMasked128Merging,
ssa.OpAMD64VPMINSQMasked256Merging,
ssa.OpAMD64VPMINSQMasked512Merging,
ssa.OpAMD64VPMINUBMasked128Merging,
ssa.OpAMD64VPMINUBMasked256Merging,
ssa.OpAMD64VPMINUBMasked512Merging,
ssa.OpAMD64VPMINUWMasked128Merging,
ssa.OpAMD64VPMINUWMasked256Merging,
ssa.OpAMD64VPMINUWMasked512Merging,
ssa.OpAMD64VPMINUDMasked128Merging,
ssa.OpAMD64VPMINUDMasked256Merging,
ssa.OpAMD64VPMINUDMasked512Merging,
ssa.OpAMD64VPMINUQMasked128Merging,
ssa.OpAMD64VPMINUQMasked256Merging,
ssa.OpAMD64VPMINUQMasked512Merging,
ssa.OpAMD64VFMADD213PSMasked128,
ssa.OpAMD64VFMADD213PSMasked256,
ssa.OpAMD64VFMADD213PSMasked512,
@ -1237,12 +1354,39 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VFMADDSUB213PDMasked128,
ssa.OpAMD64VFMADDSUB213PDMasked256,
ssa.OpAMD64VFMADDSUB213PDMasked512,
ssa.OpAMD64VPMULHWMasked128Merging,
ssa.OpAMD64VPMULHWMasked256Merging,
ssa.OpAMD64VPMULHWMasked512Merging,
ssa.OpAMD64VPMULHUWMasked128Merging,
ssa.OpAMD64VPMULHUWMasked256Merging,
ssa.OpAMD64VPMULHUWMasked512Merging,
ssa.OpAMD64VMULPSMasked128Merging,
ssa.OpAMD64VMULPSMasked256Merging,
ssa.OpAMD64VMULPSMasked512Merging,
ssa.OpAMD64VMULPDMasked128Merging,
ssa.OpAMD64VMULPDMasked256Merging,
ssa.OpAMD64VMULPDMasked512Merging,
ssa.OpAMD64VPMULLWMasked128Merging,
ssa.OpAMD64VPMULLWMasked256Merging,
ssa.OpAMD64VPMULLWMasked512Merging,
ssa.OpAMD64VPMULLDMasked128Merging,
ssa.OpAMD64VPMULLDMasked256Merging,
ssa.OpAMD64VPMULLDMasked512Merging,
ssa.OpAMD64VPMULLQMasked128Merging,
ssa.OpAMD64VPMULLQMasked256Merging,
ssa.OpAMD64VPMULLQMasked512Merging,
ssa.OpAMD64VFMSUBADD213PSMasked128,
ssa.OpAMD64VFMSUBADD213PSMasked256,
ssa.OpAMD64VFMSUBADD213PSMasked512,
ssa.OpAMD64VFMSUBADD213PDMasked128,
ssa.OpAMD64VFMSUBADD213PDMasked256,
ssa.OpAMD64VFMSUBADD213PDMasked512,
ssa.OpAMD64VPORDMasked128Merging,
ssa.OpAMD64VPORDMasked256Merging,
ssa.OpAMD64VPORDMasked512Merging,
ssa.OpAMD64VPORQMasked128Merging,
ssa.OpAMD64VPORQMasked256Merging,
ssa.OpAMD64VPORQMasked512Merging,
ssa.OpAMD64VPERMI2BMasked128,
ssa.OpAMD64VPERMI2BMasked256,
ssa.OpAMD64VPERMI2BMasked512,
@ -1261,6 +1405,45 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPERMI2QMasked256,
ssa.OpAMD64VPERMI2PDMasked512,
ssa.OpAMD64VPERMI2QMasked512,
ssa.OpAMD64VPSHUFBMasked256Merging,
ssa.OpAMD64VPSHUFBMasked512Merging,
ssa.OpAMD64VPSHUFBMasked128Merging,
ssa.OpAMD64VPROLVDMasked128Merging,
ssa.OpAMD64VPROLVDMasked256Merging,
ssa.OpAMD64VPROLVDMasked512Merging,
ssa.OpAMD64VPROLVQMasked128Merging,
ssa.OpAMD64VPROLVQMasked256Merging,
ssa.OpAMD64VPROLVQMasked512Merging,
ssa.OpAMD64VPRORVDMasked128Merging,
ssa.OpAMD64VPRORVDMasked256Merging,
ssa.OpAMD64VPRORVDMasked512Merging,
ssa.OpAMD64VPRORVQMasked128Merging,
ssa.OpAMD64VPRORVQMasked256Merging,
ssa.OpAMD64VPRORVQMasked512Merging,
ssa.OpAMD64VSCALEFPSMasked128Merging,
ssa.OpAMD64VSCALEFPSMasked256Merging,
ssa.OpAMD64VSCALEFPSMasked512Merging,
ssa.OpAMD64VSCALEFPDMasked128Merging,
ssa.OpAMD64VSCALEFPDMasked256Merging,
ssa.OpAMD64VSCALEFPDMasked512Merging,
ssa.OpAMD64VPSHLDWMasked128Merging,
ssa.OpAMD64VPSHLDWMasked256Merging,
ssa.OpAMD64VPSHLDWMasked512Merging,
ssa.OpAMD64VPSHLDDMasked128Merging,
ssa.OpAMD64VPSHLDDMasked256Merging,
ssa.OpAMD64VPSHLDDMasked512Merging,
ssa.OpAMD64VPSHLDQMasked128Merging,
ssa.OpAMD64VPSHLDQMasked256Merging,
ssa.OpAMD64VPSHLDQMasked512Merging,
ssa.OpAMD64VPSHRDWMasked128Merging,
ssa.OpAMD64VPSHRDWMasked256Merging,
ssa.OpAMD64VPSHRDWMasked512Merging,
ssa.OpAMD64VPSHRDDMasked128Merging,
ssa.OpAMD64VPSHRDDMasked256Merging,
ssa.OpAMD64VPSHRDDMasked512Merging,
ssa.OpAMD64VPSHRDQMasked128Merging,
ssa.OpAMD64VPSHRDQMasked256Merging,
ssa.OpAMD64VPSHRDQMasked512Merging,
ssa.OpAMD64VPSHLDVWMasked128,
ssa.OpAMD64VPSHLDVWMasked256,
ssa.OpAMD64VPSHLDVWMasked512,
@ -1270,6 +1453,15 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPSHLDVQMasked128,
ssa.OpAMD64VPSHLDVQMasked256,
ssa.OpAMD64VPSHLDVQMasked512,
ssa.OpAMD64VPSLLVWMasked128Merging,
ssa.OpAMD64VPSLLVWMasked256Merging,
ssa.OpAMD64VPSLLVWMasked512Merging,
ssa.OpAMD64VPSLLVDMasked128Merging,
ssa.OpAMD64VPSLLVDMasked256Merging,
ssa.OpAMD64VPSLLVDMasked512Merging,
ssa.OpAMD64VPSLLVQMasked128Merging,
ssa.OpAMD64VPSLLVQMasked256Merging,
ssa.OpAMD64VPSLLVQMasked512Merging,
ssa.OpAMD64VPSHRDVWMasked128,
ssa.OpAMD64VPSHRDVWMasked256,
ssa.OpAMD64VPSHRDVWMasked512,
@ -1278,7 +1470,61 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPSHRDVDMasked512,
ssa.OpAMD64VPSHRDVQMasked128,
ssa.OpAMD64VPSHRDVQMasked256,
ssa.OpAMD64VPSHRDVQMasked512:
ssa.OpAMD64VPSHRDVQMasked512,
ssa.OpAMD64VPSRAVWMasked128Merging,
ssa.OpAMD64VPSRAVWMasked256Merging,
ssa.OpAMD64VPSRAVWMasked512Merging,
ssa.OpAMD64VPSRAVDMasked128Merging,
ssa.OpAMD64VPSRAVDMasked256Merging,
ssa.OpAMD64VPSRAVDMasked512Merging,
ssa.OpAMD64VPSRAVQMasked128Merging,
ssa.OpAMD64VPSRAVQMasked256Merging,
ssa.OpAMD64VPSRAVQMasked512Merging,
ssa.OpAMD64VPSRLVWMasked128Merging,
ssa.OpAMD64VPSRLVWMasked256Merging,
ssa.OpAMD64VPSRLVWMasked512Merging,
ssa.OpAMD64VPSRLVDMasked128Merging,
ssa.OpAMD64VPSRLVDMasked256Merging,
ssa.OpAMD64VPSRLVDMasked512Merging,
ssa.OpAMD64VPSRLVQMasked128Merging,
ssa.OpAMD64VPSRLVQMasked256Merging,
ssa.OpAMD64VPSRLVQMasked512Merging,
ssa.OpAMD64VSUBPSMasked128Merging,
ssa.OpAMD64VSUBPSMasked256Merging,
ssa.OpAMD64VSUBPSMasked512Merging,
ssa.OpAMD64VSUBPDMasked128Merging,
ssa.OpAMD64VSUBPDMasked256Merging,
ssa.OpAMD64VSUBPDMasked512Merging,
ssa.OpAMD64VPSUBBMasked128Merging,
ssa.OpAMD64VPSUBBMasked256Merging,
ssa.OpAMD64VPSUBBMasked512Merging,
ssa.OpAMD64VPSUBWMasked128Merging,
ssa.OpAMD64VPSUBWMasked256Merging,
ssa.OpAMD64VPSUBWMasked512Merging,
ssa.OpAMD64VPSUBDMasked128Merging,
ssa.OpAMD64VPSUBDMasked256Merging,
ssa.OpAMD64VPSUBDMasked512Merging,
ssa.OpAMD64VPSUBQMasked128Merging,
ssa.OpAMD64VPSUBQMasked256Merging,
ssa.OpAMD64VPSUBQMasked512Merging,
ssa.OpAMD64VPSUBSBMasked128Merging,
ssa.OpAMD64VPSUBSBMasked256Merging,
ssa.OpAMD64VPSUBSBMasked512Merging,
ssa.OpAMD64VPSUBSWMasked128Merging,
ssa.OpAMD64VPSUBSWMasked256Merging,
ssa.OpAMD64VPSUBSWMasked512Merging,
ssa.OpAMD64VPSUBUSBMasked128Merging,
ssa.OpAMD64VPSUBUSBMasked256Merging,
ssa.OpAMD64VPSUBUSBMasked512Merging,
ssa.OpAMD64VPSUBUSWMasked128Merging,
ssa.OpAMD64VPSUBUSWMasked256Merging,
ssa.OpAMD64VPSUBUSWMasked512Merging,
ssa.OpAMD64VPXORDMasked128Merging,
ssa.OpAMD64VPXORDMasked256Merging,
ssa.OpAMD64VPXORDMasked512Merging,
ssa.OpAMD64VPXORQMasked128Merging,
ssa.OpAMD64VPXORQMasked256Merging,
ssa.OpAMD64VPXORQMasked512Merging:
p = simdV3kvResultInArg0(s, v)
case ssa.OpAMD64VPSLLW128,
@ -1979,6 +2225,199 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
case ssa.OpAMD64SHA256RNDS2128:
p = simdV31x0AtIn2ResultInArg0(s, v)
case ssa.OpAMD64VPABSBMasked128Merging,
ssa.OpAMD64VPABSBMasked256Merging,
ssa.OpAMD64VPABSBMasked512Merging,
ssa.OpAMD64VPABSWMasked128Merging,
ssa.OpAMD64VPABSWMasked256Merging,
ssa.OpAMD64VPABSWMasked512Merging,
ssa.OpAMD64VPABSDMasked128Merging,
ssa.OpAMD64VPABSDMasked256Merging,
ssa.OpAMD64VPABSDMasked512Merging,
ssa.OpAMD64VPABSQMasked128Merging,
ssa.OpAMD64VPABSQMasked256Merging,
ssa.OpAMD64VPABSQMasked512Merging,
ssa.OpAMD64VBROADCASTSSMasked128Merging,
ssa.OpAMD64VPBROADCASTQMasked128Merging,
ssa.OpAMD64VPBROADCASTBMasked128Merging,
ssa.OpAMD64VPBROADCASTWMasked128Merging,
ssa.OpAMD64VPBROADCASTDMasked128Merging,
ssa.OpAMD64VBROADCASTSSMasked256Merging,
ssa.OpAMD64VBROADCASTSDMasked256Merging,
ssa.OpAMD64VPBROADCASTBMasked256Merging,
ssa.OpAMD64VPBROADCASTWMasked256Merging,
ssa.OpAMD64VPBROADCASTDMasked256Merging,
ssa.OpAMD64VPBROADCASTQMasked256Merging,
ssa.OpAMD64VBROADCASTSSMasked512Merging,
ssa.OpAMD64VBROADCASTSDMasked512Merging,
ssa.OpAMD64VPBROADCASTBMasked512Merging,
ssa.OpAMD64VPBROADCASTWMasked512Merging,
ssa.OpAMD64VPBROADCASTDMasked512Merging,
ssa.OpAMD64VPBROADCASTQMasked512Merging,
ssa.OpAMD64VRNDSCALEPSMasked128Merging,
ssa.OpAMD64VRNDSCALEPSMasked256Merging,
ssa.OpAMD64VRNDSCALEPSMasked512Merging,
ssa.OpAMD64VRNDSCALEPDMasked128Merging,
ssa.OpAMD64VRNDSCALEPDMasked256Merging,
ssa.OpAMD64VRNDSCALEPDMasked512Merging,
ssa.OpAMD64VREDUCEPSMasked128Merging,
ssa.OpAMD64VREDUCEPSMasked256Merging,
ssa.OpAMD64VREDUCEPSMasked512Merging,
ssa.OpAMD64VREDUCEPDMasked128Merging,
ssa.OpAMD64VREDUCEPDMasked256Merging,
ssa.OpAMD64VREDUCEPDMasked512Merging,
ssa.OpAMD64VPMOVWBMasked128Merging,
ssa.OpAMD64VPMOVWBMasked256Merging,
ssa.OpAMD64VPMOVDBMasked128Merging,
ssa.OpAMD64VPMOVQBMasked128Merging,
ssa.OpAMD64VPMOVSWBMasked128Merging,
ssa.OpAMD64VPMOVSWBMasked256Merging,
ssa.OpAMD64VPMOVSDBMasked128Merging,
ssa.OpAMD64VPMOVSQBMasked128Merging,
ssa.OpAMD64VPMOVSXBWMasked256Merging,
ssa.OpAMD64VPMOVSXBWMasked512Merging,
ssa.OpAMD64VPMOVDWMasked128Merging,
ssa.OpAMD64VPMOVDWMasked256Merging,
ssa.OpAMD64VPMOVQWMasked128Merging,
ssa.OpAMD64VPMOVSDWMasked128Merging,
ssa.OpAMD64VPMOVSDWMasked256Merging,
ssa.OpAMD64VPMOVSQWMasked128Merging,
ssa.OpAMD64VPMOVSXBWMasked128Merging,
ssa.OpAMD64VCVTTPS2DQMasked128Merging,
ssa.OpAMD64VCVTTPS2DQMasked256Merging,
ssa.OpAMD64VCVTTPS2DQMasked512Merging,
ssa.OpAMD64VPMOVSXBDMasked512Merging,
ssa.OpAMD64VPMOVSXWDMasked256Merging,
ssa.OpAMD64VPMOVSXWDMasked512Merging,
ssa.OpAMD64VPMOVQDMasked128Merging,
ssa.OpAMD64VPMOVQDMasked256Merging,
ssa.OpAMD64VPMOVSQDMasked128Merging,
ssa.OpAMD64VPMOVSQDMasked256Merging,
ssa.OpAMD64VPMOVSXBDMasked128Merging,
ssa.OpAMD64VPMOVSXWDMasked128Merging,
ssa.OpAMD64VPMOVSXBDMasked256Merging,
ssa.OpAMD64VPMOVSXWQMasked512Merging,
ssa.OpAMD64VPMOVSXDQMasked256Merging,
ssa.OpAMD64VPMOVSXDQMasked512Merging,
ssa.OpAMD64VPMOVSXBQMasked128Merging,
ssa.OpAMD64VPMOVSXWQMasked128Merging,
ssa.OpAMD64VPMOVSXDQMasked128Merging,
ssa.OpAMD64VPMOVSXBQMasked256Merging,
ssa.OpAMD64VPMOVSXBQMasked512Merging,
ssa.OpAMD64VPMOVUSWBMasked128Merging,
ssa.OpAMD64VPMOVUSWBMasked256Merging,
ssa.OpAMD64VPMOVUSDBMasked128Merging,
ssa.OpAMD64VPMOVUSQBMasked128Merging,
ssa.OpAMD64VPMOVZXBWMasked256Merging,
ssa.OpAMD64VPMOVZXBWMasked512Merging,
ssa.OpAMD64VPMOVUSDWMasked128Merging,
ssa.OpAMD64VPMOVUSDWMasked256Merging,
ssa.OpAMD64VPMOVUSQWMasked128Merging,
ssa.OpAMD64VPMOVZXBWMasked128Merging,
ssa.OpAMD64VCVTPS2UDQMasked128Merging,
ssa.OpAMD64VCVTPS2UDQMasked256Merging,
ssa.OpAMD64VCVTPS2UDQMasked512Merging,
ssa.OpAMD64VPMOVZXBDMasked512Merging,
ssa.OpAMD64VPMOVZXWDMasked256Merging,
ssa.OpAMD64VPMOVZXWDMasked512Merging,
ssa.OpAMD64VPMOVUSQDMasked128Merging,
ssa.OpAMD64VPMOVUSQDMasked256Merging,
ssa.OpAMD64VPMOVZXBDMasked128Merging,
ssa.OpAMD64VPMOVZXWDMasked128Merging,
ssa.OpAMD64VPMOVZXBDMasked256Merging,
ssa.OpAMD64VPMOVZXWQMasked512Merging,
ssa.OpAMD64VPMOVZXDQMasked256Merging,
ssa.OpAMD64VPMOVZXDQMasked512Merging,
ssa.OpAMD64VPMOVZXBQMasked128Merging,
ssa.OpAMD64VPMOVZXWQMasked128Merging,
ssa.OpAMD64VPMOVZXDQMasked128Merging,
ssa.OpAMD64VPMOVSXWQMasked256Merging,
ssa.OpAMD64VPMOVZXBQMasked256Merging,
ssa.OpAMD64VPMOVZXWQMasked256Merging,
ssa.OpAMD64VPMOVZXBQMasked512Merging,
ssa.OpAMD64VPLZCNTDMasked128Merging,
ssa.OpAMD64VPLZCNTDMasked256Merging,
ssa.OpAMD64VPLZCNTDMasked512Merging,
ssa.OpAMD64VPLZCNTQMasked128Merging,
ssa.OpAMD64VPLZCNTQMasked256Merging,
ssa.OpAMD64VPLZCNTQMasked512Merging,
ssa.OpAMD64VPOPCNTBMasked128Merging,
ssa.OpAMD64VPOPCNTBMasked256Merging,
ssa.OpAMD64VPOPCNTBMasked512Merging,
ssa.OpAMD64VPOPCNTWMasked128Merging,
ssa.OpAMD64VPOPCNTWMasked256Merging,
ssa.OpAMD64VPOPCNTWMasked512Merging,
ssa.OpAMD64VPOPCNTDMasked128Merging,
ssa.OpAMD64VPOPCNTDMasked256Merging,
ssa.OpAMD64VPOPCNTDMasked512Merging,
ssa.OpAMD64VPOPCNTQMasked128Merging,
ssa.OpAMD64VPOPCNTQMasked256Merging,
ssa.OpAMD64VPOPCNTQMasked512Merging,
ssa.OpAMD64VPSHUFDMasked256Merging,
ssa.OpAMD64VPSHUFDMasked512Merging,
ssa.OpAMD64VPSHUFHWMasked256Merging,
ssa.OpAMD64VPSHUFHWMasked512Merging,
ssa.OpAMD64VPSHUFHWMasked128Merging,
ssa.OpAMD64VPSHUFDMasked128Merging,
ssa.OpAMD64VRCP14PSMasked128Merging,
ssa.OpAMD64VRCP14PSMasked256Merging,
ssa.OpAMD64VRCP14PSMasked512Merging,
ssa.OpAMD64VRCP14PDMasked128Merging,
ssa.OpAMD64VRCP14PDMasked256Merging,
ssa.OpAMD64VRCP14PDMasked512Merging,
ssa.OpAMD64VRSQRT14PSMasked128Merging,
ssa.OpAMD64VRSQRT14PSMasked256Merging,
ssa.OpAMD64VRSQRT14PSMasked512Merging,
ssa.OpAMD64VRSQRT14PDMasked128Merging,
ssa.OpAMD64VRSQRT14PDMasked256Merging,
ssa.OpAMD64VRSQRT14PDMasked512Merging,
ssa.OpAMD64VPROLDMasked128Merging,
ssa.OpAMD64VPROLDMasked256Merging,
ssa.OpAMD64VPROLDMasked512Merging,
ssa.OpAMD64VPROLQMasked128Merging,
ssa.OpAMD64VPROLQMasked256Merging,
ssa.OpAMD64VPROLQMasked512Merging,
ssa.OpAMD64VPRORDMasked128Merging,
ssa.OpAMD64VPRORDMasked256Merging,
ssa.OpAMD64VPRORDMasked512Merging,
ssa.OpAMD64VPRORQMasked128Merging,
ssa.OpAMD64VPRORQMasked256Merging,
ssa.OpAMD64VPRORQMasked512Merging,
ssa.OpAMD64VSQRTPSMasked128Merging,
ssa.OpAMD64VSQRTPSMasked256Merging,
ssa.OpAMD64VSQRTPSMasked512Merging,
ssa.OpAMD64VSQRTPDMasked128Merging,
ssa.OpAMD64VSQRTPDMasked256Merging,
ssa.OpAMD64VSQRTPDMasked512Merging,
ssa.OpAMD64VPSLLWMasked128constMerging,
ssa.OpAMD64VPSLLWMasked256constMerging,
ssa.OpAMD64VPSLLWMasked512constMerging,
ssa.OpAMD64VPSLLDMasked128constMerging,
ssa.OpAMD64VPSLLDMasked256constMerging,
ssa.OpAMD64VPSLLDMasked512constMerging,
ssa.OpAMD64VPSLLQMasked128constMerging,
ssa.OpAMD64VPSLLQMasked256constMerging,
ssa.OpAMD64VPSLLQMasked512constMerging,
ssa.OpAMD64VPSRLWMasked128constMerging,
ssa.OpAMD64VPSRLWMasked256constMerging,
ssa.OpAMD64VPSRLWMasked512constMerging,
ssa.OpAMD64VPSRLDMasked128constMerging,
ssa.OpAMD64VPSRLDMasked256constMerging,
ssa.OpAMD64VPSRLDMasked512constMerging,
ssa.OpAMD64VPSRLQMasked128constMerging,
ssa.OpAMD64VPSRLQMasked256constMerging,
ssa.OpAMD64VPSRLQMasked512constMerging,
ssa.OpAMD64VPSRAWMasked128constMerging,
ssa.OpAMD64VPSRAWMasked256constMerging,
ssa.OpAMD64VPSRAWMasked512constMerging,
ssa.OpAMD64VPSRADMasked128constMerging,
ssa.OpAMD64VPSRADMasked256constMerging,
ssa.OpAMD64VPSRADMasked512constMerging,
ssa.OpAMD64VPSRAQMasked128constMerging,
ssa.OpAMD64VPSRAQMasked256constMerging,
ssa.OpAMD64VPSRAQMasked512constMerging:
p = simdV2kvResultInArg0(s, v)
default:
// Unknown reg shape
return false
@ -2843,12 +3282,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPXORQMasked256load,
ssa.OpAMD64VPXORQMasked512,
ssa.OpAMD64VPXORQMasked512load,
ssa.OpAMD64VMOVUPSMasked128,
ssa.OpAMD64VMOVUPSMasked256,
ssa.OpAMD64VMOVUPSMasked512,
ssa.OpAMD64VMOVUPDMasked128,
ssa.OpAMD64VMOVUPDMasked256,
ssa.OpAMD64VMOVUPDMasked512,
ssa.OpAMD64VMOVDQU8Masked128,
ssa.OpAMD64VMOVDQU8Masked256,
ssa.OpAMD64VMOVDQU8Masked512,

View file

@ -1963,6 +1963,22 @@ func simdV2kv(s *ssagen.State, v *ssa.Value) *obj.Prog {
return p
}
// simdV2kvResultInArg0 emits the instruction for v.Op with three register
// operands, in this order: the vector register of v.Args[1] as the source,
// the mask register of v.Args[2], and the register assigned to v as the
// destination. It returns the newly created Prog.
//
// v.Args[0] is not referenced here.
// NOTE(review): presumably the register allocator already ties the result
// register to arg0 (resultInArg0), which is what makes the masking merge into
// the destination — confirm against the op definitions.
//
// Example instruction: VPABSB X1, K3, X2 (merge-masking)
func simdV2kvResultInArg0(s *ssagen.State, v *ssa.Value) *obj.Prog {
	prog := s.Prog(v.Op.Asm())

	// Source operand: the vector input carried by the second SSA argument.
	src := simdReg(v.Args[1])
	prog.From.Type = obj.TYPE_REG
	prog.From.Reg = src

	// Every helper in the "simd*" family relies on the same convention:
	// the "K" register acting as write-mask (the "predicate" of a
	// predicated AVX512 instruction) is the last entry of the operand list.
	// TODO: verify this assumption.
	k := maskReg(v.Args[2])
	prog.AddRestSourceReg(k)

	// Destination operand: the register allocated to the value itself.
	dst := simdReg(v)
	prog.To.Type = obj.TYPE_REG
	prog.To.Reg = dst

	return prog
}
// This function is customized for the shifts.
// The 2nd arg is an XMM, and this function merely checks that.
// Example instruction: VPSLLQ Z1, X1, K1, Z2

View file

@ -213,7 +213,7 @@ func init() {
vloadk = regInfo{inputs: []regMask{gpspsb, mask, 0}, outputs: vonly}
vstorek = regInfo{inputs: []regMask{gpspsb, mask, v, 0}}
v11 = regInfo{inputs: vzonly, outputs: vonly}
v11 = regInfo{inputs: vonly, outputs: vonly} // used in resultInArg0 ops, arg0 must not be x15
v21 = regInfo{inputs: []regMask{v, vz}, outputs: vonly} // used in resultInArg0 ops, arg0 must not be x15
vk = regInfo{inputs: vzonly, outputs: maskonly}
kv = regInfo{inputs: maskonly, outputs: vonly}
@ -231,13 +231,13 @@ func init() {
gpv = regInfo{inputs: []regMask{gp}, outputs: vonly}
v2flags = regInfo{inputs: []regMask{vz, vz}}
w11 = regInfo{inputs: wzonly, outputs: wonly}
w11 = regInfo{inputs: wonly, outputs: wonly} // used in resultInArg0 ops, arg0 must not be x15
w21 = regInfo{inputs: []regMask{wz, wz}, outputs: wonly}
wk = regInfo{inputs: wzonly, outputs: maskonly}
kw = regInfo{inputs: maskonly, outputs: wonly}
w2k = regInfo{inputs: []regMask{wz, wz}, outputs: maskonly}
wkw = regInfo{inputs: []regMask{wz, mask}, outputs: wonly}
w2kw = regInfo{inputs: []regMask{wz, wz, mask}, outputs: wonly}
w2kw = regInfo{inputs: []regMask{w, wz, mask}, outputs: wonly} // used in resultInArg0 ops, arg0 must not be x15
w2kk = regInfo{inputs: []regMask{wz, wz, mask}, outputs: maskonly}
w31 = regInfo{inputs: []regMask{w, wz, wz}, outputs: wonly} // used in resultInArg0 ops, arg0 must not be x15
w3kw = regInfo{inputs: []regMask{w, wz, wz, mask}, outputs: wonly} // used in resultInArg0 ops, arg0 must not be x15

View file

@ -1862,6 +1862,424 @@
(VMOVDQU64Masked128 (VPSRAQ128const [a] x) mask) => (VPSRAQMasked128const [a] x mask)
(VMOVDQU64Masked256 (VPSRAQ256const [a] x) mask) => (VPSRAQMasked256const [a] x mask)
(VMOVDQU64Masked512 (VPSRAQ512const [a] x) mask) => (VPSRAQMasked512const [a] x mask)
// Fold a VPBLENDMQMasked512 of dst with an unmasked VPSLLQ512const result into
// the single masked op VPSLLQMasked512constMerging, passing dst as its first
// argument and reusing the same mask.
// NOTE(review): presumably dst supplies the lanes where mask is unset — confirm
// against the op definition.
(VPBLENDMQMasked512 dst (VPSLLQ512const [a] x) mask) => (VPSLLQMasked512constMerging dst [a] x mask)
(VPBLENDVB256 dst (VPMOVSXBW512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBWMasked512Merging dst x (VPMOVVec8x32ToM <types.TypeMask> mask))
(VPBLENDMDMasked512 dst (VPMOVSDW256 x) mask) => (VPMOVSDWMasked256Merging dst x mask)
(VPBLENDMDMasked512 dst (VPLZCNTD512 x) mask) => (VPLZCNTDMasked512Merging dst x mask)
(VPBLENDMWMasked512 dst (VPMAXSW512 x y) mask) => (VPMAXSWMasked512Merging dst x y mask)
(VPBLENDVB128 dst (VPMINUD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDMWMasked512 dst (VPMULHW512 x y) mask) => (VPMULHWMasked512Merging dst x y mask)
(VPBLENDMDMasked512 dst (VPMULLD512 x y) mask) => (VPMULLDMasked512Merging dst x y mask)
(VPBLENDVB128 dst (VPROLQ128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLQMasked128Merging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPMADDUBSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMADDUBSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPMAXSB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPADDSB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPADDUSB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDUSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VBROADCASTSS256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VBROADCASTSSMasked256Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPMOVSXBW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBWMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPMINSQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VMULPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMULPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDMBMasked512 dst (VGF2P8MULB512 x y) mask) => (VGF2P8MULBMasked512Merging dst x y mask)
(VPBLENDMDMasked512 dst (VMAXPS512 x y) mask) => (VMAXPSMasked512Merging dst x y mask)
(VPBLENDVB256 dst (VPOPCNTB256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTBMasked256Merging dst x (VPMOVVec8x32ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VSUBPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSUBPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDMQMasked512 dst (VPSUBQ512 x y) mask) => (VPSUBQMasked512Merging dst x y mask)
(VPBLENDVB128 dst (VPSUBUSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBUSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPMOVSXBQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBQMasked512Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
(VPBLENDMDMasked512 dst (VPMOVUSDB128 x) mask) => (VPMOVUSDBMasked128Merging dst x mask)
(VPBLENDVB256 dst (VPMAXUQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(VPBLENDMDMasked512 dst (VRSQRT14PS512 x) mask) => (VRSQRT14PSMasked512Merging dst x mask)
(VPBLENDVB256 dst (VPROLD256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLDMasked256Merging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDMQMasked512 dst (VPROLQ512 [a] x) mask) => (VPROLQMasked512Merging dst [a] x mask)
(VPBLENDMQMasked512 dst (VPSLLVQ512 x y) mask) => (VPSLLVQMasked512Merging dst x y mask)
(VPBLENDVB256 dst (VPSRAVD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VADDPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VADDPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPMOVSXDQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXDQMasked512Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPMOVUSWB128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSWBMasked128Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPMOVZXWQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXWQMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPMULLW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULLWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
(VPBLENDMBMasked512 dst (VPOPCNTB512 x) mask) => (VPOPCNTBMasked512Merging dst x mask)
(VPBLENDVB128 dst (VPSHLDQ128 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHLDQMasked128Merging dst [a] x y (VPMOVVec64x2ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPSRAQ256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAQMasked256constMerging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
(VPBLENDMDMasked512 dst (VPMOVDW256 x) mask) => (VPMOVDWMasked256Merging dst x mask)
(VPBLENDMQMasked512 dst (VPMOVUSQB128 x) mask) => (VPMOVUSQBMasked128Merging dst x mask)
(VPBLENDVB256 dst (VCVTPS2UDQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VCVTPS2UDQMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPMOVZXBQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBQMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPMAXSQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPMINSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPOPCNTW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTWMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
(VPBLENDMDMasked512 dst (VRCP14PS512 x) mask) => (VRCP14PSMasked512Merging dst x mask)
(VPBLENDVB128 dst (VPBROADCASTW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTWMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
(VPBLENDMWMasked512 dst (VPMOVWB256 x) mask) => (VPMOVWBMasked256Merging dst x mask)
(VPBLENDVB128 dst (VPRORVD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPSHLDD256 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHLDDMasked256Merging dst [a] x y (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPSLLVW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPSRLVQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPSUBUSB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBUSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
(VPBLENDMDMasked512 dst (VREDUCEPS512 [a] x) mask) => (VREDUCEPSMasked512Merging dst [a] x mask)
(VPBLENDVB256 dst (VPMAXSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VMINPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMINPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDMQMasked512 dst (VPADDQ512 x y) mask) => (VPADDQMasked512Merging dst x y mask)
(VPBLENDVB128 dst (VBROADCASTSD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VBROADCASTSDMasked256Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
(VPBLENDMQMasked512 dst (VRNDSCALEPD512 [a] x) mask) => (VRNDSCALEPDMasked512Merging dst [a] x mask)
(VPBLENDVB128 dst (VPMOVZXDQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXDQMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPMINSD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPSRAQ128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAQMasked128constMerging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPADDSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VRNDSCALEPS256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRNDSCALEPSMasked256Merging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPACKUSDW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPACKUSDWMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDMWMasked512 dst (VPMADDUBSW512 x y) mask) => (VPMADDUBSWMasked512Merging dst x y mask)
(VPBLENDVB128 dst (VPLZCNTD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPLZCNTDMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPMAXUD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPOPCNTB128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTBMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPROLVQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(VPBLENDMQMasked512 dst (VPABSQ512 x) mask) => (VPABSQMasked512Merging dst x mask)
(VPBLENDVB128 dst (VBROADCASTSD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VBROADCASTSDMasked512Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VMINPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMINPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPMULHW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULHWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
(VPBLENDMWMasked512 dst (VPSHLDW512 [a] x y) mask) => (VPSHLDWMasked512Merging dst [a] x y mask)
(VPBLENDVB128 dst (VPSHRDW128 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHRDWMasked128Merging dst [a] x y (VPMOVVec16x8ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VADDPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VADDPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPMOVZXWD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXWDMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPMOVSXWQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWQMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
(VPBLENDMDMasked512 dst (VDIVPS512 x y) mask) => (VDIVPSMasked512Merging dst x y mask)
(VPBLENDVB256 dst (VDIVPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VDIVPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPLZCNTQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPLZCNTQMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPSUBSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VREDUCEPD128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VREDUCEPDMasked128Merging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
(VPBLENDMQMasked512 dst (VPMOVUSQD256 x) mask) => (VPMOVUSQDMasked256Merging dst x mask)
(VPBLENDVB128 dst (VPMOVZXBD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBDMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
(VPBLENDMWMasked512 dst (VPMULHUW512 x y) mask) => (VPMULHUWMasked512Merging dst x y mask)
(VPBLENDVB128 dst (VPRORQ128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORQMasked128Merging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPSLLVW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPSRLVD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDMBMasked512 dst (VPSUBSB512 x y) mask) => (VPSUBSBMasked512Merging dst x y mask)
(VPBLENDVB256 dst (VPADDD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPMOVSXBW256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBWMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPMOVSDW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSDWMasked128Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPMINSD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VADDPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VADDPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDMQMasked512 dst (VADDPD512 x y) mask) => (VADDPDMasked512Merging dst x y mask)
(VPBLENDVB128 dst (VPMOVSXBD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBDMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPMOVSXDQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXDQMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDMWMasked512 dst (VPMOVUSWB256 x) mask) => (VPMOVUSWBMasked256Merging dst x mask)
(VPBLENDVB256 dst (VPOPCNTD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTDMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPROLVD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPSRLVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPADDUSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDUSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPMAXSD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPMINUB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPMULLQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULLQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VSQRTPD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSQRTPDMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPSUBD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VREDUCEPS256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VREDUCEPSMasked256Merging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDMWMasked512 dst (VPMINSW512 x y) mask) => (VPMINSWMasked512Merging dst x y mask)
(VPBLENDMQMasked512 dst (VRCP14PD512 x) mask) => (VRCP14PDMasked512Merging dst x mask)
(VPBLENDMWMasked512 dst (VPSRAVW512 x y) mask) => (VPSRAVWMasked512Merging dst x y mask)
(VPBLENDMDMasked512 dst (VPSRLVD512 x y) mask) => (VPSRLVDMasked512Merging dst x y mask)
(VPBLENDMDMasked512 dst (VPSUBD512 x y) mask) => (VPSUBDMasked512Merging dst x y mask)
(VPBLENDVB256 dst (VPSUBQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPBROADCASTD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTDMasked512Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPMOVSXWD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWDMasked512Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPMADDWD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMADDWDMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VGF2P8MULB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VGF2P8MULBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPROLD128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLDMasked128Merging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPSLLVD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPSRAD128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRADMasked128constMerging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPSRLVW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPSUBUSB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBUSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPADDUSB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDUSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPMOVZXBW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBWMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPMOVZXDQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXDQMasked256Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPROLVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPADDB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
(VPBLENDMDMasked512 dst (VPROLD512 [a] x) mask) => (VPROLDMasked512Merging dst [a] x mask)
(VPBLENDMQMasked512 dst (VPSRLVQ512 x y) mask) => (VPSRLVQMasked512Merging dst x y mask)
(VPBLENDMBMasked512 dst (VPSUBB512 x y) mask) => (VPSUBBMasked512Merging dst x y mask)
(VPBLENDVB256 dst (VPADDW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPADDQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPADDUSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDUSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPBROADCASTB128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTBMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VRNDSCALEPS128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRNDSCALEPSMasked128Merging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VREDUCEPD256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VREDUCEPDMasked256Merging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPMINUW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
(VPBLENDMDMasked512 dst (VPORD512 x y) mask) => (VPORDMasked512Merging dst x y mask)
(VPBLENDVB256 dst (VRNDSCALEPD256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRNDSCALEPDMasked256Merging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPMINSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPMULLD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULLDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPSHUFB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPRORD128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORDMasked128Merging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPRORVD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDMQMasked512 dst (VPRORVQ512 x y) mask) => (VPRORVQMasked512Merging dst x y mask)
(VPBLENDVB256 dst (VPSHLDW256 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHLDWMasked256Merging dst [a] x y (VPMOVVec16x16ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VCVTTPS2DQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VCVTTPS2DQMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VCVTTPS2DQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VCVTTPS2DQMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VMINPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMINPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDMDMasked512 dst (VPSHLDD512 [a] x y) mask) => (VPSHLDDMasked512Merging dst [a] x y mask)
(VPBLENDMQMasked512 dst (VPSRAVQ512 x y) mask) => (VPSRAVQMasked512Merging dst x y mask)
(VPBLENDVB128 dst (VSUBPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSUBPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VSUBPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSUBPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPSUBD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDMWMasked512 dst (VPADDW512 x y) mask) => (VPADDWMasked512Merging dst x y mask)
(VPBLENDMQMasked512 dst (VPANDQ512 x y) mask) => (VPANDQMasked512Merging dst x y mask)
(VPBLENDVB128 dst (VPBROADCASTB512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTBMasked512Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
(VPBLENDMDMasked512 dst (VPACKUSDW512 x y) mask) => (VPACKUSDWMasked512Merging dst x y mask)
(VPBLENDMWMasked512 dst (VPSHUFHW512 [a] x) mask) => (VPSHUFHWMasked512Merging dst [a] x mask)
(VPBLENDVB128 dst (VRCP14PD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRCP14PDMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
(VPBLENDMWMasked512 dst (VPSHRDW512 [a] x y) mask) => (VPSHRDWMasked512Merging dst [a] x y mask)
(VPBLENDVB256 dst (VSQRTPS256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSQRTPSMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDMWMasked512 dst (VPSUBSW512 x y) mask) => (VPSUBSWMasked512Merging dst x y mask)
(VPBLENDVB128 dst (VPMOVSXWD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWDMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPBROADCASTW256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTWMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPBROADCASTD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTDMasked256Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDMQMasked512 dst (VPMOVQB128 x) mask) => (VPMOVQBMasked128Merging dst x mask)
(VPBLENDVB256 dst (VPACKUSDW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPACKUSDWMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDMBMasked512 dst (VPMINSB512 x y) mask) => (VPMINSBMasked512Merging dst x y mask)
(VPBLENDVB256 dst (VPMULLD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULLDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPADDB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
(VPBLENDMBMasked512 dst (VPADDB512 x y) mask) => (VPADDBMasked512Merging dst x y mask)
(VPBLENDVB128 dst (VPADDD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPMOVWB128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVWBMasked128Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPMADDWD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMADDWDMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
(VPBLENDMDMasked512 dst (VPMAXSD512 x y) mask) => (VPMAXSDMasked512Merging dst x y mask)
(VPBLENDMQMasked512 dst (VPSHLDQ512 [a] x y) mask) => (VPSHLDQMasked512Merging dst [a] x y mask)
(VPBLENDVB128 dst (VBROADCASTSS128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VBROADCASTSSMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPMOVQD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVQDMasked128Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPMOVSXDQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXDQMasked256Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDMQMasked512 dst (VDIVPD512 x y) mask) => (VDIVPDMasked512Merging dst x y mask)
(VPBLENDMDMasked512 dst (VADDPS512 x y) mask) => (VADDPSMasked512Merging dst x y mask)
(VPBLENDVB128 dst (VPMOVSXBD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBDMasked512Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
(VPBLENDMDMasked512 dst (VPMOVUSDW256 x) mask) => (VPMOVUSDWMasked256Merging dst x mask)
(VPBLENDVB256 dst (VPMULHUW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULHUWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPMULLQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULLQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPROLVD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDMQMasked512 dst (VPROLVQ512 x y) mask) => (VPROLVQMasked512Merging dst x y mask)
(VPBLENDVB128 dst (VPSHLDW128 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHLDWMasked128Merging dst [a] x y (VPMOVVec16x8ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPMOVUSDW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSDWMasked128Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPMAXUQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPMULLW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULLWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPRORD256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORDMasked256Merging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDMQMasked512 dst (VPRORQ512 [a] x) mask) => (VPRORQMasked512Merging dst [a] x mask)
(VPBLENDVB128 dst (VPSHLDD128 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHLDDMasked128Merging dst [a] x y (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPSRAVW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VSUBPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSUBPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPBROADCASTQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTQMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPMINUD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPSHUFD256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFDMasked256Merging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPRORVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPSLLVQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(VPBLENDMWMasked512 dst (VPSUBUSW512 x y) mask) => (VPSUBUSWMasked512Merging dst x y mask)
(VPBLENDMDMasked512 dst (VPMOVSDB128 x) mask) => (VPMOVSDBMasked128Merging dst x mask)
(VPBLENDVB256 dst (VPMOVUSQD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQDMasked128Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
(VPBLENDMBMasked512 dst (VPMAXUB512 x y) mask) => (VPMAXUBMasked512Merging dst x y mask)
(VPBLENDMQMasked512 dst (VPMINSQ512 x y) mask) => (VPMINSQMasked512Merging dst x y mask)
(VPBLENDMQMasked512 dst (VSQRTPD512 x) mask) => (VSQRTPDMasked512Merging dst x mask)
(VPBLENDMDMasked512 dst (VSUBPS512 x y) mask) => (VSUBPSMasked512Merging dst x y mask)
(VPBLENDVB256 dst (VPSUBUSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBUSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
(VPBLENDMDMasked512 dst (VPMAXUD512 x y) mask) => (VPMAXUDMasked512Merging dst x y mask)
(VPBLENDVB128 dst (VBROADCASTSS512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VBROADCASTSSMasked512Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDMQMasked512 dst (VPMOVSQD256 x) mask) => (VPMOVSQDMasked256Merging dst x mask)
(VPBLENDVB128 dst (VPMOVZXBD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBDMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPMOVZXBQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBQMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VRSQRT14PD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRSQRT14PDMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
(VPBLENDMDMasked512 dst (VPRORD512 [a] x) mask) => (VPRORDMasked512Merging dst [a] x mask)
(VPBLENDMWMasked512 dst (VPSUBW512 x y) mask) => (VPSUBWMasked512Merging dst x y mask)
(VPBLENDVB128 dst (VPABSW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSWMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPADDSB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
(VPBLENDMBMasked512 dst (VPADDUSB512 x y) mask) => (VPADDUSBMasked512Merging dst x y mask)
(VPBLENDVB256 dst (VPMOVZXWD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXWDMasked512Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
(VPBLENDMQMasked512 dst (VMINPD512 x y) mask) => (VMINPDMasked512Merging dst x y mask)
(VPBLENDMQMasked512 dst (VPMULLQ512 x y) mask) => (VPMULLQMasked512Merging dst x y mask)
(VPBLENDMDMasked512 dst (VPROLVD512 x y) mask) => (VPROLVDMasked512Merging dst x y mask)
(VPBLENDVB128 dst (VPSUBW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
(VPBLENDMDMasked512 dst (VCVTTPS2DQ512 x) mask) => (VCVTTPS2DQMasked512Merging dst x mask)
(VPBLENDVB128 dst (VPMOVZXWQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXWQMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
(VPBLENDMWMasked512 dst (VPMADDWD512 x y) mask) => (VPMADDWDMasked512Merging dst x y mask)
(VPBLENDVB128 dst (VGF2P8MULB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VGF2P8MULBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPROLQ256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLQMasked256Merging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
(VPBLENDMWMasked512 dst (VPSLLVW512 x y) mask) => (VPSLLVWMasked512Merging dst x y mask)
(VPBLENDVB128 dst (VPABSD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSDMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPAVGB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPAVGBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
(VPBLENDMBMasked512 dst (VPAVGB512 x y) mask) => (VPAVGBMasked512Merging dst x y mask)
(VPBLENDVB128 dst (VPBROADCASTB256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTBMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VMAXPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMAXPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
(VPBLENDMBMasked512 dst (VPMINUB512 x y) mask) => (VPMINUBMasked512Merging dst x y mask)
(VPBLENDVB128 dst (VPMINUQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VMULPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMULPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDMQMasked512 dst (VMAXPD512 x y) mask) => (VMAXPDMasked512Merging dst x y mask)
(VPBLENDMBMasked512 dst (VPMAXSB512 x y) mask) => (VPMAXSBMasked512Merging dst x y mask)
(VPBLENDVB128 dst (VPMULHUW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULHUWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VMULPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMULPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPRORVQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPSUBB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
(VPBLENDMDMasked512 dst (VPACKSSDW512 x y) mask) => (VPACKSSDWMasked512Merging dst x y mask)
(VPBLENDVB128 dst (VCVTPS2UDQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VCVTPS2UDQMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPMOVZXDQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXDQMasked512Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPMINUB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
(VPBLENDMDMasked512 dst (VPRORVD512 x y) mask) => (VPRORVDMasked512Merging dst x y mask)
(VPBLENDVB128 dst (VSCALEFPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSCALEFPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPSLLVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPSLLW256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLWMasked256constMerging dst [a] x (VPMOVVec16x16ToM <types.TypeMask> mask))
(VPBLENDMWMasked512 dst (VPABSW512 x) mask) => (VPABSWMasked512Merging dst x mask)
(VPBLENDVB128 dst (VPMOVSXBQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBQMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VSCALEFPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSCALEFPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPSLLQ256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLQMasked256constMerging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPADDW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
(VPBLENDMQMasked512 dst (VMULPD512 x y) mask) => (VMULPDMasked512Merging dst x y mask)
(VPBLENDMQMasked512 dst (VPORQ512 x y) mask) => (VPORQMasked512Merging dst x y mask)
(VPBLENDVB128 dst (VPMOVSXWD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWDMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
(VPBLENDMQMasked512 dst (VPMOVUSQW128 x) mask) => (VPMOVUSQWMasked128Merging dst x mask)
(VPBLENDVB256 dst (VPMINSB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VRSQRT14PD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRSQRT14PDMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPSRAW128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAWMasked128constMerging dst [a] x (VPMOVVec16x8ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPABSQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSQMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
(VPBLENDMQMasked512 dst (VREDUCEPD512 [a] x) mask) => (VREDUCEPDMasked512Merging dst [a] x mask)
(VPBLENDVB128 dst (VPMULHW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULHWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPSHUFHW256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFHWMasked256Merging dst [a] x (VPMOVVec16x16ToM <types.TypeMask> mask))
(VPBLENDMWMasked512 dst (VPSRAW512const [a] x) mask) => (VPSRAWMasked512constMerging dst [a] x mask)
(VPBLENDMDMasked512 dst (VPADDD512 x y) mask) => (VPADDDMasked512Merging dst x y mask)
(VPBLENDMQMasked512 dst (VPOPCNTQ512 x) mask) => (VPOPCNTQMasked512Merging dst x mask)
(VPBLENDVB128 dst (VPSHRDD128 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHRDDMasked128Merging dst [a] x y (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPSUBB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPSUBSB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
(VPBLENDMBMasked512 dst (VPSUBUSB512 x y) mask) => (VPSUBUSBMasked512Merging dst x y mask)
(VPBLENDVB128 dst (VPADDSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
(VPBLENDMWMasked512 dst (VPADDUSW512 x y) mask) => (VPADDUSWMasked512Merging dst x y mask)
(VPBLENDVB256 dst (VMAXPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMAXPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPMAXSD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPMINSB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VMULPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMULPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(VPBLENDMDMasked512 dst (VRNDSCALEPS512 [a] x) mask) => (VRNDSCALEPSMasked512Merging dst [a] x mask)
(VPBLENDMDMasked512 dst (VCVTPS2UDQ512 x) mask) => (VCVTPS2UDQMasked512Merging dst x mask)
(VPBLENDVB256 dst (VDIVPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VDIVPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPMAXSQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VMINPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMINPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPSHUFD128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFDMasked128Merging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDMBMasked512 dst (VPSHUFB512 x y) mask) => (VPSHUFBMasked512Merging dst x y mask)
(VPBLENDVB256 dst (VPSHLDQ256 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHLDQMasked256Merging dst [a] x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPBROADCASTQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTQMasked512Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VREDUCEPS128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VREDUCEPSMasked128Merging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPMOVZXWQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXWQMasked512Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VSCALEFPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSCALEFPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(VPBLENDMDMasked512 dst (VPSHRDD512 [a] x y) mask) => (VPSHRDDMasked512Merging dst [a] x y mask)
(VPBLENDVB128 dst (VPSRAVW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VSQRTPD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSQRTPDMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
(VPBLENDMQMasked512 dst (VPXORQ512 x y) mask) => (VPXORQMasked512Merging dst x y mask)
(VPBLENDVB128 dst (VPAVGW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPAVGWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPMOVSWB128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSWBMasked128Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VDIVPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VDIVPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VDIVPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VDIVPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPMINSQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(VPBLENDMWMasked512 dst (VPOPCNTW512 x) mask) => (VPOPCNTWMasked512Merging dst x mask)
(VPBLENDVB128 dst (VPOPCNTD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTDMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDMDMasked512 dst (VPOPCNTD512 x) mask) => (VPOPCNTDMasked512Merging dst x mask)
(VPBLENDVB256 dst (VPABSD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSDMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPBROADCASTQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTQMasked256Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VRNDSCALEPD128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRNDSCALEPDMasked128Merging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
(VPBLENDMDMasked512 dst (VPMOVDB128 x) mask) => (VPMOVDBMasked128Merging dst x mask)
(VPBLENDVB128 dst (VPMOVSXWQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWQMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPMINUW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
(VPBLENDMWMasked512 dst (VPMINUW512 x y) mask) => (VPMINUWMasked512Merging dst x y mask)
(VPBLENDVB128 dst (VPOPCNTQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTQMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
(VPBLENDMQMasked512 dst (VPMOVQD256 x) mask) => (VPMOVQDMasked256Merging dst x mask)
(VPBLENDVB256 dst (VPSHRDW256 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHRDWMasked256Merging dst [a] x y (VPMOVVec16x16ToM <types.TypeMask> mask))
(VPBLENDMDMasked512 dst (VPSRAD512const [a] x) mask) => (VPSRADMasked512constMerging dst [a] x mask)
(VPBLENDVB128 dst (VPAVGB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPAVGBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
(VPBLENDMWMasked512 dst (VPAVGW512 x y) mask) => (VPAVGWMasked512Merging dst x y mask)
(VPBLENDVB128 dst (VPMOVSXBQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBQMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPMOVZXBW512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBWMasked512Merging dst x (VPMOVVec8x32ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPMAXSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPMAXUD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDMQMasked512 dst (VPMAXUQ512 x y) mask) => (VPMAXUQMasked512Merging dst x y mask)
(VPBLENDMDMasked512 dst (VMINPS512 x y) mask) => (VMINPSMasked512Merging dst x y mask)
(VPBLENDMBMasked512 dst (VPABSB512 x) mask) => (VPABSBMasked512Merging dst x mask)
(VPBLENDMDMasked512 dst (VPANDD512 x y) mask) => (VPANDDMasked512Merging dst x y mask)
(VPBLENDVB128 dst (VPMOVZXBW256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBWMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPMOVZXBD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBDMasked512Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPMAXSB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
(VPBLENDMDMasked512 dst (VPSHUFD512 [a] x) mask) => (VPSHUFDMasked512Merging dst [a] x mask)
(VPBLENDVB128 dst (VPSHUFHW128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFHWMasked128Merging dst [a] x (VPMOVVec16x8ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPSHRDQ256 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHRDQMasked256Merging dst [a] x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPMADDUBSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMADDUBSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
(VPBLENDMDMasked512 dst (VPMINSD512 x y) mask) => (VPMINSDMasked512Merging dst x y mask)
(VPBLENDMDMasked512 dst (VPSRAVD512 x y) mask) => (VPSRAVDMasked512Merging dst x y mask)
(VPBLENDMQMasked512 dst (VSUBPD512 x y) mask) => (VSUBPDMasked512Merging dst x y mask)
(VPBLENDVB128 dst (VPSLLW128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLWMasked128constMerging dst [a] x (VPMOVVec16x8ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPSLLD256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLDMasked256constMerging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDMWMasked512 dst (VPMOVSWB256 x) mask) => (VPMOVSWBMasked256Merging dst x mask)
(VPBLENDMQMasked512 dst (VPMOVQW128 x) mask) => (VPMOVQWMasked128Merging dst x mask)
(VPBLENDVB256 dst (VPMINUQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VRCP14PD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRCP14PDMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPSHRDD256 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHRDDMasked256Merging dst [a] x y (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDMQMasked512 dst (VPSHRDQ512 [a] x y) mask) => (VPSHRDQMasked512Merging dst [a] x y mask)
(VPBLENDVB128 dst (VPSLLVD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPSRLVD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPADDQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPMOVSXWQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWQMasked512Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
(VPBLENDMQMasked512 dst (VPLZCNTQ512 x) mask) => (VPLZCNTQMasked512Merging dst x mask)
(VPBLENDVB256 dst (VPMAXUB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPRORQ256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORQMasked256Merging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
(VPBLENDMQMasked512 dst (VSCALEFPD512 x y) mask) => (VSCALEFPDMasked512Merging dst x y mask)
(VPBLENDVB128 dst (VPSUBQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPSLLD128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLDMasked128constMerging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VADDPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VADDPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(VPBLENDMQMasked512 dst (VPMOVSQW128 x) mask) => (VPMOVSQWMasked128Merging dst x mask)
(VPBLENDMWMasked512 dst (VPMAXUW512 x y) mask) => (VPMAXUWMasked512Merging dst x y mask)
(VPBLENDVB256 dst (VPSHUFB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPSRLVW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPSLLQ128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLQMasked128constMerging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPSRAD256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRADMasked256constMerging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDMQMasked512 dst (VPMINUQ512 x y) mask) => (VPMINUQMasked512Merging dst x y mask)
(VPBLENDVB128 dst (VPSRAVD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDMWMasked512 dst (VPSRLVW512 x y) mask) => (VPSRLVWMasked512Merging dst x y mask)
(VPBLENDVB256 dst (VPSUBW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPSRAW256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAWMasked256constMerging dst [a] x (VPMOVVec16x16ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPABSW256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSWMasked256Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPACKSSDW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPACKSSDWMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPMOVSQD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSQDMasked128Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPMOVSXBD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBDMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPMOVZXBQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBQMasked512Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPLZCNTD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPLZCNTDMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPLZCNTQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPLZCNTQMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VMAXPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMAXPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPAVGW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPAVGWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPACKSSDW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPACKSSDWMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPMOVZXWD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXWDMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPOPCNTQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTQMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPSRAVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPSUBSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
(VPBLENDMDMasked512 dst (VPXORD512 x y) mask) => (VPXORDMasked512Merging dst x y mask)
(VPBLENDMBMasked512 dst (VPADDSB512 x y) mask) => (VPADDSBMasked512Merging dst x y mask)
(VPBLENDVB128 dst (VPBROADCASTD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTDMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VMAXPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMAXPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPMAXUW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPSHRDQ128 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHRDQMasked128Merging dst [a] x y (VPMOVVec64x2ToM <types.TypeMask> mask))
(VPBLENDMDMasked512 dst (VPSLLVD512 x y) mask) => (VPSLLVDMasked512Merging dst x y mask)
(VPBLENDMWMasked512 dst (VPSLLW512const [a] x) mask) => (VPSLLWMasked512constMerging dst [a] x mask)
(VPBLENDMDMasked512 dst (VPSLLD512const [a] x) mask) => (VPSLLDMasked512constMerging dst [a] x mask)
(VPBLENDMWMasked512 dst (VPADDSW512 x y) mask) => (VPADDSWMasked512Merging dst x y mask)
(VPBLENDMQMasked512 dst (VPMOVSQB128 x) mask) => (VPMOVSQBMasked128Merging dst x mask)
(VPBLENDMDMasked512 dst (VPMINUD512 x y) mask) => (VPMINUDMasked512Merging dst x y mask)
(VPBLENDVB256 dst (VPOPCNTW256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTWMasked256Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
(VPBLENDMQMasked512 dst (VRSQRT14PD512 x) mask) => (VRSQRT14PDMasked512Merging dst x mask)
(VPBLENDMDMasked512 dst (VSCALEFPS512 x y) mask) => (VSCALEFPSMasked512Merging dst x y mask)
(VPBLENDVB128 dst (VPMAXUW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPSRAVQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(VPBLENDMDMasked512 dst (VSQRTPS512 x) mask) => (VSQRTPSMasked512Merging dst x mask)
(VPBLENDMQMasked512 dst (VPSRAQ512const [a] x) mask) => (VPSRAQMasked512constMerging dst [a] x mask)
(VPBLENDVB128 dst (VPABSB128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSBMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPABSB256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSBMasked256Merging dst x (VPMOVVec8x32ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPABSQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSQMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPMOVDW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVDWMasked128Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDMQMasked512 dst (VPMAXSQ512 x y) mask) => (VPMAXSQMasked512Merging dst x y mask)
(VPBLENDVB128 dst (VSCALEFPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSCALEFPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VSQRTPS128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSQRTPSMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPSUBSB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
(VPBLENDMDMasked512 dst (VPABSD512 x) mask) => (VPABSDMasked512Merging dst x mask)
(VPBLENDVB128 dst (VPBROADCASTW512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTWMasked512Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPMAXUB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
(VPBLENDMDMasked512 dst (VMULPS512 x y) mask) => (VMULPSMasked512Merging dst x y mask)
(VPBLENDMWMasked512 dst (VPMULLW512 x y) mask) => (VPMULLWMasked512Merging dst x y mask)
// Load folding for unary ops: when the operand is a VMOVDQUload of the same
// vector width (bound to l), and both canMergeLoad(v, l) and clobber(l)
// succeed, rewrite to the op's "load" form, which takes the load's
// {sym} [off] ptr mem directly instead of a separately loaded vector.
(VPABSD512 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPABSD512load {sym} [off] ptr mem)
(VPABSQ128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPABSQ128load {sym} [off] ptr mem)
(VPABSQ256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPABSQ256load {sym} [off] ptr mem)

View file

@ -167,12 +167,6 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VMOVDQU64Masked128", argLength: 2, reg: wkw, asm: "VMOVDQU64", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VMOVDQU64Masked256", argLength: 2, reg: wkw, asm: "VMOVDQU64", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VMOVDQU64Masked512", argLength: 2, reg: wkw, asm: "VMOVDQU64", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VMOVUPDMasked128", argLength: 2, reg: wkw, asm: "VMOVUPD", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VMOVUPDMasked256", argLength: 2, reg: wkw, asm: "VMOVUPD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VMOVUPDMasked512", argLength: 2, reg: wkw, asm: "VMOVUPD", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VMOVUPSMasked128", argLength: 2, reg: wkw, asm: "VMOVUPS", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VMOVUPSMasked256", argLength: 2, reg: wkw, asm: "VMOVUPS", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VMOVUPSMasked512", argLength: 2, reg: wkw, asm: "VMOVUPS", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VMULPD128", argLength: 2, reg: v21, asm: "VMULPD", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VMULPD256", argLength: 2, reg: v21, asm: "VMULPD", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VMULPD512", argLength: 2, reg: w21, asm: "VMULPD", commutative: true, typ: "Vec512", resultInArg0: false},
@ -1900,5 +1894,448 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VRNDSCALEPSMasked512load", argLength: 3, reg: wkwload, asm: "VRNDSCALEPS", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VSHUFPD512load", argLength: 3, reg: w21load, asm: "VSHUFPD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VSHUFPS512load", argLength: 3, reg: w21load, asm: "VSHUFPS", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VADDPDMasked128Merging", argLength: 4, reg: w3kw, asm: "VADDPD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VADDPDMasked256Merging", argLength: 4, reg: w3kw, asm: "VADDPD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VADDPDMasked512Merging", argLength: 4, reg: w3kw, asm: "VADDPD", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VADDPSMasked128Merging", argLength: 4, reg: w3kw, asm: "VADDPS", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VADDPSMasked256Merging", argLength: 4, reg: w3kw, asm: "VADDPS", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VADDPSMasked512Merging", argLength: 4, reg: w3kw, asm: "VADDPS", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VBROADCASTSDMasked256Merging", argLength: 3, reg: w2kw, asm: "VBROADCASTSD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VBROADCASTSDMasked512Merging", argLength: 3, reg: w2kw, asm: "VBROADCASTSD", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VBROADCASTSSMasked128Merging", argLength: 3, reg: w2kw, asm: "VBROADCASTSS", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VBROADCASTSSMasked256Merging", argLength: 3, reg: w2kw, asm: "VBROADCASTSS", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VBROADCASTSSMasked512Merging", argLength: 3, reg: w2kw, asm: "VBROADCASTSS", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VCVTPS2UDQMasked128Merging", argLength: 3, reg: w2kw, asm: "VCVTPS2UDQ", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VCVTPS2UDQMasked256Merging", argLength: 3, reg: w2kw, asm: "VCVTPS2UDQ", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VCVTPS2UDQMasked512Merging", argLength: 3, reg: w2kw, asm: "VCVTPS2UDQ", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VCVTTPS2DQMasked128Merging", argLength: 3, reg: w2kw, asm: "VCVTTPS2DQ", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VCVTTPS2DQMasked256Merging", argLength: 3, reg: w2kw, asm: "VCVTTPS2DQ", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VCVTTPS2DQMasked512Merging", argLength: 3, reg: w2kw, asm: "VCVTTPS2DQ", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VDIVPDMasked128Merging", argLength: 4, reg: w3kw, asm: "VDIVPD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VDIVPDMasked256Merging", argLength: 4, reg: w3kw, asm: "VDIVPD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VDIVPDMasked512Merging", argLength: 4, reg: w3kw, asm: "VDIVPD", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VDIVPSMasked128Merging", argLength: 4, reg: w3kw, asm: "VDIVPS", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VDIVPSMasked256Merging", argLength: 4, reg: w3kw, asm: "VDIVPS", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VDIVPSMasked512Merging", argLength: 4, reg: w3kw, asm: "VDIVPS", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VGF2P8MULBMasked128Merging", argLength: 4, reg: w3kw, asm: "VGF2P8MULB", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VGF2P8MULBMasked256Merging", argLength: 4, reg: w3kw, asm: "VGF2P8MULB", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VGF2P8MULBMasked512Merging", argLength: 4, reg: w3kw, asm: "VGF2P8MULB", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VMAXPDMasked128Merging", argLength: 4, reg: w3kw, asm: "VMAXPD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VMAXPDMasked256Merging", argLength: 4, reg: w3kw, asm: "VMAXPD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VMAXPDMasked512Merging", argLength: 4, reg: w3kw, asm: "VMAXPD", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VMAXPSMasked128Merging", argLength: 4, reg: w3kw, asm: "VMAXPS", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VMAXPSMasked256Merging", argLength: 4, reg: w3kw, asm: "VMAXPS", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VMAXPSMasked512Merging", argLength: 4, reg: w3kw, asm: "VMAXPS", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VMINPDMasked128Merging", argLength: 4, reg: w3kw, asm: "VMINPD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VMINPDMasked256Merging", argLength: 4, reg: w3kw, asm: "VMINPD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VMINPDMasked512Merging", argLength: 4, reg: w3kw, asm: "VMINPD", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VMINPSMasked128Merging", argLength: 4, reg: w3kw, asm: "VMINPS", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VMINPSMasked256Merging", argLength: 4, reg: w3kw, asm: "VMINPS", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VMINPSMasked512Merging", argLength: 4, reg: w3kw, asm: "VMINPS", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VMULPDMasked128Merging", argLength: 4, reg: w3kw, asm: "VMULPD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VMULPDMasked256Merging", argLength: 4, reg: w3kw, asm: "VMULPD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VMULPDMasked512Merging", argLength: 4, reg: w3kw, asm: "VMULPD", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VMULPSMasked128Merging", argLength: 4, reg: w3kw, asm: "VMULPS", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VMULPSMasked256Merging", argLength: 4, reg: w3kw, asm: "VMULPS", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VMULPSMasked512Merging", argLength: 4, reg: w3kw, asm: "VMULPS", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPABSBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPABSB", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPABSBMasked256Merging", argLength: 3, reg: w2kw, asm: "VPABSB", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPABSBMasked512Merging", argLength: 3, reg: w2kw, asm: "VPABSB", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPABSDMasked128Merging", argLength: 3, reg: w2kw, asm: "VPABSD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPABSDMasked256Merging", argLength: 3, reg: w2kw, asm: "VPABSD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPABSDMasked512Merging", argLength: 3, reg: w2kw, asm: "VPABSD", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPABSQMasked128Merging", argLength: 3, reg: w2kw, asm: "VPABSQ", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPABSQMasked256Merging", argLength: 3, reg: w2kw, asm: "VPABSQ", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPABSQMasked512Merging", argLength: 3, reg: w2kw, asm: "VPABSQ", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPABSWMasked128Merging", argLength: 3, reg: w2kw, asm: "VPABSW", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPABSWMasked256Merging", argLength: 3, reg: w2kw, asm: "VPABSW", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPABSWMasked512Merging", argLength: 3, reg: w2kw, asm: "VPABSW", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPACKSSDWMasked128Merging", argLength: 4, reg: w3kw, asm: "VPACKSSDW", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPACKSSDWMasked256Merging", argLength: 4, reg: w3kw, asm: "VPACKSSDW", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPACKSSDWMasked512Merging", argLength: 4, reg: w3kw, asm: "VPACKSSDW", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPACKUSDWMasked128Merging", argLength: 4, reg: w3kw, asm: "VPACKUSDW", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPACKUSDWMasked256Merging", argLength: 4, reg: w3kw, asm: "VPACKUSDW", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPACKUSDWMasked512Merging", argLength: 4, reg: w3kw, asm: "VPACKUSDW", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPADDBMasked128Merging", argLength: 4, reg: w3kw, asm: "VPADDB", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPADDBMasked256Merging", argLength: 4, reg: w3kw, asm: "VPADDB", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPADDBMasked512Merging", argLength: 4, reg: w3kw, asm: "VPADDB", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPADDDMasked128Merging", argLength: 4, reg: w3kw, asm: "VPADDD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPADDDMasked256Merging", argLength: 4, reg: w3kw, asm: "VPADDD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPADDDMasked512Merging", argLength: 4, reg: w3kw, asm: "VPADDD", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPADDQMasked128Merging", argLength: 4, reg: w3kw, asm: "VPADDQ", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPADDQMasked256Merging", argLength: 4, reg: w3kw, asm: "VPADDQ", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPADDQMasked512Merging", argLength: 4, reg: w3kw, asm: "VPADDQ", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPADDSBMasked128Merging", argLength: 4, reg: w3kw, asm: "VPADDSB", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPADDSBMasked256Merging", argLength: 4, reg: w3kw, asm: "VPADDSB", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPADDSBMasked512Merging", argLength: 4, reg: w3kw, asm: "VPADDSB", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPADDSWMasked128Merging", argLength: 4, reg: w3kw, asm: "VPADDSW", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPADDSWMasked256Merging", argLength: 4, reg: w3kw, asm: "VPADDSW", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPADDSWMasked512Merging", argLength: 4, reg: w3kw, asm: "VPADDSW", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPADDUSBMasked128Merging", argLength: 4, reg: w3kw, asm: "VPADDUSB", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPADDUSBMasked256Merging", argLength: 4, reg: w3kw, asm: "VPADDUSB", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPADDUSBMasked512Merging", argLength: 4, reg: w3kw, asm: "VPADDUSB", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPADDUSWMasked128Merging", argLength: 4, reg: w3kw, asm: "VPADDUSW", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPADDUSWMasked256Merging", argLength: 4, reg: w3kw, asm: "VPADDUSW", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPADDUSWMasked512Merging", argLength: 4, reg: w3kw, asm: "VPADDUSW", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPADDWMasked128Merging", argLength: 4, reg: w3kw, asm: "VPADDW", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPADDWMasked256Merging", argLength: 4, reg: w3kw, asm: "VPADDW", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPADDWMasked512Merging", argLength: 4, reg: w3kw, asm: "VPADDW", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPANDDMasked128Merging", argLength: 4, reg: w3kw, asm: "VPANDD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPANDDMasked256Merging", argLength: 4, reg: w3kw, asm: "VPANDD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPANDDMasked512Merging", argLength: 4, reg: w3kw, asm: "VPANDD", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPANDQMasked128Merging", argLength: 4, reg: w3kw, asm: "VPANDQ", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPANDQMasked256Merging", argLength: 4, reg: w3kw, asm: "VPANDQ", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPANDQMasked512Merging", argLength: 4, reg: w3kw, asm: "VPANDQ", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPAVGBMasked128Merging", argLength: 4, reg: w3kw, asm: "VPAVGB", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPAVGBMasked256Merging", argLength: 4, reg: w3kw, asm: "VPAVGB", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPAVGBMasked512Merging", argLength: 4, reg: w3kw, asm: "VPAVGB", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPAVGWMasked128Merging", argLength: 4, reg: w3kw, asm: "VPAVGW", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPAVGWMasked256Merging", argLength: 4, reg: w3kw, asm: "VPAVGW", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPAVGWMasked512Merging", argLength: 4, reg: w3kw, asm: "VPAVGW", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPBROADCASTBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPBROADCASTB", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPBROADCASTBMasked256Merging", argLength: 3, reg: w2kw, asm: "VPBROADCASTB", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPBROADCASTBMasked512Merging", argLength: 3, reg: w2kw, asm: "VPBROADCASTB", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPBROADCASTDMasked128Merging", argLength: 3, reg: w2kw, asm: "VPBROADCASTD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPBROADCASTDMasked256Merging", argLength: 3, reg: w2kw, asm: "VPBROADCASTD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPBROADCASTDMasked512Merging", argLength: 3, reg: w2kw, asm: "VPBROADCASTD", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPBROADCASTQMasked128Merging", argLength: 3, reg: w2kw, asm: "VPBROADCASTQ", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPBROADCASTQMasked256Merging", argLength: 3, reg: w2kw, asm: "VPBROADCASTQ", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPBROADCASTQMasked512Merging", argLength: 3, reg: w2kw, asm: "VPBROADCASTQ", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPBROADCASTWMasked128Merging", argLength: 3, reg: w2kw, asm: "VPBROADCASTW", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPBROADCASTWMasked256Merging", argLength: 3, reg: w2kw, asm: "VPBROADCASTW", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPBROADCASTWMasked512Merging", argLength: 3, reg: w2kw, asm: "VPBROADCASTW", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPLZCNTDMasked128Merging", argLength: 3, reg: w2kw, asm: "VPLZCNTD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPLZCNTDMasked256Merging", argLength: 3, reg: w2kw, asm: "VPLZCNTD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPLZCNTDMasked512Merging", argLength: 3, reg: w2kw, asm: "VPLZCNTD", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPLZCNTQMasked128Merging", argLength: 3, reg: w2kw, asm: "VPLZCNTQ", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPLZCNTQMasked256Merging", argLength: 3, reg: w2kw, asm: "VPLZCNTQ", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPLZCNTQMasked512Merging", argLength: 3, reg: w2kw, asm: "VPLZCNTQ", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPMADDUBSWMasked128Merging", argLength: 4, reg: w3kw, asm: "VPMADDUBSW", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMADDUBSWMasked256Merging", argLength: 4, reg: w3kw, asm: "VPMADDUBSW", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPMADDUBSWMasked512Merging", argLength: 4, reg: w3kw, asm: "VPMADDUBSW", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPMADDWDMasked128Merging", argLength: 4, reg: w3kw, asm: "VPMADDWD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMADDWDMasked256Merging", argLength: 4, reg: w3kw, asm: "VPMADDWD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPMADDWDMasked512Merging", argLength: 4, reg: w3kw, asm: "VPMADDWD", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPMAXSBMasked128Merging", argLength: 4, reg: w3kw, asm: "VPMAXSB", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMAXSBMasked256Merging", argLength: 4, reg: w3kw, asm: "VPMAXSB", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPMAXSBMasked512Merging", argLength: 4, reg: w3kw, asm: "VPMAXSB", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPMAXSDMasked128Merging", argLength: 4, reg: w3kw, asm: "VPMAXSD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMAXSDMasked256Merging", argLength: 4, reg: w3kw, asm: "VPMAXSD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPMAXSDMasked512Merging", argLength: 4, reg: w3kw, asm: "VPMAXSD", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPMAXSQMasked128Merging", argLength: 4, reg: w3kw, asm: "VPMAXSQ", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMAXSQMasked256Merging", argLength: 4, reg: w3kw, asm: "VPMAXSQ", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPMAXSQMasked512Merging", argLength: 4, reg: w3kw, asm: "VPMAXSQ", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPMAXSWMasked128Merging", argLength: 4, reg: w3kw, asm: "VPMAXSW", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMAXSWMasked256Merging", argLength: 4, reg: w3kw, asm: "VPMAXSW", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPMAXSWMasked512Merging", argLength: 4, reg: w3kw, asm: "VPMAXSW", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPMAXUBMasked128Merging", argLength: 4, reg: w3kw, asm: "VPMAXUB", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMAXUBMasked256Merging", argLength: 4, reg: w3kw, asm: "VPMAXUB", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPMAXUBMasked512Merging", argLength: 4, reg: w3kw, asm: "VPMAXUB", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPMAXUDMasked128Merging", argLength: 4, reg: w3kw, asm: "VPMAXUD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMAXUDMasked256Merging", argLength: 4, reg: w3kw, asm: "VPMAXUD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPMAXUDMasked512Merging", argLength: 4, reg: w3kw, asm: "VPMAXUD", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPMAXUQMasked128Merging", argLength: 4, reg: w3kw, asm: "VPMAXUQ", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMAXUQMasked256Merging", argLength: 4, reg: w3kw, asm: "VPMAXUQ", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPMAXUQMasked512Merging", argLength: 4, reg: w3kw, asm: "VPMAXUQ", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPMAXUWMasked128Merging", argLength: 4, reg: w3kw, asm: "VPMAXUW", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMAXUWMasked256Merging", argLength: 4, reg: w3kw, asm: "VPMAXUW", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPMAXUWMasked512Merging", argLength: 4, reg: w3kw, asm: "VPMAXUW", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPMINSBMasked128Merging", argLength: 4, reg: w3kw, asm: "VPMINSB", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMINSBMasked256Merging", argLength: 4, reg: w3kw, asm: "VPMINSB", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPMINSBMasked512Merging", argLength: 4, reg: w3kw, asm: "VPMINSB", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPMINSDMasked128Merging", argLength: 4, reg: w3kw, asm: "VPMINSD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMINSDMasked256Merging", argLength: 4, reg: w3kw, asm: "VPMINSD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPMINSDMasked512Merging", argLength: 4, reg: w3kw, asm: "VPMINSD", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPMINSQMasked128Merging", argLength: 4, reg: w3kw, asm: "VPMINSQ", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMINSQMasked256Merging", argLength: 4, reg: w3kw, asm: "VPMINSQ", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPMINSQMasked512Merging", argLength: 4, reg: w3kw, asm: "VPMINSQ", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPMINSWMasked128Merging", argLength: 4, reg: w3kw, asm: "VPMINSW", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMINSWMasked256Merging", argLength: 4, reg: w3kw, asm: "VPMINSW", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPMINSWMasked512Merging", argLength: 4, reg: w3kw, asm: "VPMINSW", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPMINUBMasked128Merging", argLength: 4, reg: w3kw, asm: "VPMINUB", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMINUBMasked256Merging", argLength: 4, reg: w3kw, asm: "VPMINUB", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPMINUBMasked512Merging", argLength: 4, reg: w3kw, asm: "VPMINUB", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPMINUDMasked128Merging", argLength: 4, reg: w3kw, asm: "VPMINUD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMINUDMasked256Merging", argLength: 4, reg: w3kw, asm: "VPMINUD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPMINUDMasked512Merging", argLength: 4, reg: w3kw, asm: "VPMINUD", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPMINUQMasked128Merging", argLength: 4, reg: w3kw, asm: "VPMINUQ", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMINUQMasked256Merging", argLength: 4, reg: w3kw, asm: "VPMINUQ", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPMINUQMasked512Merging", argLength: 4, reg: w3kw, asm: "VPMINUQ", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPMINUWMasked128Merging", argLength: 4, reg: w3kw, asm: "VPMINUW", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMINUWMasked256Merging", argLength: 4, reg: w3kw, asm: "VPMINUW", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPMINUWMasked512Merging", argLength: 4, reg: w3kw, asm: "VPMINUW", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPMOVDBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVDB", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMOVDWMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVDW", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMOVDWMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVDW", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPMOVQBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVQB", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMOVQDMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVQD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMOVQDMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVQD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPMOVQWMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVQW", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMOVSDBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSDB", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMOVSDWMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSDW", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMOVSDWMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSDW", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPMOVSQBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSQB", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMOVSQDMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSQD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMOVSQDMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSQD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPMOVSQWMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSQW", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMOVSWBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSWB", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMOVSWBMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSWB", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPMOVSXBDMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXBD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMOVSXBDMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXBD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPMOVSXBDMasked512Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXBD", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPMOVSXBQMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXBQ", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMOVSXBQMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXBQ", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPMOVSXBQMasked512Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXBQ", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPMOVSXBWMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXBW", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMOVSXBWMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXBW", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPMOVSXBWMasked512Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXBW", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPMOVSXDQMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXDQ", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMOVSXDQMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXDQ", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPMOVSXDQMasked512Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXDQ", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPMOVSXWDMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXWD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMOVSXWDMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXWD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPMOVSXWDMasked512Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXWD", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPMOVSXWQMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMOVSXWQMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPMOVSXWQMasked512Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPMOVUSDBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMOVUSDWMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMOVUSDWMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDW", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPMOVUSQBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMOVUSQDMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMOVUSQDMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPMOVUSQWMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMOVUSWBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMOVUSWBMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSWB", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPMOVWBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVWB", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMOVWBMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVWB", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPMOVZXBDMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVZXBD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMOVZXBDMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVZXBD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPMOVZXBDMasked512Merging", argLength: 3, reg: w2kw, asm: "VPMOVZXBD", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPMOVZXBQMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVZXBQ", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMOVZXBQMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVZXBQ", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPMOVZXBQMasked512Merging", argLength: 3, reg: w2kw, asm: "VPMOVZXBQ", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPMOVZXBWMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVZXBW", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMOVZXBWMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVZXBW", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPMOVZXBWMasked512Merging", argLength: 3, reg: w2kw, asm: "VPMOVZXBW", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPMOVZXDQMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVZXDQ", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMOVZXDQMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVZXDQ", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPMOVZXDQMasked512Merging", argLength: 3, reg: w2kw, asm: "VPMOVZXDQ", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPMOVZXWDMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVZXWD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMOVZXWDMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVZXWD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPMOVZXWDMasked512Merging", argLength: 3, reg: w2kw, asm: "VPMOVZXWD", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPMOVZXWQMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVZXWQ", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMOVZXWQMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVZXWQ", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPMOVZXWQMasked512Merging", argLength: 3, reg: w2kw, asm: "VPMOVZXWQ", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPMULHUWMasked128Merging", argLength: 4, reg: w3kw, asm: "VPMULHUW", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMULHUWMasked256Merging", argLength: 4, reg: w3kw, asm: "VPMULHUW", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPMULHUWMasked512Merging", argLength: 4, reg: w3kw, asm: "VPMULHUW", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPMULHWMasked128Merging", argLength: 4, reg: w3kw, asm: "VPMULHW", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMULHWMasked256Merging", argLength: 4, reg: w3kw, asm: "VPMULHW", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPMULHWMasked512Merging", argLength: 4, reg: w3kw, asm: "VPMULHW", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPMULLDMasked128Merging", argLength: 4, reg: w3kw, asm: "VPMULLD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMULLDMasked256Merging", argLength: 4, reg: w3kw, asm: "VPMULLD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPMULLDMasked512Merging", argLength: 4, reg: w3kw, asm: "VPMULLD", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPMULLQMasked128Merging", argLength: 4, reg: w3kw, asm: "VPMULLQ", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMULLQMasked256Merging", argLength: 4, reg: w3kw, asm: "VPMULLQ", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPMULLQMasked512Merging", argLength: 4, reg: w3kw, asm: "VPMULLQ", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPMULLWMasked128Merging", argLength: 4, reg: w3kw, asm: "VPMULLW", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMULLWMasked256Merging", argLength: 4, reg: w3kw, asm: "VPMULLW", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPMULLWMasked512Merging", argLength: 4, reg: w3kw, asm: "VPMULLW", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPOPCNTBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPOPCNTB", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPOPCNTBMasked256Merging", argLength: 3, reg: w2kw, asm: "VPOPCNTB", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPOPCNTBMasked512Merging", argLength: 3, reg: w2kw, asm: "VPOPCNTB", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPOPCNTDMasked128Merging", argLength: 3, reg: w2kw, asm: "VPOPCNTD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPOPCNTDMasked256Merging", argLength: 3, reg: w2kw, asm: "VPOPCNTD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPOPCNTDMasked512Merging", argLength: 3, reg: w2kw, asm: "VPOPCNTD", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPOPCNTQMasked128Merging", argLength: 3, reg: w2kw, asm: "VPOPCNTQ", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPOPCNTQMasked256Merging", argLength: 3, reg: w2kw, asm: "VPOPCNTQ", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPOPCNTQMasked512Merging", argLength: 3, reg: w2kw, asm: "VPOPCNTQ", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPOPCNTWMasked128Merging", argLength: 3, reg: w2kw, asm: "VPOPCNTW", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPOPCNTWMasked256Merging", argLength: 3, reg: w2kw, asm: "VPOPCNTW", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPOPCNTWMasked512Merging", argLength: 3, reg: w2kw, asm: "VPOPCNTW", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPORDMasked128Merging", argLength: 4, reg: w3kw, asm: "VPORD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPORDMasked256Merging", argLength: 4, reg: w3kw, asm: "VPORD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPORDMasked512Merging", argLength: 4, reg: w3kw, asm: "VPORD", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPORQMasked128Merging", argLength: 4, reg: w3kw, asm: "VPORQ", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPORQMasked256Merging", argLength: 4, reg: w3kw, asm: "VPORQ", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPORQMasked512Merging", argLength: 4, reg: w3kw, asm: "VPORQ", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPROLVDMasked128Merging", argLength: 4, reg: w3kw, asm: "VPROLVD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPROLVDMasked256Merging", argLength: 4, reg: w3kw, asm: "VPROLVD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPROLVDMasked512Merging", argLength: 4, reg: w3kw, asm: "VPROLVD", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPROLVQMasked128Merging", argLength: 4, reg: w3kw, asm: "VPROLVQ", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPROLVQMasked256Merging", argLength: 4, reg: w3kw, asm: "VPROLVQ", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPROLVQMasked512Merging", argLength: 4, reg: w3kw, asm: "VPROLVQ", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPRORVDMasked128Merging", argLength: 4, reg: w3kw, asm: "VPRORVD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPRORVDMasked256Merging", argLength: 4, reg: w3kw, asm: "VPRORVD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPRORVDMasked512Merging", argLength: 4, reg: w3kw, asm: "VPRORVD", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPRORVQMasked128Merging", argLength: 4, reg: w3kw, asm: "VPRORVQ", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPRORVQMasked256Merging", argLength: 4, reg: w3kw, asm: "VPRORVQ", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPRORVQMasked512Merging", argLength: 4, reg: w3kw, asm: "VPRORVQ", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPSHUFBMasked128Merging", argLength: 4, reg: w3kw, asm: "VPSHUFB", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPSHUFBMasked256Merging", argLength: 4, reg: w3kw, asm: "VPSHUFB", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPSHUFBMasked512Merging", argLength: 4, reg: w3kw, asm: "VPSHUFB", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPSLLVDMasked128Merging", argLength: 4, reg: w3kw, asm: "VPSLLVD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPSLLVDMasked256Merging", argLength: 4, reg: w3kw, asm: "VPSLLVD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPSLLVDMasked512Merging", argLength: 4, reg: w3kw, asm: "VPSLLVD", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPSLLVQMasked128Merging", argLength: 4, reg: w3kw, asm: "VPSLLVQ", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPSLLVQMasked256Merging", argLength: 4, reg: w3kw, asm: "VPSLLVQ", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPSLLVQMasked512Merging", argLength: 4, reg: w3kw, asm: "VPSLLVQ", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPSLLVWMasked128Merging", argLength: 4, reg: w3kw, asm: "VPSLLVW", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPSLLVWMasked256Merging", argLength: 4, reg: w3kw, asm: "VPSLLVW", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPSLLVWMasked512Merging", argLength: 4, reg: w3kw, asm: "VPSLLVW", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPSRAVDMasked128Merging", argLength: 4, reg: w3kw, asm: "VPSRAVD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPSRAVDMasked256Merging", argLength: 4, reg: w3kw, asm: "VPSRAVD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPSRAVDMasked512Merging", argLength: 4, reg: w3kw, asm: "VPSRAVD", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPSRAVQMasked128Merging", argLength: 4, reg: w3kw, asm: "VPSRAVQ", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPSRAVQMasked256Merging", argLength: 4, reg: w3kw, asm: "VPSRAVQ", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPSRAVQMasked512Merging", argLength: 4, reg: w3kw, asm: "VPSRAVQ", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPSRAVWMasked128Merging", argLength: 4, reg: w3kw, asm: "VPSRAVW", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPSRAVWMasked256Merging", argLength: 4, reg: w3kw, asm: "VPSRAVW", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPSRAVWMasked512Merging", argLength: 4, reg: w3kw, asm: "VPSRAVW", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPSRLVDMasked128Merging", argLength: 4, reg: w3kw, asm: "VPSRLVD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPSRLVDMasked256Merging", argLength: 4, reg: w3kw, asm: "VPSRLVD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPSRLVDMasked512Merging", argLength: 4, reg: w3kw, asm: "VPSRLVD", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPSRLVQMasked128Merging", argLength: 4, reg: w3kw, asm: "VPSRLVQ", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPSRLVQMasked256Merging", argLength: 4, reg: w3kw, asm: "VPSRLVQ", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPSRLVQMasked512Merging", argLength: 4, reg: w3kw, asm: "VPSRLVQ", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPSRLVWMasked128Merging", argLength: 4, reg: w3kw, asm: "VPSRLVW", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPSRLVWMasked256Merging", argLength: 4, reg: w3kw, asm: "VPSRLVW", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPSRLVWMasked512Merging", argLength: 4, reg: w3kw, asm: "VPSRLVW", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPSUBBMasked128Merging", argLength: 4, reg: w3kw, asm: "VPSUBB", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPSUBBMasked256Merging", argLength: 4, reg: w3kw, asm: "VPSUBB", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPSUBBMasked512Merging", argLength: 4, reg: w3kw, asm: "VPSUBB", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPSUBDMasked128Merging", argLength: 4, reg: w3kw, asm: "VPSUBD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPSUBDMasked256Merging", argLength: 4, reg: w3kw, asm: "VPSUBD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPSUBDMasked512Merging", argLength: 4, reg: w3kw, asm: "VPSUBD", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPSUBQMasked128Merging", argLength: 4, reg: w3kw, asm: "VPSUBQ", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPSUBQMasked256Merging", argLength: 4, reg: w3kw, asm: "VPSUBQ", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPSUBQMasked512Merging", argLength: 4, reg: w3kw, asm: "VPSUBQ", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPSUBSBMasked128Merging", argLength: 4, reg: w3kw, asm: "VPSUBSB", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPSUBSBMasked256Merging", argLength: 4, reg: w3kw, asm: "VPSUBSB", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPSUBSBMasked512Merging", argLength: 4, reg: w3kw, asm: "VPSUBSB", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPSUBSWMasked128Merging", argLength: 4, reg: w3kw, asm: "VPSUBSW", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPSUBSWMasked256Merging", argLength: 4, reg: w3kw, asm: "VPSUBSW", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPSUBSWMasked512Merging", argLength: 4, reg: w3kw, asm: "VPSUBSW", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPSUBUSBMasked128Merging", argLength: 4, reg: w3kw, asm: "VPSUBUSB", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPSUBUSBMasked256Merging", argLength: 4, reg: w3kw, asm: "VPSUBUSB", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPSUBUSBMasked512Merging", argLength: 4, reg: w3kw, asm: "VPSUBUSB", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPSUBUSWMasked128Merging", argLength: 4, reg: w3kw, asm: "VPSUBUSW", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPSUBUSWMasked256Merging", argLength: 4, reg: w3kw, asm: "VPSUBUSW", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPSUBUSWMasked512Merging", argLength: 4, reg: w3kw, asm: "VPSUBUSW", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPSUBWMasked128Merging", argLength: 4, reg: w3kw, asm: "VPSUBW", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPSUBWMasked256Merging", argLength: 4, reg: w3kw, asm: "VPSUBW", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPSUBWMasked512Merging", argLength: 4, reg: w3kw, asm: "VPSUBW", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPXORDMasked128Merging", argLength: 4, reg: w3kw, asm: "VPXORD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPXORDMasked256Merging", argLength: 4, reg: w3kw, asm: "VPXORD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPXORDMasked512Merging", argLength: 4, reg: w3kw, asm: "VPXORD", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPXORQMasked128Merging", argLength: 4, reg: w3kw, asm: "VPXORQ", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPXORQMasked256Merging", argLength: 4, reg: w3kw, asm: "VPXORQ", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPXORQMasked512Merging", argLength: 4, reg: w3kw, asm: "VPXORQ", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VRCP14PDMasked128Merging", argLength: 3, reg: w2kw, asm: "VRCP14PD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VRCP14PDMasked256Merging", argLength: 3, reg: w2kw, asm: "VRCP14PD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VRCP14PDMasked512Merging", argLength: 3, reg: w2kw, asm: "VRCP14PD", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VRCP14PSMasked128Merging", argLength: 3, reg: w2kw, asm: "VRCP14PS", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VRCP14PSMasked256Merging", argLength: 3, reg: w2kw, asm: "VRCP14PS", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VRCP14PSMasked512Merging", argLength: 3, reg: w2kw, asm: "VRCP14PS", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VRSQRT14PDMasked128Merging", argLength: 3, reg: w2kw, asm: "VRSQRT14PD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VRSQRT14PDMasked256Merging", argLength: 3, reg: w2kw, asm: "VRSQRT14PD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VRSQRT14PDMasked512Merging", argLength: 3, reg: w2kw, asm: "VRSQRT14PD", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VRSQRT14PSMasked128Merging", argLength: 3, reg: w2kw, asm: "VRSQRT14PS", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VRSQRT14PSMasked256Merging", argLength: 3, reg: w2kw, asm: "VRSQRT14PS", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VRSQRT14PSMasked512Merging", argLength: 3, reg: w2kw, asm: "VRSQRT14PS", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VSCALEFPDMasked128Merging", argLength: 4, reg: w3kw, asm: "VSCALEFPD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VSCALEFPDMasked256Merging", argLength: 4, reg: w3kw, asm: "VSCALEFPD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VSCALEFPDMasked512Merging", argLength: 4, reg: w3kw, asm: "VSCALEFPD", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VSCALEFPSMasked128Merging", argLength: 4, reg: w3kw, asm: "VSCALEFPS", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VSCALEFPSMasked256Merging", argLength: 4, reg: w3kw, asm: "VSCALEFPS", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VSCALEFPSMasked512Merging", argLength: 4, reg: w3kw, asm: "VSCALEFPS", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VSQRTPDMasked128Merging", argLength: 3, reg: w2kw, asm: "VSQRTPD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VSQRTPDMasked256Merging", argLength: 3, reg: w2kw, asm: "VSQRTPD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VSQRTPDMasked512Merging", argLength: 3, reg: w2kw, asm: "VSQRTPD", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VSQRTPSMasked128Merging", argLength: 3, reg: w2kw, asm: "VSQRTPS", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VSQRTPSMasked256Merging", argLength: 3, reg: w2kw, asm: "VSQRTPS", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VSQRTPSMasked512Merging", argLength: 3, reg: w2kw, asm: "VSQRTPS", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VSUBPDMasked128Merging", argLength: 4, reg: w3kw, asm: "VSUBPD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VSUBPDMasked256Merging", argLength: 4, reg: w3kw, asm: "VSUBPD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VSUBPDMasked512Merging", argLength: 4, reg: w3kw, asm: "VSUBPD", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VSUBPSMasked128Merging", argLength: 4, reg: w3kw, asm: "VSUBPS", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VSUBPSMasked256Merging", argLength: 4, reg: w3kw, asm: "VSUBPS", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VSUBPSMasked512Merging", argLength: 4, reg: w3kw, asm: "VSUBPS", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPROLDMasked128Merging", argLength: 3, reg: w2kw, asm: "VPROLD", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPROLDMasked256Merging", argLength: 3, reg: w2kw, asm: "VPROLD", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPROLDMasked512Merging", argLength: 3, reg: w2kw, asm: "VPROLD", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPROLQMasked128Merging", argLength: 3, reg: w2kw, asm: "VPROLQ", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPROLQMasked256Merging", argLength: 3, reg: w2kw, asm: "VPROLQ", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPROLQMasked512Merging", argLength: 3, reg: w2kw, asm: "VPROLQ", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPRORDMasked128Merging", argLength: 3, reg: w2kw, asm: "VPRORD", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPRORDMasked256Merging", argLength: 3, reg: w2kw, asm: "VPRORD", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPRORDMasked512Merging", argLength: 3, reg: w2kw, asm: "VPRORD", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPRORQMasked128Merging", argLength: 3, reg: w2kw, asm: "VPRORQ", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPRORQMasked256Merging", argLength: 3, reg: w2kw, asm: "VPRORQ", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPRORQMasked512Merging", argLength: 3, reg: w2kw, asm: "VPRORQ", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPSHLDDMasked128Merging", argLength: 4, reg: w3kw, asm: "VPSHLDD", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPSHLDDMasked256Merging", argLength: 4, reg: w3kw, asm: "VPSHLDD", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPSHLDDMasked512Merging", argLength: 4, reg: w3kw, asm: "VPSHLDD", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPSHLDQMasked128Merging", argLength: 4, reg: w3kw, asm: "VPSHLDQ", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPSHLDQMasked256Merging", argLength: 4, reg: w3kw, asm: "VPSHLDQ", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPSHLDQMasked512Merging", argLength: 4, reg: w3kw, asm: "VPSHLDQ", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPSHLDWMasked128Merging", argLength: 4, reg: w3kw, asm: "VPSHLDW", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPSHLDWMasked256Merging", argLength: 4, reg: w3kw, asm: "VPSHLDW", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPSHLDWMasked512Merging", argLength: 4, reg: w3kw, asm: "VPSHLDW", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPSHRDDMasked128Merging", argLength: 4, reg: w3kw, asm: "VPSHRDD", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPSHRDDMasked256Merging", argLength: 4, reg: w3kw, asm: "VPSHRDD", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPSHRDDMasked512Merging", argLength: 4, reg: w3kw, asm: "VPSHRDD", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPSHRDQMasked128Merging", argLength: 4, reg: w3kw, asm: "VPSHRDQ", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPSHRDQMasked256Merging", argLength: 4, reg: w3kw, asm: "VPSHRDQ", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPSHRDQMasked512Merging", argLength: 4, reg: w3kw, asm: "VPSHRDQ", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPSHRDWMasked128Merging", argLength: 4, reg: w3kw, asm: "VPSHRDW", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPSHRDWMasked256Merging", argLength: 4, reg: w3kw, asm: "VPSHRDW", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPSHRDWMasked512Merging", argLength: 4, reg: w3kw, asm: "VPSHRDW", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPSHUFDMasked128Merging", argLength: 3, reg: w2kw, asm: "VPSHUFD", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPSHUFDMasked256Merging", argLength: 3, reg: w2kw, asm: "VPSHUFD", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPSHUFDMasked512Merging", argLength: 3, reg: w2kw, asm: "VPSHUFD", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPSHUFHWMasked128Merging", argLength: 3, reg: w2kw, asm: "VPSHUFHW", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPSHUFHWMasked256Merging", argLength: 3, reg: w2kw, asm: "VPSHUFHW", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPSHUFHWMasked512Merging", argLength: 3, reg: w2kw, asm: "VPSHUFHW", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPSLLDMasked128constMerging", argLength: 3, reg: w2kw, asm: "VPSLLD", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPSLLDMasked256constMerging", argLength: 3, reg: w2kw, asm: "VPSLLD", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPSLLDMasked512constMerging", argLength: 3, reg: w2kw, asm: "VPSLLD", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPSLLQMasked128constMerging", argLength: 3, reg: w2kw, asm: "VPSLLQ", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPSLLQMasked256constMerging", argLength: 3, reg: w2kw, asm: "VPSLLQ", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPSLLQMasked512constMerging", argLength: 3, reg: w2kw, asm: "VPSLLQ", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPSLLWMasked128constMerging", argLength: 3, reg: w2kw, asm: "VPSLLW", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPSLLWMasked256constMerging", argLength: 3, reg: w2kw, asm: "VPSLLW", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPSLLWMasked512constMerging", argLength: 3, reg: w2kw, asm: "VPSLLW", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPSRADMasked128constMerging", argLength: 3, reg: w2kw, asm: "VPSRAD", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPSRADMasked256constMerging", argLength: 3, reg: w2kw, asm: "VPSRAD", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPSRADMasked512constMerging", argLength: 3, reg: w2kw, asm: "VPSRAD", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPSRAQMasked128constMerging", argLength: 3, reg: w2kw, asm: "VPSRAQ", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPSRAQMasked256constMerging", argLength: 3, reg: w2kw, asm: "VPSRAQ", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPSRAQMasked512constMerging", argLength: 3, reg: w2kw, asm: "VPSRAQ", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPSRAWMasked128constMerging", argLength: 3, reg: w2kw, asm: "VPSRAW", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPSRAWMasked256constMerging", argLength: 3, reg: w2kw, asm: "VPSRAW", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPSRAWMasked512constMerging", argLength: 3, reg: w2kw, asm: "VPSRAW", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPSRLDMasked128constMerging", argLength: 3, reg: w2kw, asm: "VPSRLD", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPSRLDMasked256constMerging", argLength: 3, reg: w2kw, asm: "VPSRLD", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPSRLDMasked512constMerging", argLength: 3, reg: w2kw, asm: "VPSRLD", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPSRLQMasked128constMerging", argLength: 3, reg: w2kw, asm: "VPSRLQ", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPSRLQMasked256constMerging", argLength: 3, reg: w2kw, asm: "VPSRLQ", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPSRLQMasked512constMerging", argLength: 3, reg: w2kw, asm: "VPSRLQ", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPSRLWMasked128constMerging", argLength: 3, reg: w2kw, asm: "VPSRLW", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPSRLWMasked256constMerging", argLength: 3, reg: w2kw, asm: "VPSRLW", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPSRLWMasked512constMerging", argLength: 3, reg: w2kw, asm: "VPSRLW", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VREDUCEPDMasked128Merging", argLength: 3, reg: w2kw, asm: "VREDUCEPD", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VREDUCEPDMasked256Merging", argLength: 3, reg: w2kw, asm: "VREDUCEPD", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VREDUCEPDMasked512Merging", argLength: 3, reg: w2kw, asm: "VREDUCEPD", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VREDUCEPSMasked128Merging", argLength: 3, reg: w2kw, asm: "VREDUCEPS", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VREDUCEPSMasked256Merging", argLength: 3, reg: w2kw, asm: "VREDUCEPS", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VREDUCEPSMasked512Merging", argLength: 3, reg: w2kw, asm: "VREDUCEPS", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VRNDSCALEPDMasked128Merging", argLength: 3, reg: w2kw, asm: "VRNDSCALEPD", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VRNDSCALEPDMasked256Merging", argLength: 3, reg: w2kw, asm: "VRNDSCALEPD", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VRNDSCALEPDMasked512Merging", argLength: 3, reg: w2kw, asm: "VRNDSCALEPD", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VRNDSCALEPSMasked128Merging", argLength: 3, reg: w2kw, asm: "VRNDSCALEPS", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VRNDSCALEPSMasked256Merging", argLength: 3, reg: w2kw, asm: "VRNDSCALEPS", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VRNDSCALEPSMasked512Merging", argLength: 3, reg: w2kw, asm: "VRNDSCALEPS", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
}
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -30,6 +30,12 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{{- range .OpsDataImmLoad}}
{name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", commutative: {{.Comm}}, typ: "{{.Type}}", aux: "SymValAndOff", symEffect: "Read", resultInArg0: {{.ResultInArg0}}},
{{- end}}
{{- range .OpsDataMerging }}
{name: "{{.OpName}}Merging", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", commutative: false, typ: "{{.Type}}", resultInArg0: true},
{{- end }}
{{- range .OpsDataImmMerging }}
{name: "{{.OpName}}Merging", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", aux: "UInt8", commutative: false, typ: "{{.Type}}", resultInArg0: true},
{{- end }}
}
}
`
@ -51,10 +57,12 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer {
ResultInArg0 bool
}
type machineOpsData struct {
OpsData []opData
OpsDataImm []opData
OpsDataLoad []opData
OpsDataImmLoad []opData
OpsData []opData
OpsDataImm []opData
OpsDataLoad []opData
OpsDataImmLoad []opData
OpsDataMerging []opData
OpsDataImmMerging []opData
}
regInfoSet := map[string]bool{
@ -66,6 +74,8 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer {
opsDataImm := make([]opData, 0)
opsDataLoad := make([]opData, 0)
opsDataImmLoad := make([]opData, 0)
opsDataMerging := make([]opData, 0)
opsDataImmMerging := make([]opData, 0)
// Determine the "best" version of an instruction to use
best := make(map[string]Operation)
@ -98,7 +108,7 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer {
regInfoMissing := make(map[string]bool, 0)
for _, asm := range mOpOrder {
op := best[asm]
shapeIn, shapeOut, _, _, gOp := op.shape()
shapeIn, shapeOut, maskType, _, gOp := op.shape()
// TODO: all our masked operations are now zeroing, we need to generate machine ops with merging masks, maybe copy
// one here with a name suffix "Merging". The rewrite rules will need them.
@ -147,11 +157,13 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer {
resultInArg0 = true
}
var memOpData *opData
regInfoMerging := regInfo
hasMerging := false
if op.MemFeatures != nil && *op.MemFeatures == "vbcst" {
// Right now we only have vbcst case
// Make a full vec memory variant.
op = rewriteLastVregToMem(op)
regInfo, err := makeRegInfo(op, VregMemIn)
opMem := rewriteLastVregToMem(op)
regInfo, err := makeRegInfo(opMem, VregMemIn)
if err != nil {
// Just skip it if err is non-nil.
// an error could be triggered by [checkVecAsScalar].
@ -163,16 +175,51 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer {
memOpData = &opData{asm + "load", gOp.Asm, len(gOp.In) + 1, regInfo, false, outType, resultInArg0}
}
}
hasMerging = gOp.hasMaskedMerging(maskType, shapeOut)
if hasMerging && !resultInArg0 {
// We have to copy the slice here because the sort will be visible from other
// aliases when no reslicing is happening.
newIn := make([]Operand, len(op.In), len(op.In)+1)
copy(newIn, op.In)
op.In = newIn
op.In = append(op.In, op.Out[0])
op.sortOperand()
regInfoMerging, err = makeRegInfo(op, NoMem)
if err != nil {
panic(err)
}
}
if shapeIn == OneImmIn || shapeIn == OneKmaskImmIn {
opsDataImm = append(opsDataImm, opData{asm, gOp.Asm, len(gOp.In), regInfo, gOp.Commutative, outType, resultInArg0})
if memOpData != nil {
if *op.MemFeatures != "vbcst" {
panic("simdgen only knows vbcst for mem ops for now")
}
opsDataImmLoad = append(opsDataImmLoad, *memOpData)
}
if hasMerging {
mergingLen := len(gOp.In)
if !resultInArg0 {
mergingLen++
}
opsDataImmMerging = append(opsDataImmMerging, opData{asm, gOp.Asm, mergingLen, regInfoMerging, gOp.Commutative, outType, resultInArg0})
}
} else {
opsData = append(opsData, opData{asm, gOp.Asm, len(gOp.In), regInfo, gOp.Commutative, outType, resultInArg0})
if memOpData != nil {
if *op.MemFeatures != "vbcst" {
panic("simdgen only knows vbcst for mem ops for now")
}
opsDataLoad = append(opsDataLoad, *memOpData)
}
if hasMerging {
mergingLen := len(gOp.In)
if !resultInArg0 {
mergingLen++
}
opsDataMerging = append(opsDataMerging, opData{asm, gOp.Asm, mergingLen, regInfoMerging, gOp.Commutative, outType, resultInArg0})
}
}
}
if len(regInfoErrs) != 0 {
@ -193,7 +240,14 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer {
sort.Slice(opsDataImmLoad, func(i, j int) bool {
return compareNatural(opsDataImmLoad[i].OpName, opsDataImmLoad[j].OpName) < 0
})
err := t.Execute(buffer, machineOpsData{opsData, opsDataImm, opsDataLoad, opsDataImmLoad})
sort.Slice(opsDataMerging, func(i, j int) bool {
return compareNatural(opsDataMerging[i].OpName, opsDataMerging[j].OpName) < 0
})
sort.Slice(opsDataImmMerging, func(i, j int) bool {
return compareNatural(opsDataImmMerging[i].OpName, opsDataImmMerging[j].OpName) < 0
})
err := t.Execute(buffer, machineOpsData{opsData, opsDataImm, opsDataLoad, opsDataImmLoad,
opsDataMerging, opsDataImmMerging})
if err != nil {
panic(fmt.Errorf("failed to execute template: %w", err))
}

View file

@ -585,8 +585,8 @@ func writeSIMDFeatures(ops []Operation) *bytes.Buffer {
return buffer
}
// writeSIMDStubs generates the simd vector intrinsic stubs and writes it to ops_amd64.go and ops_internal_amd64.go
// within the specified directory.
// writeSIMDStubs returns two bytes.Buffers containing the declarations for the public
// and internal-use vector intrinsics.
func writeSIMDStubs(ops []Operation, typeMap simdTypeMap) (f, fI *bytes.Buffer) {
t := templateOf(simdStubsTmpl, "simdStubs")
f = new(bytes.Buffer)

View file

@ -126,6 +126,9 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer {
buffer := new(bytes.Buffer)
buffer.WriteString(generatedHeader + "\n")
// asm -> masked merging rules
maskedMergeOpts := make(map[string]string)
s2n := map[int]string{8: "B", 16: "W", 32: "D", 64: "Q"}
asmCheck := map[string]bool{}
var allData []tplRuleData
var optData []tplRuleData // for mask peephole optimizations, and other misc
@ -295,6 +298,33 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer {
memOpData.tplName = "vregMem"
}
memOptData = append(memOptData, memOpData)
asmCheck[memOpData.Asm+"load"] = true
}
}
// Generate the masked merging optimization rules
if gOp.hasMaskedMerging(maskType, opOutShape) {
// TODO: handle customized operand order and special lower.
maskElem := gOp.In[len(gOp.In)-1]
if maskElem.Bits == nil {
panic("mask has no bits")
}
if maskElem.ElemBits == nil {
panic("mask has no elemBits")
}
if maskElem.Lanes == nil {
panic("mask has no lanes")
}
switch *maskElem.Bits {
case 128, 256:
// VPBLENDVB cases.
noMaskName := machineOpName(NoMask, gOp)
maskedMergeOpts[noMaskName] = fmt.Sprintf("(VPBLENDVB%d dst (%s %s) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (%sMerging dst %s (VPMOVVec%dx%dToM <types.TypeMask> mask))\n",
*maskElem.Bits, noMaskName, data.Args, data.Asm, data.Args, *maskElem.ElemBits, *maskElem.Lanes)
case 512:
// VPBLENDM[BWDQ] cases.
noMaskName := machineOpName(NoMask, gOp)
maskedMergeOpts[noMaskName] = fmt.Sprintf("(VPBLENDM%sMasked%d dst (%s %s) mask) => (%sMerging dst %s mask)\n",
s2n[*maskElem.ElemBits], *maskElem.Bits, noMaskName, data.Args, data.Asm, data.Args)
}
}
@ -332,6 +362,13 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer {
}
}
for asm, rule := range maskedMergeOpts {
if !asmCheck[asm] {
continue
}
buffer.WriteString(rule)
}
for _, data := range memOptData {
if err := ruleTemplates.ExecuteTemplate(buffer, data.tplName, data); err != nil {
panic(fmt.Errorf("failed to execute template %s for %s: %w", data.tplName, data.Asm, err))

View file

@ -99,6 +99,7 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer {
"v21ResultInArg0",
"v21ResultInArg0Imm8",
"v31x0AtIn2ResultInArg0",
"v2kvResultInArg0",
}
regInfoSet := map[string][]string{}
for _, key := range regInfoKeys {
@ -107,7 +108,8 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer {
seen := map[string]struct{}{}
allUnseen := make(map[string][]Operation)
classifyOp := func(op Operation, shapeIn inShape, shapeOut outShape, caseStr string, mem memShape) error {
allUnseenCaseStr := make(map[string][]string)
classifyOp := func(op Operation, maskType maskShape, shapeIn inShape, shapeOut outShape, caseStr string, mem memShape) error {
regShape, err := op.regShape(mem)
if err != nil {
return err
@ -127,8 +129,31 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer {
}
if _, ok := regInfoSet[regShape]; !ok {
allUnseen[regShape] = append(allUnseen[regShape], op)
allUnseenCaseStr[regShape] = append(allUnseenCaseStr[regShape], caseStr)
}
regInfoSet[regShape] = append(regInfoSet[regShape], caseStr)
if mem == NoMem && op.hasMaskedMerging(maskType, shapeOut) {
regShapeMerging := regShape
if shapeOut != OneVregOutAtIn {
// We have to copy the slice here because the sort will be visible from other
// aliases when no reslicing is happening.
newIn := make([]Operand, len(op.In), len(op.In)+1)
copy(newIn, op.In)
op.In = newIn
op.In = append(op.In, op.Out[0])
op.sortOperand()
regShapeMerging, err = op.regShape(mem)
regShapeMerging += "ResultInArg0"
}
if err != nil {
return err
}
if _, ok := regInfoSet[regShapeMerging]; !ok {
allUnseen[regShapeMerging] = append(allUnseen[regShapeMerging], op)
allUnseenCaseStr[regShapeMerging] = append(allUnseenCaseStr[regShapeMerging], caseStr+"Merging")
}
regInfoSet[regShapeMerging] = append(regInfoSet[regShapeMerging], caseStr+"Merging")
}
return nil
}
for _, op := range ops {
@ -146,7 +171,7 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer {
isZeroMasking = true
}
}
if err := classifyOp(op, shapeIn, shapeOut, caseStr, NoMem); err != nil {
if err := classifyOp(op, maskType, shapeIn, shapeOut, caseStr, NoMem); err != nil {
panic(err)
}
if op.MemFeatures != nil && *op.MemFeatures == "vbcst" {
@ -155,7 +180,7 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer {
// Ignore the error
// an error could be triggered by [checkVecAsScalar].
// TODO: make [checkVecAsScalar] aware of mem ops.
if err := classifyOp(op, shapeIn, shapeOut, caseStr+"load", VregMemIn); err != nil {
if err := classifyOp(op, maskType, shapeIn, shapeOut, caseStr+"load", VregMemIn); err != nil {
if *Verbose {
log.Printf("Seen error: %e", err)
}
@ -169,7 +194,7 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer {
for k := range allUnseen {
allKeys = append(allKeys, k)
}
panic(fmt.Errorf("unsupported register constraint for prog, please update gen_simdssa.go and amd64/ssa.go: %+v\nAll keys: %v", allUnseen, allKeys))
panic(fmt.Errorf("unsupported register constraint for prog, please update gen_simdssa.go and amd64/ssa.go: %+v\nAll keys: %v\n, cases: %v\n", allUnseen, allKeys, allUnseenCaseStr))
}
buffer := new(bytes.Buffer)

View file

@ -523,10 +523,6 @@ func checkVecAsScalar(op Operation) (idx int, err error) {
}
}
if idx >= 0 {
if idx != 1 {
err = fmt.Errorf("simdgen only supports TreatLikeAScalarOfSize at the 2nd arg of the arg list: %s", op)
return
}
if sSize != 8 && sSize != 16 && sSize != 32 && sSize != 64 {
err = fmt.Errorf("simdgen does not recognize this uint size: %d, %s", sSize, op)
return
@ -545,6 +541,10 @@ func rewriteVecAsScalarRegInfo(op Operation, regInfo string) (string, error) {
regInfo = "vfpv"
} else if regInfo == "v2kv" {
regInfo = "vfpkv"
} else if regInfo == "v31" {
regInfo = "v2fpv"
} else if regInfo == "v3kv" {
regInfo = "v2fpkv"
} else {
return "", fmt.Errorf("simdgen does not recognize uses of treatLikeAScalarOfSize with op regShape %s in op: %s", regInfo, op)
}
@ -807,6 +807,12 @@ func reportXEDInconsistency(ops []Operation) error {
return nil
}
// hasMaskedMerging reports whether a masked "Merging" variant should be
// generated for o, given the mask shape and output shape of the op.
//
// It returns true only when all of the following hold:
//   - o has neither a customized operand order nor a special lowering;
//   - maskType is OneMask and outType is OneVregOut;
//   - o has exactly one input variant;
//   - o.Asm mentions neither "BLEND" nor "VMOVDQU".
func (o *Operation) hasMaskedMerging(maskType maskShape, outType outShape) bool {
	if o.OperandOrder != nil || o.SpecialLower != nil {
		return false
	}
	if maskType != OneMask || outType != OneVregOut || len(o.InVariant) != 1 {
		return false
	}
	// BLEND and VMOVDQU are not user-facing ops, so they never get a
	// merging form of their own.
	for _, internalAsm := range []string{"BLEND", "VMOVDQU"} {
		if strings.Contains(o.Asm, internalAsm) {
			return false
		}
	}
	return true
}
func getVbcstData(s string) (feat1Match, feat2Match string) {
_, err := fmt.Sscanf(s, "feat1=%[^;];feat2=%s", &feat1Match, &feat2Match)
if err != nil {

View file

@ -299,21 +299,6 @@
out:
- *v
# For AVX512
- go: move
asm: VMOVUP[SD]
zeroing: true
in:
- &v
go: $t
class: vreg
base: float
inVariant:
-
class: mask
out:
- *v
- go: Expand
asm: "VPEXPAND[BWDQ]|VEXPANDP[SD]"
in:

View file

@ -1108,3 +1108,22 @@ func TestSelectTernOptInt32x16(t *testing.T) {
}
foo(t2, applyTo3(x, y, z, ft2))
}
// TestMaskedMerge checks the result of x.Add(y).Merge(z, mask): lanes where
// mask is set hold the sum x+y, and the remaining lanes hold z.
func TestMaskedMerge(t *testing.T) {
x := simd.LoadInt64x4Slice([]int64{1, 2, 3, 4})
y := simd.LoadInt64x4Slice([]int64{5, 6, 1, 1})
z := simd.LoadInt64x4Slice([]int64{-1, -2, -3, -4})
res := make([]int64, 4)
// Lanes 0 and 1 satisfy x < y and take the sum; lanes 2 and 3 keep z.
expected := []int64{6, 8, -3, -4}
mask := x.Less(y)
// The two branches are intentionally the same expression.
// NOTE(review): presumably the HasAVX512 guard lets the compiler select the
// AVX512 masked-merging form in the first branch while the else branch
// exercises the unguarded blend path; confirm against the generated rules
// before collapsing them.
if simd.HasAVX512() {
x.Add(y).Merge(z, mask).StoreSlice(res)
} else {
x.Add(y).Merge(z, mask).StoreSlice(res)
}
for i := range 4 {
if res[i] != expected[i] {
t.Errorf("got %d wanted %d", res[i], expected[i])
}
}
}

View file

@ -67,3 +67,13 @@ func simdFeatureGuardedMaskOpt() simd.Int16x16 {
mask := simd.Mask16x16FromBits(5)
return x.Add(y).Masked(mask) // amd64:`VPAND\s.*$`
}
// simdMaskedMerge is a codegen check for the masked-merge pattern
// x.Add(y).Merge(x, mask). The trailing comments on the return statements are
// assembly-check directives: inside the HasAVX512-guarded branch no VPBLENDVB
// may be emitted, while the unguarded return must still emit VPBLENDVB.
func simdMaskedMerge() simd.Int16x16 {
var x, y simd.Int16x16
if simd.HasAVX512() {
mask := simd.Mask16x16FromBits(5)
return x.Add(y).Merge(x, mask) // amd64:-`VPBLENDVB\s.*$`
}
// Same expression as above, but outside the feature guard.
mask := simd.Mask16x16FromBits(5)
return x.Add(y).Merge(x, mask) // amd64:`VPBLENDVB\s.*$`
}