mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
[dev.simd] cmd/compile: add masked merging ops and optimizations
This CL generates optimizations for masked variants of AVX512 instructions for patterns: x.Op(y).Merge(z, mask) => OpMasked(z, x, y, mask), where OpMasked is resultInArg0. Change-Id: Ife7ccc9ddbf76ae921a085bd6a42b965da9bc179 Reviewed-on: https://go-review.googlesource.com/c/go/+/718160 Reviewed-by: David Chase <drchase@google.com> TryBot-Bypass: Junyang Shao <shaojunyang@google.com>
This commit is contained in:
parent
771a1dc216
commit
86b4fe31d9
15 changed files with 17367 additions and 627 deletions
|
|
@ -914,12 +914,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
|||
ssa.OpAMD64VSQRTPDMasked128,
|
||||
ssa.OpAMD64VSQRTPDMasked256,
|
||||
ssa.OpAMD64VSQRTPDMasked512,
|
||||
ssa.OpAMD64VMOVUPSMasked128,
|
||||
ssa.OpAMD64VMOVUPSMasked256,
|
||||
ssa.OpAMD64VMOVUPSMasked512,
|
||||
ssa.OpAMD64VMOVUPDMasked128,
|
||||
ssa.OpAMD64VMOVUPDMasked256,
|
||||
ssa.OpAMD64VMOVUPDMasked512,
|
||||
ssa.OpAMD64VMOVDQU8Masked128,
|
||||
ssa.OpAMD64VMOVDQU8Masked256,
|
||||
ssa.OpAMD64VMOVDQU8Masked512,
|
||||
|
|
@ -1225,6 +1219,129 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
|||
ssa.OpAMD64VPDPBUSDSMasked128,
|
||||
ssa.OpAMD64VPDPBUSDSMasked256,
|
||||
ssa.OpAMD64VPDPBUSDSMasked512,
|
||||
ssa.OpAMD64VADDPSMasked128Merging,
|
||||
ssa.OpAMD64VADDPSMasked256Merging,
|
||||
ssa.OpAMD64VADDPSMasked512Merging,
|
||||
ssa.OpAMD64VADDPDMasked128Merging,
|
||||
ssa.OpAMD64VADDPDMasked256Merging,
|
||||
ssa.OpAMD64VADDPDMasked512Merging,
|
||||
ssa.OpAMD64VPADDBMasked128Merging,
|
||||
ssa.OpAMD64VPADDBMasked256Merging,
|
||||
ssa.OpAMD64VPADDBMasked512Merging,
|
||||
ssa.OpAMD64VPADDWMasked128Merging,
|
||||
ssa.OpAMD64VPADDWMasked256Merging,
|
||||
ssa.OpAMD64VPADDWMasked512Merging,
|
||||
ssa.OpAMD64VPADDDMasked128Merging,
|
||||
ssa.OpAMD64VPADDDMasked256Merging,
|
||||
ssa.OpAMD64VPADDDMasked512Merging,
|
||||
ssa.OpAMD64VPADDQMasked128Merging,
|
||||
ssa.OpAMD64VPADDQMasked256Merging,
|
||||
ssa.OpAMD64VPADDQMasked512Merging,
|
||||
ssa.OpAMD64VPADDSBMasked128Merging,
|
||||
ssa.OpAMD64VPADDSBMasked256Merging,
|
||||
ssa.OpAMD64VPADDSBMasked512Merging,
|
||||
ssa.OpAMD64VPADDSWMasked128Merging,
|
||||
ssa.OpAMD64VPADDSWMasked256Merging,
|
||||
ssa.OpAMD64VPADDSWMasked512Merging,
|
||||
ssa.OpAMD64VPADDUSBMasked128Merging,
|
||||
ssa.OpAMD64VPADDUSBMasked256Merging,
|
||||
ssa.OpAMD64VPADDUSBMasked512Merging,
|
||||
ssa.OpAMD64VPADDUSWMasked128Merging,
|
||||
ssa.OpAMD64VPADDUSWMasked256Merging,
|
||||
ssa.OpAMD64VPADDUSWMasked512Merging,
|
||||
ssa.OpAMD64VPANDDMasked128Merging,
|
||||
ssa.OpAMD64VPANDDMasked256Merging,
|
||||
ssa.OpAMD64VPANDDMasked512Merging,
|
||||
ssa.OpAMD64VPANDQMasked128Merging,
|
||||
ssa.OpAMD64VPANDQMasked256Merging,
|
||||
ssa.OpAMD64VPANDQMasked512Merging,
|
||||
ssa.OpAMD64VPAVGBMasked128Merging,
|
||||
ssa.OpAMD64VPAVGBMasked256Merging,
|
||||
ssa.OpAMD64VPAVGBMasked512Merging,
|
||||
ssa.OpAMD64VPAVGWMasked128Merging,
|
||||
ssa.OpAMD64VPAVGWMasked256Merging,
|
||||
ssa.OpAMD64VPAVGWMasked512Merging,
|
||||
ssa.OpAMD64VPACKSSDWMasked128Merging,
|
||||
ssa.OpAMD64VPACKSSDWMasked256Merging,
|
||||
ssa.OpAMD64VPACKSSDWMasked512Merging,
|
||||
ssa.OpAMD64VPACKUSDWMasked128Merging,
|
||||
ssa.OpAMD64VPACKUSDWMasked256Merging,
|
||||
ssa.OpAMD64VPACKUSDWMasked512Merging,
|
||||
ssa.OpAMD64VDIVPSMasked128Merging,
|
||||
ssa.OpAMD64VDIVPSMasked256Merging,
|
||||
ssa.OpAMD64VDIVPSMasked512Merging,
|
||||
ssa.OpAMD64VDIVPDMasked128Merging,
|
||||
ssa.OpAMD64VDIVPDMasked256Merging,
|
||||
ssa.OpAMD64VDIVPDMasked512Merging,
|
||||
ssa.OpAMD64VPMADDWDMasked128Merging,
|
||||
ssa.OpAMD64VPMADDWDMasked256Merging,
|
||||
ssa.OpAMD64VPMADDWDMasked512Merging,
|
||||
ssa.OpAMD64VPMADDUBSWMasked128Merging,
|
||||
ssa.OpAMD64VPMADDUBSWMasked256Merging,
|
||||
ssa.OpAMD64VPMADDUBSWMasked512Merging,
|
||||
ssa.OpAMD64VGF2P8MULBMasked128Merging,
|
||||
ssa.OpAMD64VGF2P8MULBMasked256Merging,
|
||||
ssa.OpAMD64VGF2P8MULBMasked512Merging,
|
||||
ssa.OpAMD64VMAXPSMasked128Merging,
|
||||
ssa.OpAMD64VMAXPSMasked256Merging,
|
||||
ssa.OpAMD64VMAXPSMasked512Merging,
|
||||
ssa.OpAMD64VMAXPDMasked128Merging,
|
||||
ssa.OpAMD64VMAXPDMasked256Merging,
|
||||
ssa.OpAMD64VMAXPDMasked512Merging,
|
||||
ssa.OpAMD64VPMAXSBMasked128Merging,
|
||||
ssa.OpAMD64VPMAXSBMasked256Merging,
|
||||
ssa.OpAMD64VPMAXSBMasked512Merging,
|
||||
ssa.OpAMD64VPMAXSWMasked128Merging,
|
||||
ssa.OpAMD64VPMAXSWMasked256Merging,
|
||||
ssa.OpAMD64VPMAXSWMasked512Merging,
|
||||
ssa.OpAMD64VPMAXSDMasked128Merging,
|
||||
ssa.OpAMD64VPMAXSDMasked256Merging,
|
||||
ssa.OpAMD64VPMAXSDMasked512Merging,
|
||||
ssa.OpAMD64VPMAXSQMasked128Merging,
|
||||
ssa.OpAMD64VPMAXSQMasked256Merging,
|
||||
ssa.OpAMD64VPMAXSQMasked512Merging,
|
||||
ssa.OpAMD64VPMAXUBMasked128Merging,
|
||||
ssa.OpAMD64VPMAXUBMasked256Merging,
|
||||
ssa.OpAMD64VPMAXUBMasked512Merging,
|
||||
ssa.OpAMD64VPMAXUWMasked128Merging,
|
||||
ssa.OpAMD64VPMAXUWMasked256Merging,
|
||||
ssa.OpAMD64VPMAXUWMasked512Merging,
|
||||
ssa.OpAMD64VPMAXUDMasked128Merging,
|
||||
ssa.OpAMD64VPMAXUDMasked256Merging,
|
||||
ssa.OpAMD64VPMAXUDMasked512Merging,
|
||||
ssa.OpAMD64VPMAXUQMasked128Merging,
|
||||
ssa.OpAMD64VPMAXUQMasked256Merging,
|
||||
ssa.OpAMD64VPMAXUQMasked512Merging,
|
||||
ssa.OpAMD64VMINPSMasked128Merging,
|
||||
ssa.OpAMD64VMINPSMasked256Merging,
|
||||
ssa.OpAMD64VMINPSMasked512Merging,
|
||||
ssa.OpAMD64VMINPDMasked128Merging,
|
||||
ssa.OpAMD64VMINPDMasked256Merging,
|
||||
ssa.OpAMD64VMINPDMasked512Merging,
|
||||
ssa.OpAMD64VPMINSBMasked128Merging,
|
||||
ssa.OpAMD64VPMINSBMasked256Merging,
|
||||
ssa.OpAMD64VPMINSBMasked512Merging,
|
||||
ssa.OpAMD64VPMINSWMasked128Merging,
|
||||
ssa.OpAMD64VPMINSWMasked256Merging,
|
||||
ssa.OpAMD64VPMINSWMasked512Merging,
|
||||
ssa.OpAMD64VPMINSDMasked128Merging,
|
||||
ssa.OpAMD64VPMINSDMasked256Merging,
|
||||
ssa.OpAMD64VPMINSDMasked512Merging,
|
||||
ssa.OpAMD64VPMINSQMasked128Merging,
|
||||
ssa.OpAMD64VPMINSQMasked256Merging,
|
||||
ssa.OpAMD64VPMINSQMasked512Merging,
|
||||
ssa.OpAMD64VPMINUBMasked128Merging,
|
||||
ssa.OpAMD64VPMINUBMasked256Merging,
|
||||
ssa.OpAMD64VPMINUBMasked512Merging,
|
||||
ssa.OpAMD64VPMINUWMasked128Merging,
|
||||
ssa.OpAMD64VPMINUWMasked256Merging,
|
||||
ssa.OpAMD64VPMINUWMasked512Merging,
|
||||
ssa.OpAMD64VPMINUDMasked128Merging,
|
||||
ssa.OpAMD64VPMINUDMasked256Merging,
|
||||
ssa.OpAMD64VPMINUDMasked512Merging,
|
||||
ssa.OpAMD64VPMINUQMasked128Merging,
|
||||
ssa.OpAMD64VPMINUQMasked256Merging,
|
||||
ssa.OpAMD64VPMINUQMasked512Merging,
|
||||
ssa.OpAMD64VFMADD213PSMasked128,
|
||||
ssa.OpAMD64VFMADD213PSMasked256,
|
||||
ssa.OpAMD64VFMADD213PSMasked512,
|
||||
|
|
@ -1237,12 +1354,39 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
|||
ssa.OpAMD64VFMADDSUB213PDMasked128,
|
||||
ssa.OpAMD64VFMADDSUB213PDMasked256,
|
||||
ssa.OpAMD64VFMADDSUB213PDMasked512,
|
||||
ssa.OpAMD64VPMULHWMasked128Merging,
|
||||
ssa.OpAMD64VPMULHWMasked256Merging,
|
||||
ssa.OpAMD64VPMULHWMasked512Merging,
|
||||
ssa.OpAMD64VPMULHUWMasked128Merging,
|
||||
ssa.OpAMD64VPMULHUWMasked256Merging,
|
||||
ssa.OpAMD64VPMULHUWMasked512Merging,
|
||||
ssa.OpAMD64VMULPSMasked128Merging,
|
||||
ssa.OpAMD64VMULPSMasked256Merging,
|
||||
ssa.OpAMD64VMULPSMasked512Merging,
|
||||
ssa.OpAMD64VMULPDMasked128Merging,
|
||||
ssa.OpAMD64VMULPDMasked256Merging,
|
||||
ssa.OpAMD64VMULPDMasked512Merging,
|
||||
ssa.OpAMD64VPMULLWMasked128Merging,
|
||||
ssa.OpAMD64VPMULLWMasked256Merging,
|
||||
ssa.OpAMD64VPMULLWMasked512Merging,
|
||||
ssa.OpAMD64VPMULLDMasked128Merging,
|
||||
ssa.OpAMD64VPMULLDMasked256Merging,
|
||||
ssa.OpAMD64VPMULLDMasked512Merging,
|
||||
ssa.OpAMD64VPMULLQMasked128Merging,
|
||||
ssa.OpAMD64VPMULLQMasked256Merging,
|
||||
ssa.OpAMD64VPMULLQMasked512Merging,
|
||||
ssa.OpAMD64VFMSUBADD213PSMasked128,
|
||||
ssa.OpAMD64VFMSUBADD213PSMasked256,
|
||||
ssa.OpAMD64VFMSUBADD213PSMasked512,
|
||||
ssa.OpAMD64VFMSUBADD213PDMasked128,
|
||||
ssa.OpAMD64VFMSUBADD213PDMasked256,
|
||||
ssa.OpAMD64VFMSUBADD213PDMasked512,
|
||||
ssa.OpAMD64VPORDMasked128Merging,
|
||||
ssa.OpAMD64VPORDMasked256Merging,
|
||||
ssa.OpAMD64VPORDMasked512Merging,
|
||||
ssa.OpAMD64VPORQMasked128Merging,
|
||||
ssa.OpAMD64VPORQMasked256Merging,
|
||||
ssa.OpAMD64VPORQMasked512Merging,
|
||||
ssa.OpAMD64VPERMI2BMasked128,
|
||||
ssa.OpAMD64VPERMI2BMasked256,
|
||||
ssa.OpAMD64VPERMI2BMasked512,
|
||||
|
|
@ -1261,6 +1405,45 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
|||
ssa.OpAMD64VPERMI2QMasked256,
|
||||
ssa.OpAMD64VPERMI2PDMasked512,
|
||||
ssa.OpAMD64VPERMI2QMasked512,
|
||||
ssa.OpAMD64VPSHUFBMasked256Merging,
|
||||
ssa.OpAMD64VPSHUFBMasked512Merging,
|
||||
ssa.OpAMD64VPSHUFBMasked128Merging,
|
||||
ssa.OpAMD64VPROLVDMasked128Merging,
|
||||
ssa.OpAMD64VPROLVDMasked256Merging,
|
||||
ssa.OpAMD64VPROLVDMasked512Merging,
|
||||
ssa.OpAMD64VPROLVQMasked128Merging,
|
||||
ssa.OpAMD64VPROLVQMasked256Merging,
|
||||
ssa.OpAMD64VPROLVQMasked512Merging,
|
||||
ssa.OpAMD64VPRORVDMasked128Merging,
|
||||
ssa.OpAMD64VPRORVDMasked256Merging,
|
||||
ssa.OpAMD64VPRORVDMasked512Merging,
|
||||
ssa.OpAMD64VPRORVQMasked128Merging,
|
||||
ssa.OpAMD64VPRORVQMasked256Merging,
|
||||
ssa.OpAMD64VPRORVQMasked512Merging,
|
||||
ssa.OpAMD64VSCALEFPSMasked128Merging,
|
||||
ssa.OpAMD64VSCALEFPSMasked256Merging,
|
||||
ssa.OpAMD64VSCALEFPSMasked512Merging,
|
||||
ssa.OpAMD64VSCALEFPDMasked128Merging,
|
||||
ssa.OpAMD64VSCALEFPDMasked256Merging,
|
||||
ssa.OpAMD64VSCALEFPDMasked512Merging,
|
||||
ssa.OpAMD64VPSHLDWMasked128Merging,
|
||||
ssa.OpAMD64VPSHLDWMasked256Merging,
|
||||
ssa.OpAMD64VPSHLDWMasked512Merging,
|
||||
ssa.OpAMD64VPSHLDDMasked128Merging,
|
||||
ssa.OpAMD64VPSHLDDMasked256Merging,
|
||||
ssa.OpAMD64VPSHLDDMasked512Merging,
|
||||
ssa.OpAMD64VPSHLDQMasked128Merging,
|
||||
ssa.OpAMD64VPSHLDQMasked256Merging,
|
||||
ssa.OpAMD64VPSHLDQMasked512Merging,
|
||||
ssa.OpAMD64VPSHRDWMasked128Merging,
|
||||
ssa.OpAMD64VPSHRDWMasked256Merging,
|
||||
ssa.OpAMD64VPSHRDWMasked512Merging,
|
||||
ssa.OpAMD64VPSHRDDMasked128Merging,
|
||||
ssa.OpAMD64VPSHRDDMasked256Merging,
|
||||
ssa.OpAMD64VPSHRDDMasked512Merging,
|
||||
ssa.OpAMD64VPSHRDQMasked128Merging,
|
||||
ssa.OpAMD64VPSHRDQMasked256Merging,
|
||||
ssa.OpAMD64VPSHRDQMasked512Merging,
|
||||
ssa.OpAMD64VPSHLDVWMasked128,
|
||||
ssa.OpAMD64VPSHLDVWMasked256,
|
||||
ssa.OpAMD64VPSHLDVWMasked512,
|
||||
|
|
@ -1270,6 +1453,15 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
|||
ssa.OpAMD64VPSHLDVQMasked128,
|
||||
ssa.OpAMD64VPSHLDVQMasked256,
|
||||
ssa.OpAMD64VPSHLDVQMasked512,
|
||||
ssa.OpAMD64VPSLLVWMasked128Merging,
|
||||
ssa.OpAMD64VPSLLVWMasked256Merging,
|
||||
ssa.OpAMD64VPSLLVWMasked512Merging,
|
||||
ssa.OpAMD64VPSLLVDMasked128Merging,
|
||||
ssa.OpAMD64VPSLLVDMasked256Merging,
|
||||
ssa.OpAMD64VPSLLVDMasked512Merging,
|
||||
ssa.OpAMD64VPSLLVQMasked128Merging,
|
||||
ssa.OpAMD64VPSLLVQMasked256Merging,
|
||||
ssa.OpAMD64VPSLLVQMasked512Merging,
|
||||
ssa.OpAMD64VPSHRDVWMasked128,
|
||||
ssa.OpAMD64VPSHRDVWMasked256,
|
||||
ssa.OpAMD64VPSHRDVWMasked512,
|
||||
|
|
@ -1278,7 +1470,61 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
|||
ssa.OpAMD64VPSHRDVDMasked512,
|
||||
ssa.OpAMD64VPSHRDVQMasked128,
|
||||
ssa.OpAMD64VPSHRDVQMasked256,
|
||||
ssa.OpAMD64VPSHRDVQMasked512:
|
||||
ssa.OpAMD64VPSHRDVQMasked512,
|
||||
ssa.OpAMD64VPSRAVWMasked128Merging,
|
||||
ssa.OpAMD64VPSRAVWMasked256Merging,
|
||||
ssa.OpAMD64VPSRAVWMasked512Merging,
|
||||
ssa.OpAMD64VPSRAVDMasked128Merging,
|
||||
ssa.OpAMD64VPSRAVDMasked256Merging,
|
||||
ssa.OpAMD64VPSRAVDMasked512Merging,
|
||||
ssa.OpAMD64VPSRAVQMasked128Merging,
|
||||
ssa.OpAMD64VPSRAVQMasked256Merging,
|
||||
ssa.OpAMD64VPSRAVQMasked512Merging,
|
||||
ssa.OpAMD64VPSRLVWMasked128Merging,
|
||||
ssa.OpAMD64VPSRLVWMasked256Merging,
|
||||
ssa.OpAMD64VPSRLVWMasked512Merging,
|
||||
ssa.OpAMD64VPSRLVDMasked128Merging,
|
||||
ssa.OpAMD64VPSRLVDMasked256Merging,
|
||||
ssa.OpAMD64VPSRLVDMasked512Merging,
|
||||
ssa.OpAMD64VPSRLVQMasked128Merging,
|
||||
ssa.OpAMD64VPSRLVQMasked256Merging,
|
||||
ssa.OpAMD64VPSRLVQMasked512Merging,
|
||||
ssa.OpAMD64VSUBPSMasked128Merging,
|
||||
ssa.OpAMD64VSUBPSMasked256Merging,
|
||||
ssa.OpAMD64VSUBPSMasked512Merging,
|
||||
ssa.OpAMD64VSUBPDMasked128Merging,
|
||||
ssa.OpAMD64VSUBPDMasked256Merging,
|
||||
ssa.OpAMD64VSUBPDMasked512Merging,
|
||||
ssa.OpAMD64VPSUBBMasked128Merging,
|
||||
ssa.OpAMD64VPSUBBMasked256Merging,
|
||||
ssa.OpAMD64VPSUBBMasked512Merging,
|
||||
ssa.OpAMD64VPSUBWMasked128Merging,
|
||||
ssa.OpAMD64VPSUBWMasked256Merging,
|
||||
ssa.OpAMD64VPSUBWMasked512Merging,
|
||||
ssa.OpAMD64VPSUBDMasked128Merging,
|
||||
ssa.OpAMD64VPSUBDMasked256Merging,
|
||||
ssa.OpAMD64VPSUBDMasked512Merging,
|
||||
ssa.OpAMD64VPSUBQMasked128Merging,
|
||||
ssa.OpAMD64VPSUBQMasked256Merging,
|
||||
ssa.OpAMD64VPSUBQMasked512Merging,
|
||||
ssa.OpAMD64VPSUBSBMasked128Merging,
|
||||
ssa.OpAMD64VPSUBSBMasked256Merging,
|
||||
ssa.OpAMD64VPSUBSBMasked512Merging,
|
||||
ssa.OpAMD64VPSUBSWMasked128Merging,
|
||||
ssa.OpAMD64VPSUBSWMasked256Merging,
|
||||
ssa.OpAMD64VPSUBSWMasked512Merging,
|
||||
ssa.OpAMD64VPSUBUSBMasked128Merging,
|
||||
ssa.OpAMD64VPSUBUSBMasked256Merging,
|
||||
ssa.OpAMD64VPSUBUSBMasked512Merging,
|
||||
ssa.OpAMD64VPSUBUSWMasked128Merging,
|
||||
ssa.OpAMD64VPSUBUSWMasked256Merging,
|
||||
ssa.OpAMD64VPSUBUSWMasked512Merging,
|
||||
ssa.OpAMD64VPXORDMasked128Merging,
|
||||
ssa.OpAMD64VPXORDMasked256Merging,
|
||||
ssa.OpAMD64VPXORDMasked512Merging,
|
||||
ssa.OpAMD64VPXORQMasked128Merging,
|
||||
ssa.OpAMD64VPXORQMasked256Merging,
|
||||
ssa.OpAMD64VPXORQMasked512Merging:
|
||||
p = simdV3kvResultInArg0(s, v)
|
||||
|
||||
case ssa.OpAMD64VPSLLW128,
|
||||
|
|
@ -1979,6 +2225,199 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
|||
case ssa.OpAMD64SHA256RNDS2128:
|
||||
p = simdV31x0AtIn2ResultInArg0(s, v)
|
||||
|
||||
case ssa.OpAMD64VPABSBMasked128Merging,
|
||||
ssa.OpAMD64VPABSBMasked256Merging,
|
||||
ssa.OpAMD64VPABSBMasked512Merging,
|
||||
ssa.OpAMD64VPABSWMasked128Merging,
|
||||
ssa.OpAMD64VPABSWMasked256Merging,
|
||||
ssa.OpAMD64VPABSWMasked512Merging,
|
||||
ssa.OpAMD64VPABSDMasked128Merging,
|
||||
ssa.OpAMD64VPABSDMasked256Merging,
|
||||
ssa.OpAMD64VPABSDMasked512Merging,
|
||||
ssa.OpAMD64VPABSQMasked128Merging,
|
||||
ssa.OpAMD64VPABSQMasked256Merging,
|
||||
ssa.OpAMD64VPABSQMasked512Merging,
|
||||
ssa.OpAMD64VBROADCASTSSMasked128Merging,
|
||||
ssa.OpAMD64VPBROADCASTQMasked128Merging,
|
||||
ssa.OpAMD64VPBROADCASTBMasked128Merging,
|
||||
ssa.OpAMD64VPBROADCASTWMasked128Merging,
|
||||
ssa.OpAMD64VPBROADCASTDMasked128Merging,
|
||||
ssa.OpAMD64VBROADCASTSSMasked256Merging,
|
||||
ssa.OpAMD64VBROADCASTSDMasked256Merging,
|
||||
ssa.OpAMD64VPBROADCASTBMasked256Merging,
|
||||
ssa.OpAMD64VPBROADCASTWMasked256Merging,
|
||||
ssa.OpAMD64VPBROADCASTDMasked256Merging,
|
||||
ssa.OpAMD64VPBROADCASTQMasked256Merging,
|
||||
ssa.OpAMD64VBROADCASTSSMasked512Merging,
|
||||
ssa.OpAMD64VBROADCASTSDMasked512Merging,
|
||||
ssa.OpAMD64VPBROADCASTBMasked512Merging,
|
||||
ssa.OpAMD64VPBROADCASTWMasked512Merging,
|
||||
ssa.OpAMD64VPBROADCASTDMasked512Merging,
|
||||
ssa.OpAMD64VPBROADCASTQMasked512Merging,
|
||||
ssa.OpAMD64VRNDSCALEPSMasked128Merging,
|
||||
ssa.OpAMD64VRNDSCALEPSMasked256Merging,
|
||||
ssa.OpAMD64VRNDSCALEPSMasked512Merging,
|
||||
ssa.OpAMD64VRNDSCALEPDMasked128Merging,
|
||||
ssa.OpAMD64VRNDSCALEPDMasked256Merging,
|
||||
ssa.OpAMD64VRNDSCALEPDMasked512Merging,
|
||||
ssa.OpAMD64VREDUCEPSMasked128Merging,
|
||||
ssa.OpAMD64VREDUCEPSMasked256Merging,
|
||||
ssa.OpAMD64VREDUCEPSMasked512Merging,
|
||||
ssa.OpAMD64VREDUCEPDMasked128Merging,
|
||||
ssa.OpAMD64VREDUCEPDMasked256Merging,
|
||||
ssa.OpAMD64VREDUCEPDMasked512Merging,
|
||||
ssa.OpAMD64VPMOVWBMasked128Merging,
|
||||
ssa.OpAMD64VPMOVWBMasked256Merging,
|
||||
ssa.OpAMD64VPMOVDBMasked128Merging,
|
||||
ssa.OpAMD64VPMOVQBMasked128Merging,
|
||||
ssa.OpAMD64VPMOVSWBMasked128Merging,
|
||||
ssa.OpAMD64VPMOVSWBMasked256Merging,
|
||||
ssa.OpAMD64VPMOVSDBMasked128Merging,
|
||||
ssa.OpAMD64VPMOVSQBMasked128Merging,
|
||||
ssa.OpAMD64VPMOVSXBWMasked256Merging,
|
||||
ssa.OpAMD64VPMOVSXBWMasked512Merging,
|
||||
ssa.OpAMD64VPMOVDWMasked128Merging,
|
||||
ssa.OpAMD64VPMOVDWMasked256Merging,
|
||||
ssa.OpAMD64VPMOVQWMasked128Merging,
|
||||
ssa.OpAMD64VPMOVSDWMasked128Merging,
|
||||
ssa.OpAMD64VPMOVSDWMasked256Merging,
|
||||
ssa.OpAMD64VPMOVSQWMasked128Merging,
|
||||
ssa.OpAMD64VPMOVSXBWMasked128Merging,
|
||||
ssa.OpAMD64VCVTTPS2DQMasked128Merging,
|
||||
ssa.OpAMD64VCVTTPS2DQMasked256Merging,
|
||||
ssa.OpAMD64VCVTTPS2DQMasked512Merging,
|
||||
ssa.OpAMD64VPMOVSXBDMasked512Merging,
|
||||
ssa.OpAMD64VPMOVSXWDMasked256Merging,
|
||||
ssa.OpAMD64VPMOVSXWDMasked512Merging,
|
||||
ssa.OpAMD64VPMOVQDMasked128Merging,
|
||||
ssa.OpAMD64VPMOVQDMasked256Merging,
|
||||
ssa.OpAMD64VPMOVSQDMasked128Merging,
|
||||
ssa.OpAMD64VPMOVSQDMasked256Merging,
|
||||
ssa.OpAMD64VPMOVSXBDMasked128Merging,
|
||||
ssa.OpAMD64VPMOVSXWDMasked128Merging,
|
||||
ssa.OpAMD64VPMOVSXBDMasked256Merging,
|
||||
ssa.OpAMD64VPMOVSXWQMasked512Merging,
|
||||
ssa.OpAMD64VPMOVSXDQMasked256Merging,
|
||||
ssa.OpAMD64VPMOVSXDQMasked512Merging,
|
||||
ssa.OpAMD64VPMOVSXBQMasked128Merging,
|
||||
ssa.OpAMD64VPMOVSXWQMasked128Merging,
|
||||
ssa.OpAMD64VPMOVSXDQMasked128Merging,
|
||||
ssa.OpAMD64VPMOVSXBQMasked256Merging,
|
||||
ssa.OpAMD64VPMOVSXBQMasked512Merging,
|
||||
ssa.OpAMD64VPMOVUSWBMasked128Merging,
|
||||
ssa.OpAMD64VPMOVUSWBMasked256Merging,
|
||||
ssa.OpAMD64VPMOVUSDBMasked128Merging,
|
||||
ssa.OpAMD64VPMOVUSQBMasked128Merging,
|
||||
ssa.OpAMD64VPMOVZXBWMasked256Merging,
|
||||
ssa.OpAMD64VPMOVZXBWMasked512Merging,
|
||||
ssa.OpAMD64VPMOVUSDWMasked128Merging,
|
||||
ssa.OpAMD64VPMOVUSDWMasked256Merging,
|
||||
ssa.OpAMD64VPMOVUSQWMasked128Merging,
|
||||
ssa.OpAMD64VPMOVZXBWMasked128Merging,
|
||||
ssa.OpAMD64VCVTPS2UDQMasked128Merging,
|
||||
ssa.OpAMD64VCVTPS2UDQMasked256Merging,
|
||||
ssa.OpAMD64VCVTPS2UDQMasked512Merging,
|
||||
ssa.OpAMD64VPMOVZXBDMasked512Merging,
|
||||
ssa.OpAMD64VPMOVZXWDMasked256Merging,
|
||||
ssa.OpAMD64VPMOVZXWDMasked512Merging,
|
||||
ssa.OpAMD64VPMOVUSQDMasked128Merging,
|
||||
ssa.OpAMD64VPMOVUSQDMasked256Merging,
|
||||
ssa.OpAMD64VPMOVZXBDMasked128Merging,
|
||||
ssa.OpAMD64VPMOVZXWDMasked128Merging,
|
||||
ssa.OpAMD64VPMOVZXBDMasked256Merging,
|
||||
ssa.OpAMD64VPMOVZXWQMasked512Merging,
|
||||
ssa.OpAMD64VPMOVZXDQMasked256Merging,
|
||||
ssa.OpAMD64VPMOVZXDQMasked512Merging,
|
||||
ssa.OpAMD64VPMOVZXBQMasked128Merging,
|
||||
ssa.OpAMD64VPMOVZXWQMasked128Merging,
|
||||
ssa.OpAMD64VPMOVZXDQMasked128Merging,
|
||||
ssa.OpAMD64VPMOVSXWQMasked256Merging,
|
||||
ssa.OpAMD64VPMOVZXBQMasked256Merging,
|
||||
ssa.OpAMD64VPMOVZXWQMasked256Merging,
|
||||
ssa.OpAMD64VPMOVZXBQMasked512Merging,
|
||||
ssa.OpAMD64VPLZCNTDMasked128Merging,
|
||||
ssa.OpAMD64VPLZCNTDMasked256Merging,
|
||||
ssa.OpAMD64VPLZCNTDMasked512Merging,
|
||||
ssa.OpAMD64VPLZCNTQMasked128Merging,
|
||||
ssa.OpAMD64VPLZCNTQMasked256Merging,
|
||||
ssa.OpAMD64VPLZCNTQMasked512Merging,
|
||||
ssa.OpAMD64VPOPCNTBMasked128Merging,
|
||||
ssa.OpAMD64VPOPCNTBMasked256Merging,
|
||||
ssa.OpAMD64VPOPCNTBMasked512Merging,
|
||||
ssa.OpAMD64VPOPCNTWMasked128Merging,
|
||||
ssa.OpAMD64VPOPCNTWMasked256Merging,
|
||||
ssa.OpAMD64VPOPCNTWMasked512Merging,
|
||||
ssa.OpAMD64VPOPCNTDMasked128Merging,
|
||||
ssa.OpAMD64VPOPCNTDMasked256Merging,
|
||||
ssa.OpAMD64VPOPCNTDMasked512Merging,
|
||||
ssa.OpAMD64VPOPCNTQMasked128Merging,
|
||||
ssa.OpAMD64VPOPCNTQMasked256Merging,
|
||||
ssa.OpAMD64VPOPCNTQMasked512Merging,
|
||||
ssa.OpAMD64VPSHUFDMasked256Merging,
|
||||
ssa.OpAMD64VPSHUFDMasked512Merging,
|
||||
ssa.OpAMD64VPSHUFHWMasked256Merging,
|
||||
ssa.OpAMD64VPSHUFHWMasked512Merging,
|
||||
ssa.OpAMD64VPSHUFHWMasked128Merging,
|
||||
ssa.OpAMD64VPSHUFDMasked128Merging,
|
||||
ssa.OpAMD64VRCP14PSMasked128Merging,
|
||||
ssa.OpAMD64VRCP14PSMasked256Merging,
|
||||
ssa.OpAMD64VRCP14PSMasked512Merging,
|
||||
ssa.OpAMD64VRCP14PDMasked128Merging,
|
||||
ssa.OpAMD64VRCP14PDMasked256Merging,
|
||||
ssa.OpAMD64VRCP14PDMasked512Merging,
|
||||
ssa.OpAMD64VRSQRT14PSMasked128Merging,
|
||||
ssa.OpAMD64VRSQRT14PSMasked256Merging,
|
||||
ssa.OpAMD64VRSQRT14PSMasked512Merging,
|
||||
ssa.OpAMD64VRSQRT14PDMasked128Merging,
|
||||
ssa.OpAMD64VRSQRT14PDMasked256Merging,
|
||||
ssa.OpAMD64VRSQRT14PDMasked512Merging,
|
||||
ssa.OpAMD64VPROLDMasked128Merging,
|
||||
ssa.OpAMD64VPROLDMasked256Merging,
|
||||
ssa.OpAMD64VPROLDMasked512Merging,
|
||||
ssa.OpAMD64VPROLQMasked128Merging,
|
||||
ssa.OpAMD64VPROLQMasked256Merging,
|
||||
ssa.OpAMD64VPROLQMasked512Merging,
|
||||
ssa.OpAMD64VPRORDMasked128Merging,
|
||||
ssa.OpAMD64VPRORDMasked256Merging,
|
||||
ssa.OpAMD64VPRORDMasked512Merging,
|
||||
ssa.OpAMD64VPRORQMasked128Merging,
|
||||
ssa.OpAMD64VPRORQMasked256Merging,
|
||||
ssa.OpAMD64VPRORQMasked512Merging,
|
||||
ssa.OpAMD64VSQRTPSMasked128Merging,
|
||||
ssa.OpAMD64VSQRTPSMasked256Merging,
|
||||
ssa.OpAMD64VSQRTPSMasked512Merging,
|
||||
ssa.OpAMD64VSQRTPDMasked128Merging,
|
||||
ssa.OpAMD64VSQRTPDMasked256Merging,
|
||||
ssa.OpAMD64VSQRTPDMasked512Merging,
|
||||
ssa.OpAMD64VPSLLWMasked128constMerging,
|
||||
ssa.OpAMD64VPSLLWMasked256constMerging,
|
||||
ssa.OpAMD64VPSLLWMasked512constMerging,
|
||||
ssa.OpAMD64VPSLLDMasked128constMerging,
|
||||
ssa.OpAMD64VPSLLDMasked256constMerging,
|
||||
ssa.OpAMD64VPSLLDMasked512constMerging,
|
||||
ssa.OpAMD64VPSLLQMasked128constMerging,
|
||||
ssa.OpAMD64VPSLLQMasked256constMerging,
|
||||
ssa.OpAMD64VPSLLQMasked512constMerging,
|
||||
ssa.OpAMD64VPSRLWMasked128constMerging,
|
||||
ssa.OpAMD64VPSRLWMasked256constMerging,
|
||||
ssa.OpAMD64VPSRLWMasked512constMerging,
|
||||
ssa.OpAMD64VPSRLDMasked128constMerging,
|
||||
ssa.OpAMD64VPSRLDMasked256constMerging,
|
||||
ssa.OpAMD64VPSRLDMasked512constMerging,
|
||||
ssa.OpAMD64VPSRLQMasked128constMerging,
|
||||
ssa.OpAMD64VPSRLQMasked256constMerging,
|
||||
ssa.OpAMD64VPSRLQMasked512constMerging,
|
||||
ssa.OpAMD64VPSRAWMasked128constMerging,
|
||||
ssa.OpAMD64VPSRAWMasked256constMerging,
|
||||
ssa.OpAMD64VPSRAWMasked512constMerging,
|
||||
ssa.OpAMD64VPSRADMasked128constMerging,
|
||||
ssa.OpAMD64VPSRADMasked256constMerging,
|
||||
ssa.OpAMD64VPSRADMasked512constMerging,
|
||||
ssa.OpAMD64VPSRAQMasked128constMerging,
|
||||
ssa.OpAMD64VPSRAQMasked256constMerging,
|
||||
ssa.OpAMD64VPSRAQMasked512constMerging:
|
||||
p = simdV2kvResultInArg0(s, v)
|
||||
|
||||
default:
|
||||
// Unknown reg shape
|
||||
return false
|
||||
|
|
@ -2843,12 +3282,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
|||
ssa.OpAMD64VPXORQMasked256load,
|
||||
ssa.OpAMD64VPXORQMasked512,
|
||||
ssa.OpAMD64VPXORQMasked512load,
|
||||
ssa.OpAMD64VMOVUPSMasked128,
|
||||
ssa.OpAMD64VMOVUPSMasked256,
|
||||
ssa.OpAMD64VMOVUPSMasked512,
|
||||
ssa.OpAMD64VMOVUPDMasked128,
|
||||
ssa.OpAMD64VMOVUPDMasked256,
|
||||
ssa.OpAMD64VMOVUPDMasked512,
|
||||
ssa.OpAMD64VMOVDQU8Masked128,
|
||||
ssa.OpAMD64VMOVDQU8Masked256,
|
||||
ssa.OpAMD64VMOVDQU8Masked512,
|
||||
|
|
|
|||
|
|
@ -1963,6 +1963,22 @@ func simdV2kv(s *ssagen.State, v *ssa.Value) *obj.Prog {
|
|||
return p
|
||||
}
|
||||
|
||||
// Example instruction: VPABSB X1, X2, K3 (masking merging)
|
||||
func simdV2kvResultInArg0(s *ssagen.State, v *ssa.Value) *obj.Prog {
|
||||
p := s.Prog(v.Op.Asm())
|
||||
p.From.Type = obj.TYPE_REG
|
||||
p.From.Reg = simdReg(v.Args[1])
|
||||
// These "simd*" series of functions assumes:
|
||||
// Any "K" register that serves as the write-mask
|
||||
// or "predicate" for "predicated AVX512 instructions"
|
||||
// sits right at the end of the operand list.
|
||||
// TODO: verify this assumption.
|
||||
p.AddRestSourceReg(maskReg(v.Args[2]))
|
||||
p.To.Type = obj.TYPE_REG
|
||||
p.To.Reg = simdReg(v)
|
||||
return p
|
||||
}
|
||||
|
||||
// This function is to accustomize the shifts.
|
||||
// The 2nd arg is an XMM, and this function merely checks that.
|
||||
// Example instruction: VPSLLQ Z1, X1, K1, Z2
|
||||
|
|
|
|||
|
|
@ -213,7 +213,7 @@ func init() {
|
|||
vloadk = regInfo{inputs: []regMask{gpspsb, mask, 0}, outputs: vonly}
|
||||
vstorek = regInfo{inputs: []regMask{gpspsb, mask, v, 0}}
|
||||
|
||||
v11 = regInfo{inputs: vzonly, outputs: vonly}
|
||||
v11 = regInfo{inputs: vonly, outputs: vonly} // used in resultInArg0 ops, arg0 must not be x15
|
||||
v21 = regInfo{inputs: []regMask{v, vz}, outputs: vonly} // used in resultInArg0 ops, arg0 must not be x15
|
||||
vk = regInfo{inputs: vzonly, outputs: maskonly}
|
||||
kv = regInfo{inputs: maskonly, outputs: vonly}
|
||||
|
|
@ -231,13 +231,13 @@ func init() {
|
|||
gpv = regInfo{inputs: []regMask{gp}, outputs: vonly}
|
||||
v2flags = regInfo{inputs: []regMask{vz, vz}}
|
||||
|
||||
w11 = regInfo{inputs: wzonly, outputs: wonly}
|
||||
w11 = regInfo{inputs: wonly, outputs: wonly} // used in resultInArg0 ops, arg0 must not be x15
|
||||
w21 = regInfo{inputs: []regMask{wz, wz}, outputs: wonly}
|
||||
wk = regInfo{inputs: wzonly, outputs: maskonly}
|
||||
kw = regInfo{inputs: maskonly, outputs: wonly}
|
||||
w2k = regInfo{inputs: []regMask{wz, wz}, outputs: maskonly}
|
||||
wkw = regInfo{inputs: []regMask{wz, mask}, outputs: wonly}
|
||||
w2kw = regInfo{inputs: []regMask{wz, wz, mask}, outputs: wonly}
|
||||
w2kw = regInfo{inputs: []regMask{w, wz, mask}, outputs: wonly} // used in resultInArg0 ops, arg0 must not be x15
|
||||
w2kk = regInfo{inputs: []regMask{wz, wz, mask}, outputs: maskonly}
|
||||
w31 = regInfo{inputs: []regMask{w, wz, wz}, outputs: wonly} // used in resultInArg0 ops, arg0 must not be x15
|
||||
w3kw = regInfo{inputs: []regMask{w, wz, wz, mask}, outputs: wonly} // used in resultInArg0 ops, arg0 must not be x15
|
||||
|
|
|
|||
|
|
@ -1862,6 +1862,424 @@
|
|||
(VMOVDQU64Masked128 (VPSRAQ128const [a] x) mask) => (VPSRAQMasked128const [a] x mask)
|
||||
(VMOVDQU64Masked256 (VPSRAQ256const [a] x) mask) => (VPSRAQMasked256const [a] x mask)
|
||||
(VMOVDQU64Masked512 (VPSRAQ512const [a] x) mask) => (VPSRAQMasked512const [a] x mask)
|
||||
(VPBLENDMQMasked512 dst (VPSLLQ512const [a] x) mask) => (VPSLLQMasked512constMerging dst [a] x mask)
|
||||
(VPBLENDVB256 dst (VPMOVSXBW512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBWMasked512Merging dst x (VPMOVVec8x32ToM <types.TypeMask> mask))
|
||||
(VPBLENDMDMasked512 dst (VPMOVSDW256 x) mask) => (VPMOVSDWMasked256Merging dst x mask)
|
||||
(VPBLENDMDMasked512 dst (VPLZCNTD512 x) mask) => (VPLZCNTDMasked512Merging dst x mask)
|
||||
(VPBLENDMWMasked512 dst (VPMAXSW512 x y) mask) => (VPMAXSWMasked512Merging dst x y mask)
|
||||
(VPBLENDVB128 dst (VPMINUD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDMWMasked512 dst (VPMULHW512 x y) mask) => (VPMULHWMasked512Merging dst x y mask)
|
||||
(VPBLENDMDMasked512 dst (VPMULLD512 x y) mask) => (VPMULLDMasked512Merging dst x y mask)
|
||||
(VPBLENDVB128 dst (VPROLQ128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLQMasked128Merging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPMADDUBSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMADDUBSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPMAXSB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPADDSB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPADDUSB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDUSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VBROADCASTSS256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VBROADCASTSSMasked256Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPMOVSXBW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBWMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPMINSQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VMULPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMULPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDMBMasked512 dst (VGF2P8MULB512 x y) mask) => (VGF2P8MULBMasked512Merging dst x y mask)
|
||||
(VPBLENDMDMasked512 dst (VMAXPS512 x y) mask) => (VMAXPSMasked512Merging dst x y mask)
|
||||
(VPBLENDVB256 dst (VPOPCNTB256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTBMasked256Merging dst x (VPMOVVec8x32ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VSUBPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSUBPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDMQMasked512 dst (VPSUBQ512 x y) mask) => (VPSUBQMasked512Merging dst x y mask)
|
||||
(VPBLENDVB128 dst (VPSUBUSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBUSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPMOVSXBQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBQMasked512Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDMDMasked512 dst (VPMOVUSDB128 x) mask) => (VPMOVUSDBMasked128Merging dst x mask)
|
||||
(VPBLENDVB256 dst (VPMAXUQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDMDMasked512 dst (VRSQRT14PS512 x) mask) => (VRSQRT14PSMasked512Merging dst x mask)
|
||||
(VPBLENDVB256 dst (VPROLD256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLDMasked256Merging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDMQMasked512 dst (VPROLQ512 [a] x) mask) => (VPROLQMasked512Merging dst [a] x mask)
|
||||
(VPBLENDMQMasked512 dst (VPSLLVQ512 x y) mask) => (VPSLLVQMasked512Merging dst x y mask)
|
||||
(VPBLENDVB256 dst (VPSRAVD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VADDPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VADDPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPMOVSXDQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXDQMasked512Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPMOVUSWB128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSWBMasked128Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPMOVZXWQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXWQMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPMULLW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULLWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDMBMasked512 dst (VPOPCNTB512 x) mask) => (VPOPCNTBMasked512Merging dst x mask)
|
||||
(VPBLENDVB128 dst (VPSHLDQ128 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHLDQMasked128Merging dst [a] x y (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPSRAQ256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAQMasked256constMerging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDMDMasked512 dst (VPMOVDW256 x) mask) => (VPMOVDWMasked256Merging dst x mask)
|
||||
(VPBLENDMQMasked512 dst (VPMOVUSQB128 x) mask) => (VPMOVUSQBMasked128Merging dst x mask)
|
||||
(VPBLENDVB256 dst (VCVTPS2UDQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VCVTPS2UDQMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPMOVZXBQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBQMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPMAXSQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPMINSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPOPCNTW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTWMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDMDMasked512 dst (VRCP14PS512 x) mask) => (VRCP14PSMasked512Merging dst x mask)
|
||||
(VPBLENDVB128 dst (VPBROADCASTW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTWMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDMWMasked512 dst (VPMOVWB256 x) mask) => (VPMOVWBMasked256Merging dst x mask)
|
||||
(VPBLENDVB128 dst (VPRORVD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPSHLDD256 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHLDDMasked256Merging dst [a] x y (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPSLLVW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPSRLVQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPSUBUSB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBUSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
|
||||
(VPBLENDMDMasked512 dst (VREDUCEPS512 [a] x) mask) => (VREDUCEPSMasked512Merging dst [a] x mask)
|
||||
(VPBLENDVB256 dst (VPMAXSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VMINPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMINPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDMQMasked512 dst (VPADDQ512 x y) mask) => (VPADDQMasked512Merging dst x y mask)
|
||||
(VPBLENDVB128 dst (VBROADCASTSD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VBROADCASTSDMasked256Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(VPBLENDMQMasked512 dst (VRNDSCALEPD512 [a] x) mask) => (VRNDSCALEPDMasked512Merging dst [a] x mask)
|
||||
(VPBLENDVB128 dst (VPMOVZXDQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXDQMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPMINSD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPSRAQ128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAQMasked128constMerging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPADDSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VRNDSCALEPS256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRNDSCALEPSMasked256Merging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPACKUSDW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPACKUSDWMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDMWMasked512 dst (VPMADDUBSW512 x y) mask) => (VPMADDUBSWMasked512Merging dst x y mask)
|
||||
(VPBLENDVB128 dst (VPLZCNTD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPLZCNTDMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPMAXUD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPOPCNTB128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTBMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPROLVQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDMQMasked512 dst (VPABSQ512 x) mask) => (VPABSQMasked512Merging dst x mask)
|
||||
(VPBLENDVB128 dst (VBROADCASTSD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VBROADCASTSDMasked512Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VMINPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMINPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPMULHW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULHWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDMWMasked512 dst (VPSHLDW512 [a] x y) mask) => (VPSHLDWMasked512Merging dst [a] x y mask)
|
||||
(VPBLENDVB128 dst (VPSHRDW128 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHRDWMasked128Merging dst [a] x y (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VADDPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VADDPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPMOVZXWD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXWDMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPMOVSXWQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWQMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDMDMasked512 dst (VDIVPS512 x y) mask) => (VDIVPSMasked512Merging dst x y mask)
|
||||
(VPBLENDVB256 dst (VDIVPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VDIVPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPLZCNTQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPLZCNTQMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPSUBSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VREDUCEPD128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VREDUCEPDMasked128Merging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(VPBLENDMQMasked512 dst (VPMOVUSQD256 x) mask) => (VPMOVUSQDMasked256Merging dst x mask)
|
||||
(VPBLENDVB128 dst (VPMOVZXBD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBDMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDMWMasked512 dst (VPMULHUW512 x y) mask) => (VPMULHUWMasked512Merging dst x y mask)
|
||||
(VPBLENDVB128 dst (VPRORQ128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORQMasked128Merging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPSLLVW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPSRLVD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDMBMasked512 dst (VPSUBSB512 x y) mask) => (VPSUBSBMasked512Merging dst x y mask)
|
||||
(VPBLENDVB256 dst (VPADDD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPMOVSXBW256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBWMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPMOVSDW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSDWMasked128Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPMINSD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VADDPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VADDPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDMQMasked512 dst (VADDPD512 x y) mask) => (VADDPDMasked512Merging dst x y mask)
|
||||
(VPBLENDVB128 dst (VPMOVSXBD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBDMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPMOVSXDQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXDQMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDMWMasked512 dst (VPMOVUSWB256 x) mask) => (VPMOVUSWBMasked256Merging dst x mask)
|
||||
(VPBLENDVB256 dst (VPOPCNTD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTDMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPROLVD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPSRLVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPADDUSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDUSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPMAXSD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPMINUB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPMULLQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULLQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VSQRTPD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSQRTPDMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPSUBD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VREDUCEPS256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VREDUCEPSMasked256Merging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDMWMasked512 dst (VPMINSW512 x y) mask) => (VPMINSWMasked512Merging dst x y mask)
|
||||
(VPBLENDMQMasked512 dst (VRCP14PD512 x) mask) => (VRCP14PDMasked512Merging dst x mask)
|
||||
(VPBLENDMWMasked512 dst (VPSRAVW512 x y) mask) => (VPSRAVWMasked512Merging dst x y mask)
|
||||
(VPBLENDMDMasked512 dst (VPSRLVD512 x y) mask) => (VPSRLVDMasked512Merging dst x y mask)
|
||||
(VPBLENDMDMasked512 dst (VPSUBD512 x y) mask) => (VPSUBDMasked512Merging dst x y mask)
|
||||
(VPBLENDVB256 dst (VPSUBQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPBROADCASTD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTDMasked512Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPMOVSXWD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWDMasked512Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPMADDWD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMADDWDMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VGF2P8MULB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VGF2P8MULBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPROLD128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLDMasked128Merging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPSLLVD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPSRAD128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRADMasked128constMerging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPSRLVW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPSUBUSB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBUSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPADDUSB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDUSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPMOVZXBW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBWMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPMOVZXDQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXDQMasked256Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPROLVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPADDB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDMDMasked512 dst (VPROLD512 [a] x) mask) => (VPROLDMasked512Merging dst [a] x mask)
|
||||
(VPBLENDMQMasked512 dst (VPSRLVQ512 x y) mask) => (VPSRLVQMasked512Merging dst x y mask)
|
||||
(VPBLENDMBMasked512 dst (VPSUBB512 x y) mask) => (VPSUBBMasked512Merging dst x y mask)
|
||||
(VPBLENDVB256 dst (VPADDW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPADDQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPADDUSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDUSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPBROADCASTB128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTBMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VRNDSCALEPS128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRNDSCALEPSMasked128Merging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VREDUCEPD256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VREDUCEPDMasked256Merging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPMINUW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDMDMasked512 dst (VPORD512 x y) mask) => (VPORDMasked512Merging dst x y mask)
|
||||
(VPBLENDVB256 dst (VRNDSCALEPD256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRNDSCALEPDMasked256Merging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPMINSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPMULLD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULLDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPSHUFB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPRORD128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORDMasked128Merging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPRORVD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDMQMasked512 dst (VPRORVQ512 x y) mask) => (VPRORVQMasked512Merging dst x y mask)
|
||||
(VPBLENDVB256 dst (VPSHLDW256 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHLDWMasked256Merging dst [a] x y (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VCVTTPS2DQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VCVTTPS2DQMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VCVTTPS2DQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VCVTTPS2DQMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VMINPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMINPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDMDMasked512 dst (VPSHLDD512 [a] x y) mask) => (VPSHLDDMasked512Merging dst [a] x y mask)
|
||||
(VPBLENDMQMasked512 dst (VPSRAVQ512 x y) mask) => (VPSRAVQMasked512Merging dst x y mask)
|
||||
(VPBLENDVB128 dst (VSUBPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSUBPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VSUBPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSUBPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPSUBD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDMWMasked512 dst (VPADDW512 x y) mask) => (VPADDWMasked512Merging dst x y mask)
|
||||
(VPBLENDMQMasked512 dst (VPANDQ512 x y) mask) => (VPANDQMasked512Merging dst x y mask)
|
||||
(VPBLENDVB128 dst (VPBROADCASTB512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTBMasked512Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDMDMasked512 dst (VPACKUSDW512 x y) mask) => (VPACKUSDWMasked512Merging dst x y mask)
|
||||
(VPBLENDMWMasked512 dst (VPSHUFHW512 [a] x) mask) => (VPSHUFHWMasked512Merging dst [a] x mask)
|
||||
(VPBLENDVB128 dst (VRCP14PD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRCP14PDMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(VPBLENDMWMasked512 dst (VPSHRDW512 [a] x y) mask) => (VPSHRDWMasked512Merging dst [a] x y mask)
|
||||
(VPBLENDVB256 dst (VSQRTPS256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSQRTPSMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDMWMasked512 dst (VPSUBSW512 x y) mask) => (VPSUBSWMasked512Merging dst x y mask)
|
||||
(VPBLENDVB128 dst (VPMOVSXWD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWDMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPBROADCASTW256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTWMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPBROADCASTD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTDMasked256Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDMQMasked512 dst (VPMOVQB128 x) mask) => (VPMOVQBMasked128Merging dst x mask)
|
||||
(VPBLENDVB256 dst (VPACKUSDW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPACKUSDWMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDMBMasked512 dst (VPMINSB512 x y) mask) => (VPMINSBMasked512Merging dst x y mask)
|
||||
(VPBLENDVB256 dst (VPMULLD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULLDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPADDB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
|
||||
(VPBLENDMBMasked512 dst (VPADDB512 x y) mask) => (VPADDBMasked512Merging dst x y mask)
|
||||
(VPBLENDVB128 dst (VPADDD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPMOVWB128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVWBMasked128Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPMADDWD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMADDWDMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDMDMasked512 dst (VPMAXSD512 x y) mask) => (VPMAXSDMasked512Merging dst x y mask)
|
||||
(VPBLENDMQMasked512 dst (VPSHLDQ512 [a] x y) mask) => (VPSHLDQMasked512Merging dst [a] x y mask)
|
||||
(VPBLENDVB128 dst (VBROADCASTSS128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VBROADCASTSSMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPMOVQD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVQDMasked128Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPMOVSXDQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXDQMasked256Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDMQMasked512 dst (VDIVPD512 x y) mask) => (VDIVPDMasked512Merging dst x y mask)
|
||||
(VPBLENDMDMasked512 dst (VADDPS512 x y) mask) => (VADDPSMasked512Merging dst x y mask)
|
||||
(VPBLENDVB128 dst (VPMOVSXBD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBDMasked512Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDMDMasked512 dst (VPMOVUSDW256 x) mask) => (VPMOVUSDWMasked256Merging dst x mask)
|
||||
(VPBLENDVB256 dst (VPMULHUW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULHUWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPMULLQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULLQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPROLVD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDMQMasked512 dst (VPROLVQ512 x y) mask) => (VPROLVQMasked512Merging dst x y mask)
|
||||
(VPBLENDVB128 dst (VPSHLDW128 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHLDWMasked128Merging dst [a] x y (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPMOVUSDW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSDWMasked128Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPMAXUQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPMULLW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULLWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPRORD256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORDMasked256Merging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDMQMasked512 dst (VPRORQ512 [a] x) mask) => (VPRORQMasked512Merging dst [a] x mask)
|
||||
(VPBLENDVB128 dst (VPSHLDD128 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHLDDMasked128Merging dst [a] x y (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPSRAVW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VSUBPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSUBPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPBROADCASTQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTQMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPMINUD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPSHUFD256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFDMasked256Merging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPRORVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPSLLVQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDMWMasked512 dst (VPSUBUSW512 x y) mask) => (VPSUBUSWMasked512Merging dst x y mask)
|
||||
(VPBLENDMDMasked512 dst (VPMOVSDB128 x) mask) => (VPMOVSDBMasked128Merging dst x mask)
|
||||
(VPBLENDVB256 dst (VPMOVUSQD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQDMasked128Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDMBMasked512 dst (VPMAXUB512 x y) mask) => (VPMAXUBMasked512Merging dst x y mask)
|
||||
(VPBLENDMQMasked512 dst (VPMINSQ512 x y) mask) => (VPMINSQMasked512Merging dst x y mask)
|
||||
(VPBLENDMQMasked512 dst (VSQRTPD512 x) mask) => (VSQRTPDMasked512Merging dst x mask)
|
||||
(VPBLENDMDMasked512 dst (VSUBPS512 x y) mask) => (VSUBPSMasked512Merging dst x y mask)
|
||||
(VPBLENDVB256 dst (VPSUBUSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBUSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDMDMasked512 dst (VPMAXUD512 x y) mask) => (VPMAXUDMasked512Merging dst x y mask)
|
||||
(VPBLENDVB128 dst (VBROADCASTSS512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VBROADCASTSSMasked512Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDMQMasked512 dst (VPMOVSQD256 x) mask) => (VPMOVSQDMasked256Merging dst x mask)
|
||||
(VPBLENDVB128 dst (VPMOVZXBD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBDMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPMOVZXBQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBQMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VRSQRT14PD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRSQRT14PDMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDMDMasked512 dst (VPRORD512 [a] x) mask) => (VPRORDMasked512Merging dst [a] x mask)
|
||||
(VPBLENDMWMasked512 dst (VPSUBW512 x y) mask) => (VPSUBWMasked512Merging dst x y mask)
|
||||
(VPBLENDVB128 dst (VPABSW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSWMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPADDSB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
|
||||
(VPBLENDMBMasked512 dst (VPADDUSB512 x y) mask) => (VPADDUSBMasked512Merging dst x y mask)
|
||||
(VPBLENDVB256 dst (VPMOVZXWD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXWDMasked512Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDMQMasked512 dst (VMINPD512 x y) mask) => (VMINPDMasked512Merging dst x y mask)
|
||||
(VPBLENDMQMasked512 dst (VPMULLQ512 x y) mask) => (VPMULLQMasked512Merging dst x y mask)
|
||||
(VPBLENDMDMasked512 dst (VPROLVD512 x y) mask) => (VPROLVDMasked512Merging dst x y mask)
|
||||
(VPBLENDVB128 dst (VPSUBW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDMDMasked512 dst (VCVTTPS2DQ512 x) mask) => (VCVTTPS2DQMasked512Merging dst x mask)
|
||||
(VPBLENDVB128 dst (VPMOVZXWQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXWQMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDMWMasked512 dst (VPMADDWD512 x y) mask) => (VPMADDWDMasked512Merging dst x y mask)
|
||||
(VPBLENDVB128 dst (VGF2P8MULB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VGF2P8MULBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPROLQ256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLQMasked256Merging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDMWMasked512 dst (VPSLLVW512 x y) mask) => (VPSLLVWMasked512Merging dst x y mask)
|
||||
(VPBLENDVB128 dst (VPABSD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSDMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPAVGB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPAVGBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
|
||||
(VPBLENDMBMasked512 dst (VPAVGB512 x y) mask) => (VPAVGBMasked512Merging dst x y mask)
|
||||
(VPBLENDVB128 dst (VPBROADCASTB256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTBMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VMAXPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMAXPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(VPBLENDMBMasked512 dst (VPMINUB512 x y) mask) => (VPMINUBMasked512Merging dst x y mask)
|
||||
(VPBLENDVB128 dst (VPMINUQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VMULPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMULPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDMQMasked512 dst (VMAXPD512 x y) mask) => (VMAXPDMasked512Merging dst x y mask)
|
||||
(VPBLENDMBMasked512 dst (VPMAXSB512 x y) mask) => (VPMAXSBMasked512Merging dst x y mask)
|
||||
(VPBLENDVB128 dst (VPMULHUW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULHUWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VMULPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMULPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPRORVQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPSUBB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDMDMasked512 dst (VPACKSSDW512 x y) mask) => (VPACKSSDWMasked512Merging dst x y mask)
|
||||
(VPBLENDVB128 dst (VCVTPS2UDQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VCVTPS2UDQMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPMOVZXDQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXDQMasked512Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPMINUB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
|
||||
(VPBLENDMDMasked512 dst (VPRORVD512 x y) mask) => (VPRORVDMasked512Merging dst x y mask)
|
||||
(VPBLENDVB128 dst (VSCALEFPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSCALEFPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPSLLVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPSLLW256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLWMasked256constMerging dst [a] x (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDMWMasked512 dst (VPABSW512 x) mask) => (VPABSWMasked512Merging dst x mask)
|
||||
(VPBLENDVB128 dst (VPMOVSXBQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBQMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VSCALEFPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSCALEFPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPSLLQ256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLQMasked256constMerging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPADDW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDMQMasked512 dst (VMULPD512 x y) mask) => (VMULPDMasked512Merging dst x y mask)
|
||||
(VPBLENDMQMasked512 dst (VPORQ512 x y) mask) => (VPORQMasked512Merging dst x y mask)
|
||||
(VPBLENDVB128 dst (VPMOVSXWD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWDMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDMQMasked512 dst (VPMOVUSQW128 x) mask) => (VPMOVUSQWMasked128Merging dst x mask)
|
||||
(VPBLENDVB256 dst (VPMINSB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VRSQRT14PD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRSQRT14PDMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPSRAW128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAWMasked128constMerging dst [a] x (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPABSQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSQMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDMQMasked512 dst (VREDUCEPD512 [a] x) mask) => (VREDUCEPDMasked512Merging dst [a] x mask)
|
||||
(VPBLENDVB128 dst (VPMULHW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULHWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPSHUFHW256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFHWMasked256Merging dst [a] x (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDMWMasked512 dst (VPSRAW512const [a] x) mask) => (VPSRAWMasked512constMerging dst [a] x mask)
|
||||
(VPBLENDMDMasked512 dst (VPADDD512 x y) mask) => (VPADDDMasked512Merging dst x y mask)
|
||||
(VPBLENDMQMasked512 dst (VPOPCNTQ512 x) mask) => (VPOPCNTQMasked512Merging dst x mask)
|
||||
(VPBLENDVB128 dst (VPSHRDD128 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHRDDMasked128Merging dst [a] x y (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPSUBB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPSUBSB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDMBMasked512 dst (VPSUBUSB512 x y) mask) => (VPSUBUSBMasked512Merging dst x y mask)
|
||||
(VPBLENDVB128 dst (VPADDSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDMWMasked512 dst (VPADDUSW512 x y) mask) => (VPADDUSWMasked512Merging dst x y mask)
|
||||
(VPBLENDVB256 dst (VMAXPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMAXPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPMAXSD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPMINSB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VMULPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMULPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDMDMasked512 dst (VRNDSCALEPS512 [a] x) mask) => (VRNDSCALEPSMasked512Merging dst [a] x mask)
|
||||
(VPBLENDMDMasked512 dst (VCVTPS2UDQ512 x) mask) => (VCVTPS2UDQMasked512Merging dst x mask)
|
||||
(VPBLENDVB256 dst (VDIVPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VDIVPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPMAXSQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VMINPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMINPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPSHUFD128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFDMasked128Merging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDMBMasked512 dst (VPSHUFB512 x y) mask) => (VPSHUFBMasked512Merging dst x y mask)
|
||||
(VPBLENDVB256 dst (VPSHLDQ256 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHLDQMasked256Merging dst [a] x y (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPBROADCASTQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTQMasked512Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VREDUCEPS128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VREDUCEPSMasked128Merging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPMOVZXWQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXWQMasked512Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VSCALEFPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSCALEFPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDMDMasked512 dst (VPSHRDD512 [a] x y) mask) => (VPSHRDDMasked512Merging dst [a] x y mask)
|
||||
(VPBLENDVB128 dst (VPSRAVW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VSQRTPD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSQRTPDMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(VPBLENDMQMasked512 dst (VPXORQ512 x y) mask) => (VPXORQMasked512Merging dst x y mask)
|
||||
(VPBLENDVB128 dst (VPAVGW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPAVGWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPMOVSWB128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSWBMasked128Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VDIVPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VDIVPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VDIVPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VDIVPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPMINSQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDMWMasked512 dst (VPOPCNTW512 x) mask) => (VPOPCNTWMasked512Merging dst x mask)
|
||||
(VPBLENDVB128 dst (VPOPCNTD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTDMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDMDMasked512 dst (VPOPCNTD512 x) mask) => (VPOPCNTDMasked512Merging dst x mask)
|
||||
(VPBLENDVB256 dst (VPABSD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSDMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPBROADCASTQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTQMasked256Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VRNDSCALEPD128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRNDSCALEPDMasked128Merging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(VPBLENDMDMasked512 dst (VPMOVDB128 x) mask) => (VPMOVDBMasked128Merging dst x mask)
|
||||
(VPBLENDVB128 dst (VPMOVSXWQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWQMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPMINUW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDMWMasked512 dst (VPMINUW512 x y) mask) => (VPMINUWMasked512Merging dst x y mask)
|
||||
(VPBLENDVB128 dst (VPOPCNTQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTQMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(VPBLENDMQMasked512 dst (VPMOVQD256 x) mask) => (VPMOVQDMasked256Merging dst x mask)
|
||||
(VPBLENDVB256 dst (VPSHRDW256 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHRDWMasked256Merging dst [a] x y (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDMDMasked512 dst (VPSRAD512const [a] x) mask) => (VPSRADMasked512constMerging dst [a] x mask)
|
||||
(VPBLENDVB128 dst (VPAVGB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPAVGBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDMWMasked512 dst (VPAVGW512 x y) mask) => (VPAVGWMasked512Merging dst x y mask)
|
||||
(VPBLENDVB128 dst (VPMOVSXBQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBQMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPMOVZXBW512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBWMasked512Merging dst x (VPMOVVec8x32ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPMAXSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPMAXUD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDMQMasked512 dst (VPMAXUQ512 x y) mask) => (VPMAXUQMasked512Merging dst x y mask)
|
||||
(VPBLENDMDMasked512 dst (VMINPS512 x y) mask) => (VMINPSMasked512Merging dst x y mask)
|
||||
(VPBLENDMBMasked512 dst (VPABSB512 x) mask) => (VPABSBMasked512Merging dst x mask)
|
||||
(VPBLENDMDMasked512 dst (VPANDD512 x y) mask) => (VPANDDMasked512Merging dst x y mask)
|
||||
(VPBLENDVB128 dst (VPMOVZXBW256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBWMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPMOVZXBD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBDMasked512Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPMAXSB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
|
||||
(VPBLENDMDMasked512 dst (VPSHUFD512 [a] x) mask) => (VPSHUFDMasked512Merging dst [a] x mask)
|
||||
(VPBLENDVB128 dst (VPSHUFHW128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFHWMasked128Merging dst [a] x (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPSHRDQ256 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHRDQMasked256Merging dst [a] x y (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPMADDUBSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMADDUBSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDMDMasked512 dst (VPMINSD512 x y) mask) => (VPMINSDMasked512Merging dst x y mask)
|
||||
(VPBLENDMDMasked512 dst (VPSRAVD512 x y) mask) => (VPSRAVDMasked512Merging dst x y mask)
|
||||
(VPBLENDMQMasked512 dst (VSUBPD512 x y) mask) => (VSUBPDMasked512Merging dst x y mask)
|
||||
(VPBLENDVB128 dst (VPSLLW128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLWMasked128constMerging dst [a] x (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPSLLD256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLDMasked256constMerging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDMWMasked512 dst (VPMOVSWB256 x) mask) => (VPMOVSWBMasked256Merging dst x mask)
|
||||
(VPBLENDMQMasked512 dst (VPMOVQW128 x) mask) => (VPMOVQWMasked128Merging dst x mask)
|
||||
(VPBLENDVB256 dst (VPMINUQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VRCP14PD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRCP14PDMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPSHRDD256 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHRDDMasked256Merging dst [a] x y (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDMQMasked512 dst (VPSHRDQ512 [a] x y) mask) => (VPSHRDQMasked512Merging dst [a] x y mask)
|
||||
(VPBLENDVB128 dst (VPSLLVD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPSRLVD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPADDQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPMOVSXWQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWQMasked512Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDMQMasked512 dst (VPLZCNTQ512 x) mask) => (VPLZCNTQMasked512Merging dst x mask)
|
||||
(VPBLENDVB256 dst (VPMAXUB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPRORQ256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORQMasked256Merging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDMQMasked512 dst (VSCALEFPD512 x y) mask) => (VSCALEFPDMasked512Merging dst x y mask)
|
||||
(VPBLENDVB128 dst (VPSUBQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPSLLD128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLDMasked128constMerging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VADDPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VADDPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDMQMasked512 dst (VPMOVSQW128 x) mask) => (VPMOVSQWMasked128Merging dst x mask)
|
||||
(VPBLENDMWMasked512 dst (VPMAXUW512 x y) mask) => (VPMAXUWMasked512Merging dst x y mask)
|
||||
(VPBLENDVB256 dst (VPSHUFB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPSRLVW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPSLLQ128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLQMasked128constMerging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPSRAD256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRADMasked256constMerging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDMQMasked512 dst (VPMINUQ512 x y) mask) => (VPMINUQMasked512Merging dst x y mask)
|
||||
(VPBLENDVB128 dst (VPSRAVD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDMWMasked512 dst (VPSRLVW512 x y) mask) => (VPSRLVWMasked512Merging dst x y mask)
|
||||
(VPBLENDVB256 dst (VPSUBW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPSRAW256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAWMasked256constMerging dst [a] x (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPABSW256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSWMasked256Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPACKSSDW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPACKSSDWMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPMOVSQD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSQDMasked128Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPMOVSXBD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBDMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPMOVZXBQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBQMasked512Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPLZCNTD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPLZCNTDMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPLZCNTQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPLZCNTQMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VMAXPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMAXPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPAVGW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPAVGWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPACKSSDW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPACKSSDWMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPMOVZXWD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXWDMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPOPCNTQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTQMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPSRAVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPSUBSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDMDMasked512 dst (VPXORD512 x y) mask) => (VPXORDMasked512Merging dst x y mask)
|
||||
(VPBLENDMBMasked512 dst (VPADDSB512 x y) mask) => (VPADDSBMasked512Merging dst x y mask)
|
||||
(VPBLENDVB128 dst (VPBROADCASTD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTDMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VMAXPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMAXPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPMAXUW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPSHRDQ128 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHRDQMasked128Merging dst [a] x y (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(VPBLENDMDMasked512 dst (VPSLLVD512 x y) mask) => (VPSLLVDMasked512Merging dst x y mask)
|
||||
(VPBLENDMWMasked512 dst (VPSLLW512const [a] x) mask) => (VPSLLWMasked512constMerging dst [a] x mask)
|
||||
(VPBLENDMDMasked512 dst (VPSLLD512const [a] x) mask) => (VPSLLDMasked512constMerging dst [a] x mask)
|
||||
(VPBLENDMWMasked512 dst (VPADDSW512 x y) mask) => (VPADDSWMasked512Merging dst x y mask)
|
||||
(VPBLENDMQMasked512 dst (VPMOVSQB128 x) mask) => (VPMOVSQBMasked128Merging dst x mask)
|
||||
(VPBLENDMDMasked512 dst (VPMINUD512 x y) mask) => (VPMINUDMasked512Merging dst x y mask)
|
||||
(VPBLENDVB256 dst (VPOPCNTW256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTWMasked256Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDMQMasked512 dst (VRSQRT14PD512 x) mask) => (VRSQRT14PDMasked512Merging dst x mask)
|
||||
(VPBLENDMDMasked512 dst (VSCALEFPS512 x y) mask) => (VSCALEFPSMasked512Merging dst x y mask)
|
||||
(VPBLENDVB128 dst (VPMAXUW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPSRAVQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDMDMasked512 dst (VSQRTPS512 x) mask) => (VSQRTPSMasked512Merging dst x mask)
|
||||
(VPBLENDMQMasked512 dst (VPSRAQ512const [a] x) mask) => (VPSRAQMasked512constMerging dst [a] x mask)
|
||||
(VPBLENDVB128 dst (VPABSB128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSBMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPABSB256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSBMasked256Merging dst x (VPMOVVec8x32ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPABSQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSQMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPMOVDW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVDWMasked128Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDMQMasked512 dst (VPMAXSQ512 x y) mask) => (VPMAXSQMasked512Merging dst x y mask)
|
||||
(VPBLENDVB128 dst (VSCALEFPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSCALEFPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VSQRTPS128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSQRTPSMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPSUBSB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
|
||||
(VPBLENDMDMasked512 dst (VPABSD512 x) mask) => (VPABSDMasked512Merging dst x mask)
|
||||
(VPBLENDVB128 dst (VPBROADCASTW512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTWMasked512Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPMAXUB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDMDMasked512 dst (VMULPS512 x y) mask) => (VMULPSMasked512Merging dst x y mask)
|
||||
(VPBLENDMWMasked512 dst (VPMULLW512 x y) mask) => (VPMULLWMasked512Merging dst x y mask)
|
||||
(VPABSD512 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPABSD512load {sym} [off] ptr mem)
|
||||
(VPABSQ128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPABSQ128load {sym} [off] ptr mem)
|
||||
(VPABSQ256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPABSQ256load {sym} [off] ptr mem)
|
||||
|
|
|
|||
|
|
@ -167,12 +167,6 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
|
|||
{name: "VMOVDQU64Masked128", argLength: 2, reg: wkw, asm: "VMOVDQU64", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VMOVDQU64Masked256", argLength: 2, reg: wkw, asm: "VMOVDQU64", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VMOVDQU64Masked512", argLength: 2, reg: wkw, asm: "VMOVDQU64", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VMOVUPDMasked128", argLength: 2, reg: wkw, asm: "VMOVUPD", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VMOVUPDMasked256", argLength: 2, reg: wkw, asm: "VMOVUPD", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VMOVUPDMasked512", argLength: 2, reg: wkw, asm: "VMOVUPD", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VMOVUPSMasked128", argLength: 2, reg: wkw, asm: "VMOVUPS", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VMOVUPSMasked256", argLength: 2, reg: wkw, asm: "VMOVUPS", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VMOVUPSMasked512", argLength: 2, reg: wkw, asm: "VMOVUPS", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VMULPD128", argLength: 2, reg: v21, asm: "VMULPD", commutative: true, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VMULPD256", argLength: 2, reg: v21, asm: "VMULPD", commutative: true, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VMULPD512", argLength: 2, reg: w21, asm: "VMULPD", commutative: true, typ: "Vec512", resultInArg0: false},
|
||||
|
|
@ -1900,5 +1894,448 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
|
|||
{name: "VRNDSCALEPSMasked512load", argLength: 3, reg: wkwload, asm: "VRNDSCALEPS", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||
{name: "VSHUFPD512load", argLength: 3, reg: w21load, asm: "VSHUFPD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||
{name: "VSHUFPS512load", argLength: 3, reg: w21load, asm: "VSHUFPS", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||
{name: "VADDPDMasked128Merging", argLength: 4, reg: w3kw, asm: "VADDPD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VADDPDMasked256Merging", argLength: 4, reg: w3kw, asm: "VADDPD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VADDPDMasked512Merging", argLength: 4, reg: w3kw, asm: "VADDPD", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VADDPSMasked128Merging", argLength: 4, reg: w3kw, asm: "VADDPS", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VADDPSMasked256Merging", argLength: 4, reg: w3kw, asm: "VADDPS", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VADDPSMasked512Merging", argLength: 4, reg: w3kw, asm: "VADDPS", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VBROADCASTSDMasked256Merging", argLength: 3, reg: w2kw, asm: "VBROADCASTSD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VBROADCASTSDMasked512Merging", argLength: 3, reg: w2kw, asm: "VBROADCASTSD", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VBROADCASTSSMasked128Merging", argLength: 3, reg: w2kw, asm: "VBROADCASTSS", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VBROADCASTSSMasked256Merging", argLength: 3, reg: w2kw, asm: "VBROADCASTSS", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VBROADCASTSSMasked512Merging", argLength: 3, reg: w2kw, asm: "VBROADCASTSS", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VCVTPS2UDQMasked128Merging", argLength: 3, reg: w2kw, asm: "VCVTPS2UDQ", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VCVTPS2UDQMasked256Merging", argLength: 3, reg: w2kw, asm: "VCVTPS2UDQ", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VCVTPS2UDQMasked512Merging", argLength: 3, reg: w2kw, asm: "VCVTPS2UDQ", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VCVTTPS2DQMasked128Merging", argLength: 3, reg: w2kw, asm: "VCVTTPS2DQ", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VCVTTPS2DQMasked256Merging", argLength: 3, reg: w2kw, asm: "VCVTTPS2DQ", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VCVTTPS2DQMasked512Merging", argLength: 3, reg: w2kw, asm: "VCVTTPS2DQ", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VDIVPDMasked128Merging", argLength: 4, reg: w3kw, asm: "VDIVPD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VDIVPDMasked256Merging", argLength: 4, reg: w3kw, asm: "VDIVPD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VDIVPDMasked512Merging", argLength: 4, reg: w3kw, asm: "VDIVPD", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VDIVPSMasked128Merging", argLength: 4, reg: w3kw, asm: "VDIVPS", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VDIVPSMasked256Merging", argLength: 4, reg: w3kw, asm: "VDIVPS", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VDIVPSMasked512Merging", argLength: 4, reg: w3kw, asm: "VDIVPS", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VGF2P8MULBMasked128Merging", argLength: 4, reg: w3kw, asm: "VGF2P8MULB", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VGF2P8MULBMasked256Merging", argLength: 4, reg: w3kw, asm: "VGF2P8MULB", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VGF2P8MULBMasked512Merging", argLength: 4, reg: w3kw, asm: "VGF2P8MULB", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VMAXPDMasked128Merging", argLength: 4, reg: w3kw, asm: "VMAXPD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VMAXPDMasked256Merging", argLength: 4, reg: w3kw, asm: "VMAXPD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VMAXPDMasked512Merging", argLength: 4, reg: w3kw, asm: "VMAXPD", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VMAXPSMasked128Merging", argLength: 4, reg: w3kw, asm: "VMAXPS", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VMAXPSMasked256Merging", argLength: 4, reg: w3kw, asm: "VMAXPS", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VMAXPSMasked512Merging", argLength: 4, reg: w3kw, asm: "VMAXPS", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VMINPDMasked128Merging", argLength: 4, reg: w3kw, asm: "VMINPD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VMINPDMasked256Merging", argLength: 4, reg: w3kw, asm: "VMINPD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VMINPDMasked512Merging", argLength: 4, reg: w3kw, asm: "VMINPD", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VMINPSMasked128Merging", argLength: 4, reg: w3kw, asm: "VMINPS", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VMINPSMasked256Merging", argLength: 4, reg: w3kw, asm: "VMINPS", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VMINPSMasked512Merging", argLength: 4, reg: w3kw, asm: "VMINPS", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VMULPDMasked128Merging", argLength: 4, reg: w3kw, asm: "VMULPD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VMULPDMasked256Merging", argLength: 4, reg: w3kw, asm: "VMULPD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VMULPDMasked512Merging", argLength: 4, reg: w3kw, asm: "VMULPD", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VMULPSMasked128Merging", argLength: 4, reg: w3kw, asm: "VMULPS", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VMULPSMasked256Merging", argLength: 4, reg: w3kw, asm: "VMULPS", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VMULPSMasked512Merging", argLength: 4, reg: w3kw, asm: "VMULPS", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPABSBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPABSB", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPABSBMasked256Merging", argLength: 3, reg: w2kw, asm: "VPABSB", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPABSBMasked512Merging", argLength: 3, reg: w2kw, asm: "VPABSB", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPABSDMasked128Merging", argLength: 3, reg: w2kw, asm: "VPABSD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPABSDMasked256Merging", argLength: 3, reg: w2kw, asm: "VPABSD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPABSDMasked512Merging", argLength: 3, reg: w2kw, asm: "VPABSD", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPABSQMasked128Merging", argLength: 3, reg: w2kw, asm: "VPABSQ", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPABSQMasked256Merging", argLength: 3, reg: w2kw, asm: "VPABSQ", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPABSQMasked512Merging", argLength: 3, reg: w2kw, asm: "VPABSQ", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPABSWMasked128Merging", argLength: 3, reg: w2kw, asm: "VPABSW", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPABSWMasked256Merging", argLength: 3, reg: w2kw, asm: "VPABSW", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPABSWMasked512Merging", argLength: 3, reg: w2kw, asm: "VPABSW", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPACKSSDWMasked128Merging", argLength: 4, reg: w3kw, asm: "VPACKSSDW", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPACKSSDWMasked256Merging", argLength: 4, reg: w3kw, asm: "VPACKSSDW", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPACKSSDWMasked512Merging", argLength: 4, reg: w3kw, asm: "VPACKSSDW", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPACKUSDWMasked128Merging", argLength: 4, reg: w3kw, asm: "VPACKUSDW", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPACKUSDWMasked256Merging", argLength: 4, reg: w3kw, asm: "VPACKUSDW", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPACKUSDWMasked512Merging", argLength: 4, reg: w3kw, asm: "VPACKUSDW", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPADDBMasked128Merging", argLength: 4, reg: w3kw, asm: "VPADDB", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPADDBMasked256Merging", argLength: 4, reg: w3kw, asm: "VPADDB", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPADDBMasked512Merging", argLength: 4, reg: w3kw, asm: "VPADDB", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPADDDMasked128Merging", argLength: 4, reg: w3kw, asm: "VPADDD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPADDDMasked256Merging", argLength: 4, reg: w3kw, asm: "VPADDD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPADDDMasked512Merging", argLength: 4, reg: w3kw, asm: "VPADDD", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPADDQMasked128Merging", argLength: 4, reg: w3kw, asm: "VPADDQ", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPADDQMasked256Merging", argLength: 4, reg: w3kw, asm: "VPADDQ", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPADDQMasked512Merging", argLength: 4, reg: w3kw, asm: "VPADDQ", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPADDSBMasked128Merging", argLength: 4, reg: w3kw, asm: "VPADDSB", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPADDSBMasked256Merging", argLength: 4, reg: w3kw, asm: "VPADDSB", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPADDSBMasked512Merging", argLength: 4, reg: w3kw, asm: "VPADDSB", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPADDSWMasked128Merging", argLength: 4, reg: w3kw, asm: "VPADDSW", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPADDSWMasked256Merging", argLength: 4, reg: w3kw, asm: "VPADDSW", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPADDSWMasked512Merging", argLength: 4, reg: w3kw, asm: "VPADDSW", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPADDUSBMasked128Merging", argLength: 4, reg: w3kw, asm: "VPADDUSB", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPADDUSBMasked256Merging", argLength: 4, reg: w3kw, asm: "VPADDUSB", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPADDUSBMasked512Merging", argLength: 4, reg: w3kw, asm: "VPADDUSB", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPADDUSWMasked128Merging", argLength: 4, reg: w3kw, asm: "VPADDUSW", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPADDUSWMasked256Merging", argLength: 4, reg: w3kw, asm: "VPADDUSW", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPADDUSWMasked512Merging", argLength: 4, reg: w3kw, asm: "VPADDUSW", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPADDWMasked128Merging", argLength: 4, reg: w3kw, asm: "VPADDW", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPADDWMasked256Merging", argLength: 4, reg: w3kw, asm: "VPADDW", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPADDWMasked512Merging", argLength: 4, reg: w3kw, asm: "VPADDW", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPANDDMasked128Merging", argLength: 4, reg: w3kw, asm: "VPANDD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPANDDMasked256Merging", argLength: 4, reg: w3kw, asm: "VPANDD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPANDDMasked512Merging", argLength: 4, reg: w3kw, asm: "VPANDD", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPANDQMasked128Merging", argLength: 4, reg: w3kw, asm: "VPANDQ", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPANDQMasked256Merging", argLength: 4, reg: w3kw, asm: "VPANDQ", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPANDQMasked512Merging", argLength: 4, reg: w3kw, asm: "VPANDQ", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPAVGBMasked128Merging", argLength: 4, reg: w3kw, asm: "VPAVGB", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPAVGBMasked256Merging", argLength: 4, reg: w3kw, asm: "VPAVGB", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPAVGBMasked512Merging", argLength: 4, reg: w3kw, asm: "VPAVGB", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPAVGWMasked128Merging", argLength: 4, reg: w3kw, asm: "VPAVGW", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPAVGWMasked256Merging", argLength: 4, reg: w3kw, asm: "VPAVGW", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPAVGWMasked512Merging", argLength: 4, reg: w3kw, asm: "VPAVGW", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPBROADCASTBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPBROADCASTB", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPBROADCASTBMasked256Merging", argLength: 3, reg: w2kw, asm: "VPBROADCASTB", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPBROADCASTBMasked512Merging", argLength: 3, reg: w2kw, asm: "VPBROADCASTB", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPBROADCASTDMasked128Merging", argLength: 3, reg: w2kw, asm: "VPBROADCASTD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPBROADCASTDMasked256Merging", argLength: 3, reg: w2kw, asm: "VPBROADCASTD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPBROADCASTDMasked512Merging", argLength: 3, reg: w2kw, asm: "VPBROADCASTD", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPBROADCASTQMasked128Merging", argLength: 3, reg: w2kw, asm: "VPBROADCASTQ", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPBROADCASTQMasked256Merging", argLength: 3, reg: w2kw, asm: "VPBROADCASTQ", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPBROADCASTQMasked512Merging", argLength: 3, reg: w2kw, asm: "VPBROADCASTQ", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPBROADCASTWMasked128Merging", argLength: 3, reg: w2kw, asm: "VPBROADCASTW", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPBROADCASTWMasked256Merging", argLength: 3, reg: w2kw, asm: "VPBROADCASTW", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPBROADCASTWMasked512Merging", argLength: 3, reg: w2kw, asm: "VPBROADCASTW", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPLZCNTDMasked128Merging", argLength: 3, reg: w2kw, asm: "VPLZCNTD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPLZCNTDMasked256Merging", argLength: 3, reg: w2kw, asm: "VPLZCNTD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPLZCNTDMasked512Merging", argLength: 3, reg: w2kw, asm: "VPLZCNTD", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPLZCNTQMasked128Merging", argLength: 3, reg: w2kw, asm: "VPLZCNTQ", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPLZCNTQMasked256Merging", argLength: 3, reg: w2kw, asm: "VPLZCNTQ", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPLZCNTQMasked512Merging", argLength: 3, reg: w2kw, asm: "VPLZCNTQ", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPMADDUBSWMasked128Merging", argLength: 4, reg: w3kw, asm: "VPMADDUBSW", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMADDUBSWMasked256Merging", argLength: 4, reg: w3kw, asm: "VPMADDUBSW", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPMADDUBSWMasked512Merging", argLength: 4, reg: w3kw, asm: "VPMADDUBSW", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPMADDWDMasked128Merging", argLength: 4, reg: w3kw, asm: "VPMADDWD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMADDWDMasked256Merging", argLength: 4, reg: w3kw, asm: "VPMADDWD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPMADDWDMasked512Merging", argLength: 4, reg: w3kw, asm: "VPMADDWD", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPMAXSBMasked128Merging", argLength: 4, reg: w3kw, asm: "VPMAXSB", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMAXSBMasked256Merging", argLength: 4, reg: w3kw, asm: "VPMAXSB", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPMAXSBMasked512Merging", argLength: 4, reg: w3kw, asm: "VPMAXSB", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPMAXSDMasked128Merging", argLength: 4, reg: w3kw, asm: "VPMAXSD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMAXSDMasked256Merging", argLength: 4, reg: w3kw, asm: "VPMAXSD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPMAXSDMasked512Merging", argLength: 4, reg: w3kw, asm: "VPMAXSD", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPMAXSQMasked128Merging", argLength: 4, reg: w3kw, asm: "VPMAXSQ", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMAXSQMasked256Merging", argLength: 4, reg: w3kw, asm: "VPMAXSQ", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPMAXSQMasked512Merging", argLength: 4, reg: w3kw, asm: "VPMAXSQ", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPMAXSWMasked128Merging", argLength: 4, reg: w3kw, asm: "VPMAXSW", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMAXSWMasked256Merging", argLength: 4, reg: w3kw, asm: "VPMAXSW", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPMAXSWMasked512Merging", argLength: 4, reg: w3kw, asm: "VPMAXSW", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPMAXUBMasked128Merging", argLength: 4, reg: w3kw, asm: "VPMAXUB", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMAXUBMasked256Merging", argLength: 4, reg: w3kw, asm: "VPMAXUB", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPMAXUBMasked512Merging", argLength: 4, reg: w3kw, asm: "VPMAXUB", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPMAXUDMasked128Merging", argLength: 4, reg: w3kw, asm: "VPMAXUD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMAXUDMasked256Merging", argLength: 4, reg: w3kw, asm: "VPMAXUD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPMAXUDMasked512Merging", argLength: 4, reg: w3kw, asm: "VPMAXUD", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPMAXUQMasked128Merging", argLength: 4, reg: w3kw, asm: "VPMAXUQ", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMAXUQMasked256Merging", argLength: 4, reg: w3kw, asm: "VPMAXUQ", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPMAXUQMasked512Merging", argLength: 4, reg: w3kw, asm: "VPMAXUQ", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPMAXUWMasked128Merging", argLength: 4, reg: w3kw, asm: "VPMAXUW", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMAXUWMasked256Merging", argLength: 4, reg: w3kw, asm: "VPMAXUW", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPMAXUWMasked512Merging", argLength: 4, reg: w3kw, asm: "VPMAXUW", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPMINSBMasked128Merging", argLength: 4, reg: w3kw, asm: "VPMINSB", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMINSBMasked256Merging", argLength: 4, reg: w3kw, asm: "VPMINSB", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPMINSBMasked512Merging", argLength: 4, reg: w3kw, asm: "VPMINSB", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPMINSDMasked128Merging", argLength: 4, reg: w3kw, asm: "VPMINSD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMINSDMasked256Merging", argLength: 4, reg: w3kw, asm: "VPMINSD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPMINSDMasked512Merging", argLength: 4, reg: w3kw, asm: "VPMINSD", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPMINSQMasked128Merging", argLength: 4, reg: w3kw, asm: "VPMINSQ", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMINSQMasked256Merging", argLength: 4, reg: w3kw, asm: "VPMINSQ", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPMINSQMasked512Merging", argLength: 4, reg: w3kw, asm: "VPMINSQ", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPMINSWMasked128Merging", argLength: 4, reg: w3kw, asm: "VPMINSW", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMINSWMasked256Merging", argLength: 4, reg: w3kw, asm: "VPMINSW", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPMINSWMasked512Merging", argLength: 4, reg: w3kw, asm: "VPMINSW", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPMINUBMasked128Merging", argLength: 4, reg: w3kw, asm: "VPMINUB", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMINUBMasked256Merging", argLength: 4, reg: w3kw, asm: "VPMINUB", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPMINUBMasked512Merging", argLength: 4, reg: w3kw, asm: "VPMINUB", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPMINUDMasked128Merging", argLength: 4, reg: w3kw, asm: "VPMINUD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMINUDMasked256Merging", argLength: 4, reg: w3kw, asm: "VPMINUD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPMINUDMasked512Merging", argLength: 4, reg: w3kw, asm: "VPMINUD", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPMINUQMasked128Merging", argLength: 4, reg: w3kw, asm: "VPMINUQ", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMINUQMasked256Merging", argLength: 4, reg: w3kw, asm: "VPMINUQ", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPMINUQMasked512Merging", argLength: 4, reg: w3kw, asm: "VPMINUQ", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPMINUWMasked128Merging", argLength: 4, reg: w3kw, asm: "VPMINUW", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMINUWMasked256Merging", argLength: 4, reg: w3kw, asm: "VPMINUW", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPMINUWMasked512Merging", argLength: 4, reg: w3kw, asm: "VPMINUW", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPMOVDBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVDB", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMOVDWMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVDW", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMOVDWMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVDW", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPMOVQBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVQB", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMOVQDMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVQD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMOVQDMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVQD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPMOVQWMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVQW", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMOVSDBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSDB", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMOVSDWMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSDW", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMOVSDWMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSDW", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPMOVSQBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSQB", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMOVSQDMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSQD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMOVSQDMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSQD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPMOVSQWMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSQW", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMOVSWBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSWB", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMOVSWBMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSWB", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPMOVSXBDMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXBD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMOVSXBDMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXBD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPMOVSXBDMasked512Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXBD", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPMOVSXBQMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXBQ", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMOVSXBQMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXBQ", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPMOVSXBQMasked512Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXBQ", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPMOVSXBWMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXBW", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMOVSXBWMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXBW", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPMOVSXBWMasked512Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXBW", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPMOVSXDQMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXDQ", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMOVSXDQMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXDQ", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPMOVSXDQMasked512Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXDQ", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPMOVSXWDMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXWD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMOVSXWDMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXWD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPMOVSXWDMasked512Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXWD", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPMOVSXWQMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMOVSXWQMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPMOVSXWQMasked512Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPMOVUSDBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMOVUSDWMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMOVUSDWMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDW", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPMOVUSQBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMOVUSQDMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMOVUSQDMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPMOVUSQWMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMOVUSWBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMOVUSWBMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSWB", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPMOVWBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVWB", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMOVWBMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVWB", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPMOVZXBDMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVZXBD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMOVZXBDMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVZXBD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPMOVZXBDMasked512Merging", argLength: 3, reg: w2kw, asm: "VPMOVZXBD", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPMOVZXBQMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVZXBQ", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMOVZXBQMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVZXBQ", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPMOVZXBQMasked512Merging", argLength: 3, reg: w2kw, asm: "VPMOVZXBQ", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPMOVZXBWMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVZXBW", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMOVZXBWMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVZXBW", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPMOVZXBWMasked512Merging", argLength: 3, reg: w2kw, asm: "VPMOVZXBW", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPMOVZXDQMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVZXDQ", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMOVZXDQMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVZXDQ", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPMOVZXDQMasked512Merging", argLength: 3, reg: w2kw, asm: "VPMOVZXDQ", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPMOVZXWDMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVZXWD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMOVZXWDMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVZXWD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPMOVZXWDMasked512Merging", argLength: 3, reg: w2kw, asm: "VPMOVZXWD", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPMOVZXWQMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVZXWQ", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMOVZXWQMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVZXWQ", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPMOVZXWQMasked512Merging", argLength: 3, reg: w2kw, asm: "VPMOVZXWQ", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPMULHUWMasked128Merging", argLength: 4, reg: w3kw, asm: "VPMULHUW", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMULHUWMasked256Merging", argLength: 4, reg: w3kw, asm: "VPMULHUW", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPMULHUWMasked512Merging", argLength: 4, reg: w3kw, asm: "VPMULHUW", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPMULHWMasked128Merging", argLength: 4, reg: w3kw, asm: "VPMULHW", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMULHWMasked256Merging", argLength: 4, reg: w3kw, asm: "VPMULHW", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPMULHWMasked512Merging", argLength: 4, reg: w3kw, asm: "VPMULHW", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPMULLDMasked128Merging", argLength: 4, reg: w3kw, asm: "VPMULLD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMULLDMasked256Merging", argLength: 4, reg: w3kw, asm: "VPMULLD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPMULLDMasked512Merging", argLength: 4, reg: w3kw, asm: "VPMULLD", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPMULLQMasked128Merging", argLength: 4, reg: w3kw, asm: "VPMULLQ", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMULLQMasked256Merging", argLength: 4, reg: w3kw, asm: "VPMULLQ", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPMULLQMasked512Merging", argLength: 4, reg: w3kw, asm: "VPMULLQ", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPMULLWMasked128Merging", argLength: 4, reg: w3kw, asm: "VPMULLW", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPMULLWMasked256Merging", argLength: 4, reg: w3kw, asm: "VPMULLW", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPMULLWMasked512Merging", argLength: 4, reg: w3kw, asm: "VPMULLW", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPOPCNTBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPOPCNTB", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPOPCNTBMasked256Merging", argLength: 3, reg: w2kw, asm: "VPOPCNTB", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPOPCNTBMasked512Merging", argLength: 3, reg: w2kw, asm: "VPOPCNTB", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPOPCNTDMasked128Merging", argLength: 3, reg: w2kw, asm: "VPOPCNTD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPOPCNTDMasked256Merging", argLength: 3, reg: w2kw, asm: "VPOPCNTD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPOPCNTDMasked512Merging", argLength: 3, reg: w2kw, asm: "VPOPCNTD", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPOPCNTQMasked128Merging", argLength: 3, reg: w2kw, asm: "VPOPCNTQ", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPOPCNTQMasked256Merging", argLength: 3, reg: w2kw, asm: "VPOPCNTQ", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPOPCNTQMasked512Merging", argLength: 3, reg: w2kw, asm: "VPOPCNTQ", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPOPCNTWMasked128Merging", argLength: 3, reg: w2kw, asm: "VPOPCNTW", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPOPCNTWMasked256Merging", argLength: 3, reg: w2kw, asm: "VPOPCNTW", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPOPCNTWMasked512Merging", argLength: 3, reg: w2kw, asm: "VPOPCNTW", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPORDMasked128Merging", argLength: 4, reg: w3kw, asm: "VPORD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPORDMasked256Merging", argLength: 4, reg: w3kw, asm: "VPORD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPORDMasked512Merging", argLength: 4, reg: w3kw, asm: "VPORD", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPORQMasked128Merging", argLength: 4, reg: w3kw, asm: "VPORQ", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPORQMasked256Merging", argLength: 4, reg: w3kw, asm: "VPORQ", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPORQMasked512Merging", argLength: 4, reg: w3kw, asm: "VPORQ", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPROLVDMasked128Merging", argLength: 4, reg: w3kw, asm: "VPROLVD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPROLVDMasked256Merging", argLength: 4, reg: w3kw, asm: "VPROLVD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPROLVDMasked512Merging", argLength: 4, reg: w3kw, asm: "VPROLVD", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPROLVQMasked128Merging", argLength: 4, reg: w3kw, asm: "VPROLVQ", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPROLVQMasked256Merging", argLength: 4, reg: w3kw, asm: "VPROLVQ", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPROLVQMasked512Merging", argLength: 4, reg: w3kw, asm: "VPROLVQ", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPRORVDMasked128Merging", argLength: 4, reg: w3kw, asm: "VPRORVD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPRORVDMasked256Merging", argLength: 4, reg: w3kw, asm: "VPRORVD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPRORVDMasked512Merging", argLength: 4, reg: w3kw, asm: "VPRORVD", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPRORVQMasked128Merging", argLength: 4, reg: w3kw, asm: "VPRORVQ", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPRORVQMasked256Merging", argLength: 4, reg: w3kw, asm: "VPRORVQ", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPRORVQMasked512Merging", argLength: 4, reg: w3kw, asm: "VPRORVQ", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPSHUFBMasked128Merging", argLength: 4, reg: w3kw, asm: "VPSHUFB", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPSHUFBMasked256Merging", argLength: 4, reg: w3kw, asm: "VPSHUFB", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPSHUFBMasked512Merging", argLength: 4, reg: w3kw, asm: "VPSHUFB", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPSLLVDMasked128Merging", argLength: 4, reg: w3kw, asm: "VPSLLVD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPSLLVDMasked256Merging", argLength: 4, reg: w3kw, asm: "VPSLLVD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPSLLVDMasked512Merging", argLength: 4, reg: w3kw, asm: "VPSLLVD", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPSLLVQMasked128Merging", argLength: 4, reg: w3kw, asm: "VPSLLVQ", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPSLLVQMasked256Merging", argLength: 4, reg: w3kw, asm: "VPSLLVQ", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPSLLVQMasked512Merging", argLength: 4, reg: w3kw, asm: "VPSLLVQ", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPSLLVWMasked128Merging", argLength: 4, reg: w3kw, asm: "VPSLLVW", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPSLLVWMasked256Merging", argLength: 4, reg: w3kw, asm: "VPSLLVW", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPSLLVWMasked512Merging", argLength: 4, reg: w3kw, asm: "VPSLLVW", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPSRAVDMasked128Merging", argLength: 4, reg: w3kw, asm: "VPSRAVD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPSRAVDMasked256Merging", argLength: 4, reg: w3kw, asm: "VPSRAVD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPSRAVDMasked512Merging", argLength: 4, reg: w3kw, asm: "VPSRAVD", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPSRAVQMasked128Merging", argLength: 4, reg: w3kw, asm: "VPSRAVQ", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPSRAVQMasked256Merging", argLength: 4, reg: w3kw, asm: "VPSRAVQ", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPSRAVQMasked512Merging", argLength: 4, reg: w3kw, asm: "VPSRAVQ", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPSRAVWMasked128Merging", argLength: 4, reg: w3kw, asm: "VPSRAVW", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPSRAVWMasked256Merging", argLength: 4, reg: w3kw, asm: "VPSRAVW", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPSRAVWMasked512Merging", argLength: 4, reg: w3kw, asm: "VPSRAVW", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPSRLVDMasked128Merging", argLength: 4, reg: w3kw, asm: "VPSRLVD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPSRLVDMasked256Merging", argLength: 4, reg: w3kw, asm: "VPSRLVD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPSRLVDMasked512Merging", argLength: 4, reg: w3kw, asm: "VPSRLVD", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPSRLVQMasked128Merging", argLength: 4, reg: w3kw, asm: "VPSRLVQ", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPSRLVQMasked256Merging", argLength: 4, reg: w3kw, asm: "VPSRLVQ", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPSRLVQMasked512Merging", argLength: 4, reg: w3kw, asm: "VPSRLVQ", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPSRLVWMasked128Merging", argLength: 4, reg: w3kw, asm: "VPSRLVW", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPSRLVWMasked256Merging", argLength: 4, reg: w3kw, asm: "VPSRLVW", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPSRLVWMasked512Merging", argLength: 4, reg: w3kw, asm: "VPSRLVW", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPSUBBMasked128Merging", argLength: 4, reg: w3kw, asm: "VPSUBB", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPSUBBMasked256Merging", argLength: 4, reg: w3kw, asm: "VPSUBB", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPSUBBMasked512Merging", argLength: 4, reg: w3kw, asm: "VPSUBB", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPSUBDMasked128Merging", argLength: 4, reg: w3kw, asm: "VPSUBD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPSUBDMasked256Merging", argLength: 4, reg: w3kw, asm: "VPSUBD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPSUBDMasked512Merging", argLength: 4, reg: w3kw, asm: "VPSUBD", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPSUBQMasked128Merging", argLength: 4, reg: w3kw, asm: "VPSUBQ", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPSUBQMasked256Merging", argLength: 4, reg: w3kw, asm: "VPSUBQ", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPSUBQMasked512Merging", argLength: 4, reg: w3kw, asm: "VPSUBQ", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPSUBSBMasked128Merging", argLength: 4, reg: w3kw, asm: "VPSUBSB", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPSUBSBMasked256Merging", argLength: 4, reg: w3kw, asm: "VPSUBSB", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPSUBSBMasked512Merging", argLength: 4, reg: w3kw, asm: "VPSUBSB", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPSUBSWMasked128Merging", argLength: 4, reg: w3kw, asm: "VPSUBSW", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPSUBSWMasked256Merging", argLength: 4, reg: w3kw, asm: "VPSUBSW", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPSUBSWMasked512Merging", argLength: 4, reg: w3kw, asm: "VPSUBSW", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPSUBUSBMasked128Merging", argLength: 4, reg: w3kw, asm: "VPSUBUSB", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPSUBUSBMasked256Merging", argLength: 4, reg: w3kw, asm: "VPSUBUSB", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPSUBUSBMasked512Merging", argLength: 4, reg: w3kw, asm: "VPSUBUSB", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPSUBUSWMasked128Merging", argLength: 4, reg: w3kw, asm: "VPSUBUSW", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPSUBUSWMasked256Merging", argLength: 4, reg: w3kw, asm: "VPSUBUSW", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPSUBUSWMasked512Merging", argLength: 4, reg: w3kw, asm: "VPSUBUSW", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPSUBWMasked128Merging", argLength: 4, reg: w3kw, asm: "VPSUBW", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPSUBWMasked256Merging", argLength: 4, reg: w3kw, asm: "VPSUBW", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPSUBWMasked512Merging", argLength: 4, reg: w3kw, asm: "VPSUBW", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPXORDMasked128Merging", argLength: 4, reg: w3kw, asm: "VPXORD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPXORDMasked256Merging", argLength: 4, reg: w3kw, asm: "VPXORD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPXORDMasked512Merging", argLength: 4, reg: w3kw, asm: "VPXORD", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPXORQMasked128Merging", argLength: 4, reg: w3kw, asm: "VPXORQ", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPXORQMasked256Merging", argLength: 4, reg: w3kw, asm: "VPXORQ", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPXORQMasked512Merging", argLength: 4, reg: w3kw, asm: "VPXORQ", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VRCP14PDMasked128Merging", argLength: 3, reg: w2kw, asm: "VRCP14PD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VRCP14PDMasked256Merging", argLength: 3, reg: w2kw, asm: "VRCP14PD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VRCP14PDMasked512Merging", argLength: 3, reg: w2kw, asm: "VRCP14PD", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VRCP14PSMasked128Merging", argLength: 3, reg: w2kw, asm: "VRCP14PS", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VRCP14PSMasked256Merging", argLength: 3, reg: w2kw, asm: "VRCP14PS", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VRCP14PSMasked512Merging", argLength: 3, reg: w2kw, asm: "VRCP14PS", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VRSQRT14PDMasked128Merging", argLength: 3, reg: w2kw, asm: "VRSQRT14PD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VRSQRT14PDMasked256Merging", argLength: 3, reg: w2kw, asm: "VRSQRT14PD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VRSQRT14PDMasked512Merging", argLength: 3, reg: w2kw, asm: "VRSQRT14PD", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VRSQRT14PSMasked128Merging", argLength: 3, reg: w2kw, asm: "VRSQRT14PS", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VRSQRT14PSMasked256Merging", argLength: 3, reg: w2kw, asm: "VRSQRT14PS", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VRSQRT14PSMasked512Merging", argLength: 3, reg: w2kw, asm: "VRSQRT14PS", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VSCALEFPDMasked128Merging", argLength: 4, reg: w3kw, asm: "VSCALEFPD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VSCALEFPDMasked256Merging", argLength: 4, reg: w3kw, asm: "VSCALEFPD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VSCALEFPDMasked512Merging", argLength: 4, reg: w3kw, asm: "VSCALEFPD", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VSCALEFPSMasked128Merging", argLength: 4, reg: w3kw, asm: "VSCALEFPS", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VSCALEFPSMasked256Merging", argLength: 4, reg: w3kw, asm: "VSCALEFPS", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VSCALEFPSMasked512Merging", argLength: 4, reg: w3kw, asm: "VSCALEFPS", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VSQRTPDMasked128Merging", argLength: 3, reg: w2kw, asm: "VSQRTPD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VSQRTPDMasked256Merging", argLength: 3, reg: w2kw, asm: "VSQRTPD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VSQRTPDMasked512Merging", argLength: 3, reg: w2kw, asm: "VSQRTPD", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VSQRTPSMasked128Merging", argLength: 3, reg: w2kw, asm: "VSQRTPS", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VSQRTPSMasked256Merging", argLength: 3, reg: w2kw, asm: "VSQRTPS", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VSQRTPSMasked512Merging", argLength: 3, reg: w2kw, asm: "VSQRTPS", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VSUBPDMasked128Merging", argLength: 4, reg: w3kw, asm: "VSUBPD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VSUBPDMasked256Merging", argLength: 4, reg: w3kw, asm: "VSUBPD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VSUBPDMasked512Merging", argLength: 4, reg: w3kw, asm: "VSUBPD", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VSUBPSMasked128Merging", argLength: 4, reg: w3kw, asm: "VSUBPS", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VSUBPSMasked256Merging", argLength: 4, reg: w3kw, asm: "VSUBPS", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VSUBPSMasked512Merging", argLength: 4, reg: w3kw, asm: "VSUBPS", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPROLDMasked128Merging", argLength: 3, reg: w2kw, asm: "VPROLD", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPROLDMasked256Merging", argLength: 3, reg: w2kw, asm: "VPROLD", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPROLDMasked512Merging", argLength: 3, reg: w2kw, asm: "VPROLD", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPROLQMasked128Merging", argLength: 3, reg: w2kw, asm: "VPROLQ", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPROLQMasked256Merging", argLength: 3, reg: w2kw, asm: "VPROLQ", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPROLQMasked512Merging", argLength: 3, reg: w2kw, asm: "VPROLQ", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPRORDMasked128Merging", argLength: 3, reg: w2kw, asm: "VPRORD", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPRORDMasked256Merging", argLength: 3, reg: w2kw, asm: "VPRORD", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPRORDMasked512Merging", argLength: 3, reg: w2kw, asm: "VPRORD", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPRORQMasked128Merging", argLength: 3, reg: w2kw, asm: "VPRORQ", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPRORQMasked256Merging", argLength: 3, reg: w2kw, asm: "VPRORQ", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPRORQMasked512Merging", argLength: 3, reg: w2kw, asm: "VPRORQ", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPSHLDDMasked128Merging", argLength: 4, reg: w3kw, asm: "VPSHLDD", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPSHLDDMasked256Merging", argLength: 4, reg: w3kw, asm: "VPSHLDD", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPSHLDDMasked512Merging", argLength: 4, reg: w3kw, asm: "VPSHLDD", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPSHLDQMasked128Merging", argLength: 4, reg: w3kw, asm: "VPSHLDQ", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPSHLDQMasked256Merging", argLength: 4, reg: w3kw, asm: "VPSHLDQ", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPSHLDQMasked512Merging", argLength: 4, reg: w3kw, asm: "VPSHLDQ", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPSHLDWMasked128Merging", argLength: 4, reg: w3kw, asm: "VPSHLDW", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPSHLDWMasked256Merging", argLength: 4, reg: w3kw, asm: "VPSHLDW", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPSHLDWMasked512Merging", argLength: 4, reg: w3kw, asm: "VPSHLDW", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPSHRDDMasked128Merging", argLength: 4, reg: w3kw, asm: "VPSHRDD", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPSHRDDMasked256Merging", argLength: 4, reg: w3kw, asm: "VPSHRDD", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPSHRDDMasked512Merging", argLength: 4, reg: w3kw, asm: "VPSHRDD", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPSHRDQMasked128Merging", argLength: 4, reg: w3kw, asm: "VPSHRDQ", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPSHRDQMasked256Merging", argLength: 4, reg: w3kw, asm: "VPSHRDQ", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPSHRDQMasked512Merging", argLength: 4, reg: w3kw, asm: "VPSHRDQ", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPSHRDWMasked128Merging", argLength: 4, reg: w3kw, asm: "VPSHRDW", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPSHRDWMasked256Merging", argLength: 4, reg: w3kw, asm: "VPSHRDW", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPSHRDWMasked512Merging", argLength: 4, reg: w3kw, asm: "VPSHRDW", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPSHUFDMasked128Merging", argLength: 3, reg: w2kw, asm: "VPSHUFD", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPSHUFDMasked256Merging", argLength: 3, reg: w2kw, asm: "VPSHUFD", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPSHUFDMasked512Merging", argLength: 3, reg: w2kw, asm: "VPSHUFD", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPSHUFHWMasked128Merging", argLength: 3, reg: w2kw, asm: "VPSHUFHW", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPSHUFHWMasked256Merging", argLength: 3, reg: w2kw, asm: "VPSHUFHW", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPSHUFHWMasked512Merging", argLength: 3, reg: w2kw, asm: "VPSHUFHW", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPSLLDMasked128constMerging", argLength: 3, reg: w2kw, asm: "VPSLLD", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPSLLDMasked256constMerging", argLength: 3, reg: w2kw, asm: "VPSLLD", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPSLLDMasked512constMerging", argLength: 3, reg: w2kw, asm: "VPSLLD", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPSLLQMasked128constMerging", argLength: 3, reg: w2kw, asm: "VPSLLQ", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPSLLQMasked256constMerging", argLength: 3, reg: w2kw, asm: "VPSLLQ", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPSLLQMasked512constMerging", argLength: 3, reg: w2kw, asm: "VPSLLQ", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPSLLWMasked128constMerging", argLength: 3, reg: w2kw, asm: "VPSLLW", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPSLLWMasked256constMerging", argLength: 3, reg: w2kw, asm: "VPSLLW", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPSLLWMasked512constMerging", argLength: 3, reg: w2kw, asm: "VPSLLW", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPSRADMasked128constMerging", argLength: 3, reg: w2kw, asm: "VPSRAD", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPSRADMasked256constMerging", argLength: 3, reg: w2kw, asm: "VPSRAD", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPSRADMasked512constMerging", argLength: 3, reg: w2kw, asm: "VPSRAD", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPSRAQMasked128constMerging", argLength: 3, reg: w2kw, asm: "VPSRAQ", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPSRAQMasked256constMerging", argLength: 3, reg: w2kw, asm: "VPSRAQ", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPSRAQMasked512constMerging", argLength: 3, reg: w2kw, asm: "VPSRAQ", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPSRAWMasked128constMerging", argLength: 3, reg: w2kw, asm: "VPSRAW", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPSRAWMasked256constMerging", argLength: 3, reg: w2kw, asm: "VPSRAW", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPSRAWMasked512constMerging", argLength: 3, reg: w2kw, asm: "VPSRAW", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPSRLDMasked128constMerging", argLength: 3, reg: w2kw, asm: "VPSRLD", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPSRLDMasked256constMerging", argLength: 3, reg: w2kw, asm: "VPSRLD", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPSRLDMasked512constMerging", argLength: 3, reg: w2kw, asm: "VPSRLD", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPSRLQMasked128constMerging", argLength: 3, reg: w2kw, asm: "VPSRLQ", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPSRLQMasked256constMerging", argLength: 3, reg: w2kw, asm: "VPSRLQ", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPSRLQMasked512constMerging", argLength: 3, reg: w2kw, asm: "VPSRLQ", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VPSRLWMasked128constMerging", argLength: 3, reg: w2kw, asm: "VPSRLW", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VPSRLWMasked256constMerging", argLength: 3, reg: w2kw, asm: "VPSRLW", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VPSRLWMasked512constMerging", argLength: 3, reg: w2kw, asm: "VPSRLW", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VREDUCEPDMasked128Merging", argLength: 3, reg: w2kw, asm: "VREDUCEPD", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VREDUCEPDMasked256Merging", argLength: 3, reg: w2kw, asm: "VREDUCEPD", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VREDUCEPDMasked512Merging", argLength: 3, reg: w2kw, asm: "VREDUCEPD", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VREDUCEPSMasked128Merging", argLength: 3, reg: w2kw, asm: "VREDUCEPS", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VREDUCEPSMasked256Merging", argLength: 3, reg: w2kw, asm: "VREDUCEPS", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VREDUCEPSMasked512Merging", argLength: 3, reg: w2kw, asm: "VREDUCEPS", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VRNDSCALEPDMasked128Merging", argLength: 3, reg: w2kw, asm: "VRNDSCALEPD", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VRNDSCALEPDMasked256Merging", argLength: 3, reg: w2kw, asm: "VRNDSCALEPD", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VRNDSCALEPDMasked512Merging", argLength: 3, reg: w2kw, asm: "VRNDSCALEPD", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VRNDSCALEPSMasked128Merging", argLength: 3, reg: w2kw, asm: "VRNDSCALEPS", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VRNDSCALEPSMasked256Merging", argLength: 3, reg: w2kw, asm: "VRNDSCALEPS", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VRNDSCALEPSMasked512Merging", argLength: 3, reg: w2kw, asm: "VRNDSCALEPS", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
}
|
||||
}
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
|
@ -30,6 +30,12 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
|
|||
{{- range .OpsDataImmLoad}}
|
||||
{name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", commutative: {{.Comm}}, typ: "{{.Type}}", aux: "SymValAndOff", symEffect: "Read", resultInArg0: {{.ResultInArg0}}},
|
||||
{{- end}}
|
||||
{{- range .OpsDataMerging }}
|
||||
{name: "{{.OpName}}Merging", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", commutative: false, typ: "{{.Type}}", resultInArg0: true},
|
||||
{{- end }}
|
||||
{{- range .OpsDataImmMerging }}
|
||||
{name: "{{.OpName}}Merging", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", aux: "UInt8", commutative: false, typ: "{{.Type}}", resultInArg0: true},
|
||||
{{- end }}
|
||||
}
|
||||
}
|
||||
`
|
||||
|
|
@ -51,10 +57,12 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer {
|
|||
ResultInArg0 bool
|
||||
}
|
||||
type machineOpsData struct {
|
||||
OpsData []opData
|
||||
OpsDataImm []opData
|
||||
OpsDataLoad []opData
|
||||
OpsDataImmLoad []opData
|
||||
OpsData []opData
|
||||
OpsDataImm []opData
|
||||
OpsDataLoad []opData
|
||||
OpsDataImmLoad []opData
|
||||
OpsDataMerging []opData
|
||||
OpsDataImmMerging []opData
|
||||
}
|
||||
|
||||
regInfoSet := map[string]bool{
|
||||
|
|
@ -66,6 +74,8 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer {
|
|||
opsDataImm := make([]opData, 0)
|
||||
opsDataLoad := make([]opData, 0)
|
||||
opsDataImmLoad := make([]opData, 0)
|
||||
opsDataMerging := make([]opData, 0)
|
||||
opsDataImmMerging := make([]opData, 0)
|
||||
|
||||
// Determine the "best" version of an instruction to use
|
||||
best := make(map[string]Operation)
|
||||
|
|
@ -98,7 +108,7 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer {
|
|||
regInfoMissing := make(map[string]bool, 0)
|
||||
for _, asm := range mOpOrder {
|
||||
op := best[asm]
|
||||
shapeIn, shapeOut, _, _, gOp := op.shape()
|
||||
shapeIn, shapeOut, maskType, _, gOp := op.shape()
|
||||
|
||||
// TODO: all our masked operations are now zeroing, we need to generate machine ops with merging masks, maybe copy
|
||||
// one here with a name suffix "Merging". The rewrite rules will need them.
|
||||
|
|
@ -147,11 +157,13 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer {
|
|||
resultInArg0 = true
|
||||
}
|
||||
var memOpData *opData
|
||||
regInfoMerging := regInfo
|
||||
hasMerging := false
|
||||
if op.MemFeatures != nil && *op.MemFeatures == "vbcst" {
|
||||
// Right now we only have vbcst case
|
||||
// Make a full vec memory variant.
|
||||
op = rewriteLastVregToMem(op)
|
||||
regInfo, err := makeRegInfo(op, VregMemIn)
|
||||
opMem := rewriteLastVregToMem(op)
|
||||
regInfo, err := makeRegInfo(opMem, VregMemIn)
|
||||
if err != nil {
|
||||
// Just skip it if it's non nill.
|
||||
// an error could be triggered by [checkVecAsScalar].
|
||||
|
|
@ -163,16 +175,51 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer {
|
|||
memOpData = &opData{asm + "load", gOp.Asm, len(gOp.In) + 1, regInfo, false, outType, resultInArg0}
|
||||
}
|
||||
}
|
||||
hasMerging = gOp.hasMaskedMerging(maskType, shapeOut)
|
||||
if hasMerging && !resultInArg0 {
|
||||
// We have to copy the slice here becasue the sort will be visible from other
|
||||
// aliases when no reslicing is happening.
|
||||
newIn := make([]Operand, len(op.In), len(op.In)+1)
|
||||
copy(newIn, op.In)
|
||||
op.In = newIn
|
||||
op.In = append(op.In, op.Out[0])
|
||||
op.sortOperand()
|
||||
regInfoMerging, err = makeRegInfo(op, NoMem)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
if shapeIn == OneImmIn || shapeIn == OneKmaskImmIn {
|
||||
opsDataImm = append(opsDataImm, opData{asm, gOp.Asm, len(gOp.In), regInfo, gOp.Commutative, outType, resultInArg0})
|
||||
if memOpData != nil {
|
||||
if *op.MemFeatures != "vbcst" {
|
||||
panic("simdgen only knows vbcst for mem ops for now")
|
||||
}
|
||||
opsDataImmLoad = append(opsDataImmLoad, *memOpData)
|
||||
}
|
||||
if hasMerging {
|
||||
mergingLen := len(gOp.In)
|
||||
if !resultInArg0 {
|
||||
mergingLen++
|
||||
}
|
||||
opsDataImmMerging = append(opsDataImmMerging, opData{asm, gOp.Asm, mergingLen, regInfoMerging, gOp.Commutative, outType, resultInArg0})
|
||||
}
|
||||
} else {
|
||||
opsData = append(opsData, opData{asm, gOp.Asm, len(gOp.In), regInfo, gOp.Commutative, outType, resultInArg0})
|
||||
if memOpData != nil {
|
||||
if *op.MemFeatures != "vbcst" {
|
||||
panic("simdgen only knows vbcst for mem ops for now")
|
||||
}
|
||||
opsDataLoad = append(opsDataLoad, *memOpData)
|
||||
}
|
||||
if hasMerging {
|
||||
mergingLen := len(gOp.In)
|
||||
if !resultInArg0 {
|
||||
mergingLen++
|
||||
}
|
||||
opsDataMerging = append(opsDataMerging, opData{asm, gOp.Asm, mergingLen, regInfoMerging, gOp.Commutative, outType, resultInArg0})
|
||||
}
|
||||
}
|
||||
}
|
||||
if len(regInfoErrs) != 0 {
|
||||
|
|
@ -193,7 +240,14 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer {
|
|||
sort.Slice(opsDataImmLoad, func(i, j int) bool {
|
||||
return compareNatural(opsDataImmLoad[i].OpName, opsDataImmLoad[j].OpName) < 0
|
||||
})
|
||||
err := t.Execute(buffer, machineOpsData{opsData, opsDataImm, opsDataLoad, opsDataImmLoad})
|
||||
sort.Slice(opsDataMerging, func(i, j int) bool {
|
||||
return compareNatural(opsDataMerging[i].OpName, opsDataMerging[j].OpName) < 0
|
||||
})
|
||||
sort.Slice(opsDataImmMerging, func(i, j int) bool {
|
||||
return compareNatural(opsDataImmMerging[i].OpName, opsDataImmMerging[j].OpName) < 0
|
||||
})
|
||||
err := t.Execute(buffer, machineOpsData{opsData, opsDataImm, opsDataLoad, opsDataImmLoad,
|
||||
opsDataMerging, opsDataImmMerging})
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("failed to execute template: %w", err))
|
||||
}
|
||||
|
|
|
|||
|
|
@ -585,8 +585,8 @@ func writeSIMDFeatures(ops []Operation) *bytes.Buffer {
|
|||
return buffer
|
||||
}
|
||||
|
||||
// writeSIMDStubs generates the simd vector intrinsic stubs and writes it to ops_amd64.go and ops_internal_amd64.go
|
||||
// within the specified directory.
|
||||
// writeSIMDStubs returns two bytes.Buffers containing the declarations for the public
|
||||
// and internal-use vector intrinsics.
|
||||
func writeSIMDStubs(ops []Operation, typeMap simdTypeMap) (f, fI *bytes.Buffer) {
|
||||
t := templateOf(simdStubsTmpl, "simdStubs")
|
||||
f = new(bytes.Buffer)
|
||||
|
|
|
|||
|
|
@ -126,6 +126,9 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer {
|
|||
buffer := new(bytes.Buffer)
|
||||
buffer.WriteString(generatedHeader + "\n")
|
||||
|
||||
// asm -> masked merging rules
|
||||
maskedMergeOpts := make(map[string]string)
|
||||
s2n := map[int]string{8: "B", 16: "W", 32: "D", 64: "Q"}
|
||||
asmCheck := map[string]bool{}
|
||||
var allData []tplRuleData
|
||||
var optData []tplRuleData // for mask peephole optimizations, and other misc
|
||||
|
|
@ -295,6 +298,33 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer {
|
|||
memOpData.tplName = "vregMem"
|
||||
}
|
||||
memOptData = append(memOptData, memOpData)
|
||||
asmCheck[memOpData.Asm+"load"] = true
|
||||
}
|
||||
}
|
||||
// Generate the masked merging optimization rules
|
||||
if gOp.hasMaskedMerging(maskType, opOutShape) {
|
||||
// TODO: handle customized operand order and special lower.
|
||||
maskElem := gOp.In[len(gOp.In)-1]
|
||||
if maskElem.Bits == nil {
|
||||
panic("mask has no bits")
|
||||
}
|
||||
if maskElem.ElemBits == nil {
|
||||
panic("mask has no elemBits")
|
||||
}
|
||||
if maskElem.Lanes == nil {
|
||||
panic("mask has no lanes")
|
||||
}
|
||||
switch *maskElem.Bits {
|
||||
case 128, 256:
|
||||
// VPBLENDVB cases.
|
||||
noMaskName := machineOpName(NoMask, gOp)
|
||||
maskedMergeOpts[noMaskName] = fmt.Sprintf("(VPBLENDVB%d dst (%s %s) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (%sMerging dst %s (VPMOVVec%dx%dToM <types.TypeMask> mask))\n",
|
||||
*maskElem.Bits, noMaskName, data.Args, data.Asm, data.Args, *maskElem.ElemBits, *maskElem.Lanes)
|
||||
case 512:
|
||||
// VPBLENDM[BWDQ] cases.
|
||||
noMaskName := machineOpName(NoMask, gOp)
|
||||
maskedMergeOpts[noMaskName] = fmt.Sprintf("(VPBLENDM%sMasked%d dst (%s %s) mask) => (%sMerging dst %s mask)\n",
|
||||
s2n[*maskElem.ElemBits], *maskElem.Bits, noMaskName, data.Args, data.Asm, data.Args)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -332,6 +362,13 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer {
|
|||
}
|
||||
}
|
||||
|
||||
for asm, rule := range maskedMergeOpts {
|
||||
if !asmCheck[asm] {
|
||||
continue
|
||||
}
|
||||
buffer.WriteString(rule)
|
||||
}
|
||||
|
||||
for _, data := range memOptData {
|
||||
if err := ruleTemplates.ExecuteTemplate(buffer, data.tplName, data); err != nil {
|
||||
panic(fmt.Errorf("failed to execute template %s for %s: %w", data.tplName, data.Asm, err))
|
||||
|
|
|
|||
|
|
@ -99,6 +99,7 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer {
|
|||
"v21ResultInArg0",
|
||||
"v21ResultInArg0Imm8",
|
||||
"v31x0AtIn2ResultInArg0",
|
||||
"v2kvResultInArg0",
|
||||
}
|
||||
regInfoSet := map[string][]string{}
|
||||
for _, key := range regInfoKeys {
|
||||
|
|
@ -107,7 +108,8 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer {
|
|||
|
||||
seen := map[string]struct{}{}
|
||||
allUnseen := make(map[string][]Operation)
|
||||
classifyOp := func(op Operation, shapeIn inShape, shapeOut outShape, caseStr string, mem memShape) error {
|
||||
allUnseenCaseStr := make(map[string][]string)
|
||||
classifyOp := func(op Operation, maskType maskShape, shapeIn inShape, shapeOut outShape, caseStr string, mem memShape) error {
|
||||
regShape, err := op.regShape(mem)
|
||||
if err != nil {
|
||||
return err
|
||||
|
|
@ -127,8 +129,31 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer {
|
|||
}
|
||||
if _, ok := regInfoSet[regShape]; !ok {
|
||||
allUnseen[regShape] = append(allUnseen[regShape], op)
|
||||
allUnseenCaseStr[regShape] = append(allUnseenCaseStr[regShape], caseStr)
|
||||
}
|
||||
regInfoSet[regShape] = append(regInfoSet[regShape], caseStr)
|
||||
if mem == NoMem && op.hasMaskedMerging(maskType, shapeOut) {
|
||||
regShapeMerging := regShape
|
||||
if shapeOut != OneVregOutAtIn {
|
||||
// We have to copy the slice here becasue the sort will be visible from other
|
||||
// aliases when no reslicing is happening.
|
||||
newIn := make([]Operand, len(op.In), len(op.In)+1)
|
||||
copy(newIn, op.In)
|
||||
op.In = newIn
|
||||
op.In = append(op.In, op.Out[0])
|
||||
op.sortOperand()
|
||||
regShapeMerging, err = op.regShape(mem)
|
||||
regShapeMerging += "ResultInArg0"
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if _, ok := regInfoSet[regShapeMerging]; !ok {
|
||||
allUnseen[regShapeMerging] = append(allUnseen[regShapeMerging], op)
|
||||
allUnseenCaseStr[regShapeMerging] = append(allUnseenCaseStr[regShapeMerging], caseStr+"Merging")
|
||||
}
|
||||
regInfoSet[regShapeMerging] = append(regInfoSet[regShapeMerging], caseStr+"Merging")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
for _, op := range ops {
|
||||
|
|
@ -146,7 +171,7 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer {
|
|||
isZeroMasking = true
|
||||
}
|
||||
}
|
||||
if err := classifyOp(op, shapeIn, shapeOut, caseStr, NoMem); err != nil {
|
||||
if err := classifyOp(op, maskType, shapeIn, shapeOut, caseStr, NoMem); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
if op.MemFeatures != nil && *op.MemFeatures == "vbcst" {
|
||||
|
|
@ -155,7 +180,7 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer {
|
|||
// Ignore the error
|
||||
// an error could be triggered by [checkVecAsScalar].
|
||||
// TODO: make [checkVecAsScalar] aware of mem ops.
|
||||
if err := classifyOp(op, shapeIn, shapeOut, caseStr+"load", VregMemIn); err != nil {
|
||||
if err := classifyOp(op, maskType, shapeIn, shapeOut, caseStr+"load", VregMemIn); err != nil {
|
||||
if *Verbose {
|
||||
log.Printf("Seen error: %e", err)
|
||||
}
|
||||
|
|
@ -169,7 +194,7 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer {
|
|||
for k := range allUnseen {
|
||||
allKeys = append(allKeys, k)
|
||||
}
|
||||
panic(fmt.Errorf("unsupported register constraint for prog, please update gen_simdssa.go and amd64/ssa.go: %+v\nAll keys: %v", allUnseen, allKeys))
|
||||
panic(fmt.Errorf("unsupported register constraint for prog, please update gen_simdssa.go and amd64/ssa.go: %+v\nAll keys: %v\n, cases: %v\n", allUnseen, allKeys, allUnseenCaseStr))
|
||||
}
|
||||
|
||||
buffer := new(bytes.Buffer)
|
||||
|
|
|
|||
|
|
@ -523,10 +523,6 @@ func checkVecAsScalar(op Operation) (idx int, err error) {
|
|||
}
|
||||
}
|
||||
if idx >= 0 {
|
||||
if idx != 1 {
|
||||
err = fmt.Errorf("simdgen only supports TreatLikeAScalarOfSize at the 2nd arg of the arg list: %s", op)
|
||||
return
|
||||
}
|
||||
if sSize != 8 && sSize != 16 && sSize != 32 && sSize != 64 {
|
||||
err = fmt.Errorf("simdgen does not recognize this uint size: %d, %s", sSize, op)
|
||||
return
|
||||
|
|
@ -545,6 +541,10 @@ func rewriteVecAsScalarRegInfo(op Operation, regInfo string) (string, error) {
|
|||
regInfo = "vfpv"
|
||||
} else if regInfo == "v2kv" {
|
||||
regInfo = "vfpkv"
|
||||
} else if regInfo == "v31" {
|
||||
regInfo = "v2fpv"
|
||||
} else if regInfo == "v3kv" {
|
||||
regInfo = "v2fpkv"
|
||||
} else {
|
||||
return "", fmt.Errorf("simdgen does not recognize uses of treatLikeAScalarOfSize with op regShape %s in op: %s", regInfo, op)
|
||||
}
|
||||
|
|
@ -807,6 +807,12 @@ func reportXEDInconsistency(ops []Operation) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (o *Operation) hasMaskedMerging(maskType maskShape, outType outShape) bool {
|
||||
// BLEND and VMOVDQU are not user-facing ops so we should filter them out.
|
||||
return o.OperandOrder == nil && o.SpecialLower == nil && maskType == OneMask && outType == OneVregOut &&
|
||||
len(o.InVariant) == 1 && !strings.Contains(o.Asm, "BLEND") && !strings.Contains(o.Asm, "VMOVDQU")
|
||||
}
|
||||
|
||||
func getVbcstData(s string) (feat1Match, feat2Match string) {
|
||||
_, err := fmt.Sscanf(s, "feat1=%[^;];feat2=%s", &feat1Match, &feat2Match)
|
||||
if err != nil {
|
||||
|
|
|
|||
|
|
@ -299,21 +299,6 @@
|
|||
out:
|
||||
- *v
|
||||
|
||||
# For AVX512
|
||||
- go: move
|
||||
asm: VMOVUP[SD]
|
||||
zeroing: true
|
||||
in:
|
||||
- &v
|
||||
go: $t
|
||||
class: vreg
|
||||
base: float
|
||||
inVariant:
|
||||
-
|
||||
class: mask
|
||||
out:
|
||||
- *v
|
||||
|
||||
- go: Expand
|
||||
asm: "VPEXPAND[BWDQ]|VEXPANDP[SD]"
|
||||
in:
|
||||
|
|
|
|||
|
|
@ -1108,3 +1108,22 @@ func TestSelectTernOptInt32x16(t *testing.T) {
|
|||
}
|
||||
foo(t2, applyTo3(x, y, z, ft2))
|
||||
}
|
||||
|
||||
func TestMaskedMerge(t *testing.T) {
|
||||
x := simd.LoadInt64x4Slice([]int64{1, 2, 3, 4})
|
||||
y := simd.LoadInt64x4Slice([]int64{5, 6, 1, 1})
|
||||
z := simd.LoadInt64x4Slice([]int64{-1, -2, -3, -4})
|
||||
res := make([]int64, 4)
|
||||
expected := []int64{6, 8, -3, -4}
|
||||
mask := x.Less(y)
|
||||
if simd.HasAVX512() {
|
||||
x.Add(y).Merge(z, mask).StoreSlice(res)
|
||||
} else {
|
||||
x.Add(y).Merge(z, mask).StoreSlice(res)
|
||||
}
|
||||
for i := range 4 {
|
||||
if res[i] != expected[i] {
|
||||
t.Errorf("got %d wanted %d", res[i], expected[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -67,3 +67,13 @@ func simdFeatureGuardedMaskOpt() simd.Int16x16 {
|
|||
mask := simd.Mask16x16FromBits(5)
|
||||
return x.Add(y).Masked(mask) // amd64:`VPAND\s.*$`
|
||||
}
|
||||
|
||||
func simdMaskedMerge() simd.Int16x16 {
|
||||
var x, y simd.Int16x16
|
||||
if simd.HasAVX512() {
|
||||
mask := simd.Mask16x16FromBits(5)
|
||||
return x.Add(y).Merge(x, mask) // amd64:-`VPBLENDVB\s.*$`
|
||||
}
|
||||
mask := simd.Mask16x16FromBits(5)
|
||||
return x.Add(y).Merge(x, mask) // amd64:`VPBLENDVB\s.*$`
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue