diff --git a/src/cmd/compile/internal/amd64/simdssa.go b/src/cmd/compile/internal/amd64/simdssa.go index 841f57581ff..3bfd4ab777a 100644 --- a/src/cmd/compile/internal/amd64/simdssa.go +++ b/src/cmd/compile/internal/amd64/simdssa.go @@ -42,102 +42,48 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPBROADCASTW512, ssa.OpAMD64VPBROADCASTD512, ssa.OpAMD64VPBROADCASTQ512, - ssa.OpAMD64VPMOVWB128_128, - ssa.OpAMD64VPMOVWB128_256, - ssa.OpAMD64VPMOVWB256, - ssa.OpAMD64VPMOVDB128_128, - ssa.OpAMD64VPMOVDB128_256, - ssa.OpAMD64VPMOVDB128_512, - ssa.OpAMD64VPMOVQB128_128, - ssa.OpAMD64VPMOVQB128_256, - ssa.OpAMD64VPMOVQB128_512, - ssa.OpAMD64VPMOVSWB128_128, - ssa.OpAMD64VPMOVSWB128_256, - ssa.OpAMD64VPMOVSWB256, - ssa.OpAMD64VPMOVSDB128_128, - ssa.OpAMD64VPMOVSDB128_256, - ssa.OpAMD64VPMOVSDB128_512, - ssa.OpAMD64VPMOVSQB128_128, - ssa.OpAMD64VPMOVSQB128_256, - ssa.OpAMD64VPMOVSQB128_512, - ssa.OpAMD64VPMOVSXBW256, - ssa.OpAMD64VPMOVSXBW512, - ssa.OpAMD64VPMOVDW128_128, - ssa.OpAMD64VPMOVDW128_256, - ssa.OpAMD64VPMOVDW256, - ssa.OpAMD64VPMOVQW128_128, - ssa.OpAMD64VPMOVQW128_256, - ssa.OpAMD64VPMOVQW128_512, - ssa.OpAMD64VPMOVSDW128_128, - ssa.OpAMD64VPMOVSDW128_256, - ssa.OpAMD64VPMOVSDW256, - ssa.OpAMD64VPMOVSQW128_128, - ssa.OpAMD64VPMOVSQW128_256, - ssa.OpAMD64VPMOVSQW128_512, - ssa.OpAMD64VPMOVSXBW128, ssa.OpAMD64VCVTTPS2DQ128, ssa.OpAMD64VCVTTPS2DQ256, ssa.OpAMD64VCVTTPS2DQ512, - ssa.OpAMD64VPMOVSXBD512, - ssa.OpAMD64VPMOVSXWD256, - ssa.OpAMD64VPMOVSXWD512, - ssa.OpAMD64VPMOVQD128_128, - ssa.OpAMD64VPMOVQD128_256, - ssa.OpAMD64VPMOVQD256, - ssa.OpAMD64VPMOVSQD128_128, - ssa.OpAMD64VPMOVSQD128_256, - ssa.OpAMD64VPMOVSQD256, - ssa.OpAMD64VPMOVSXBD128, - ssa.OpAMD64VPMOVSXWD128, - ssa.OpAMD64VPMOVSXBD256, - ssa.OpAMD64VPMOVSXWQ512, - ssa.OpAMD64VPMOVSXDQ256, - ssa.OpAMD64VPMOVSXDQ512, - ssa.OpAMD64VPMOVSXBQ128, - ssa.OpAMD64VPMOVSXWQ128, - ssa.OpAMD64VPMOVSXDQ128, - ssa.OpAMD64VPMOVSXBQ256, - ssa.OpAMD64VPMOVSXBQ512, - ssa.OpAMD64VPMOVUSWB128_128, - ssa.OpAMD64VPMOVUSWB128_256, - ssa.OpAMD64VPMOVUSWB256, - ssa.OpAMD64VPMOVUSDB128_128, - ssa.OpAMD64VPMOVUSDB128_256, - ssa.OpAMD64VPMOVUSDB128_512, - ssa.OpAMD64VPMOVUSQB128_128, - ssa.OpAMD64VPMOVUSQB128_256, - ssa.OpAMD64VPMOVUSQB128_512, - ssa.OpAMD64VPMOVZXBW256, - ssa.OpAMD64VPMOVZXBW512, - ssa.OpAMD64VPMOVUSDW128_128, - ssa.OpAMD64VPMOVUSDW128_256, - ssa.OpAMD64VPMOVUSDW256, - ssa.OpAMD64VPMOVUSQW128_128, - ssa.OpAMD64VPMOVUSQW128_256, - ssa.OpAMD64VPMOVUSQW128_512, - ssa.OpAMD64VPMOVZXBW128, ssa.OpAMD64VCVTPS2UDQ128, ssa.OpAMD64VCVTPS2UDQ256, ssa.OpAMD64VCVTPS2UDQ512, - ssa.OpAMD64VPMOVZXBD512, - ssa.OpAMD64VPMOVZXWD256, - ssa.OpAMD64VPMOVZXWD512, - ssa.OpAMD64VPMOVUSQD128_128, - ssa.OpAMD64VPMOVUSQD128_256, - ssa.OpAMD64VPMOVUSQD256, - ssa.OpAMD64VPMOVZXBD128, - ssa.OpAMD64VPMOVZXWD128, - ssa.OpAMD64VPMOVZXBD256, - ssa.OpAMD64VPMOVZXWQ512, - ssa.OpAMD64VPMOVZXDQ256, - ssa.OpAMD64VPMOVZXDQ512, + ssa.OpAMD64VPMOVSXBQ128, + ssa.OpAMD64VPMOVSXWQ128, + ssa.OpAMD64VPMOVSXDQ128, ssa.OpAMD64VPMOVZXBQ128, ssa.OpAMD64VPMOVZXWQ128, ssa.OpAMD64VPMOVZXDQ128, + ssa.OpAMD64VPMOVSXBD128, + ssa.OpAMD64VPMOVSXWD128, + ssa.OpAMD64VPMOVSXBQ256, ssa.OpAMD64VPMOVSXWQ256, + ssa.OpAMD64VPMOVZXBD128, + ssa.OpAMD64VPMOVZXWD128, ssa.OpAMD64VPMOVZXBQ256, ssa.OpAMD64VPMOVZXWQ256, + ssa.OpAMD64VPMOVSXBW128, + ssa.OpAMD64VPMOVSXBD256, + ssa.OpAMD64VPMOVSXBQ512, + ssa.OpAMD64VPMOVZXBW128, + ssa.OpAMD64VPMOVZXBD256, ssa.OpAMD64VPMOVZXBQ512, + ssa.OpAMD64VPMOVSXBW256, + ssa.OpAMD64VPMOVSXBW512, + ssa.OpAMD64VPMOVSXBD512, + ssa.OpAMD64VPMOVSXWD256, 
+ ssa.OpAMD64VPMOVSXWD512, + ssa.OpAMD64VPMOVSXWQ512, + ssa.OpAMD64VPMOVSXDQ256, + ssa.OpAMD64VPMOVSXDQ512, + ssa.OpAMD64VPMOVZXBW256, + ssa.OpAMD64VPMOVZXBW512, + ssa.OpAMD64VPMOVZXBD512, + ssa.OpAMD64VPMOVZXWD256, + ssa.OpAMD64VPMOVZXWD512, + ssa.OpAMD64VPMOVZXWQ512, + ssa.OpAMD64VPMOVZXDQ256, + ssa.OpAMD64VPMOVZXDQ512, ssa.OpAMD64VPLZCNTD128, ssa.OpAMD64VPLZCNTD256, ssa.OpAMD64VPLZCNTD512, @@ -168,12 +114,58 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VRSQRT14PD128, ssa.OpAMD64VRSQRT14PD256, ssa.OpAMD64VRSQRT14PD512, + ssa.OpAMD64VPMOVSWB128_128, + ssa.OpAMD64VPMOVSWB128_256, + ssa.OpAMD64VPMOVSWB256, + ssa.OpAMD64VPMOVSDB128_128, + ssa.OpAMD64VPMOVSDB128_256, + ssa.OpAMD64VPMOVSDB128_512, + ssa.OpAMD64VPMOVSQB128_128, + ssa.OpAMD64VPMOVSQB128_256, + ssa.OpAMD64VPMOVSQB128_512, + ssa.OpAMD64VPMOVSDW128_128, + ssa.OpAMD64VPMOVSDW128_256, + ssa.OpAMD64VPMOVSDW256, + ssa.OpAMD64VPMOVSQW128_128, + ssa.OpAMD64VPMOVSQW128_256, + ssa.OpAMD64VPMOVSQW128_512, + ssa.OpAMD64VPMOVSQD128_128, + ssa.OpAMD64VPMOVSQD128_256, + ssa.OpAMD64VPMOVSQD256, + ssa.OpAMD64VPMOVUSWB256, + ssa.OpAMD64VPMOVUSDW128_128, + ssa.OpAMD64VPMOVUSDW128_256, + ssa.OpAMD64VPMOVUSDW256, + ssa.OpAMD64VPMOVUSQW128_128, + ssa.OpAMD64VPMOVUSQW128_256, + ssa.OpAMD64VPMOVUSQW128_512, + ssa.OpAMD64VPMOVUSQD128_128, + ssa.OpAMD64VPMOVUSQD128_256, + ssa.OpAMD64VPMOVUSQD256, ssa.OpAMD64VSQRTPS128, ssa.OpAMD64VSQRTPS256, ssa.OpAMD64VSQRTPS512, ssa.OpAMD64VSQRTPD128, ssa.OpAMD64VSQRTPD256, - ssa.OpAMD64VSQRTPD512: + ssa.OpAMD64VSQRTPD512, + ssa.OpAMD64VPMOVWB128_128, + ssa.OpAMD64VPMOVWB128_256, + ssa.OpAMD64VPMOVWB256, + ssa.OpAMD64VPMOVDB128_128, + ssa.OpAMD64VPMOVDB128_256, + ssa.OpAMD64VPMOVDB128_512, + ssa.OpAMD64VPMOVQB128_128, + ssa.OpAMD64VPMOVQB128_256, + ssa.OpAMD64VPMOVQB128_512, + ssa.OpAMD64VPMOVDW128_128, + ssa.OpAMD64VPMOVDW128_256, + ssa.OpAMD64VPMOVDW256, + ssa.OpAMD64VPMOVQW128_128, + ssa.OpAMD64VPMOVQW128_256, + ssa.OpAMD64VPMOVQW128_512, + ssa.OpAMD64VPMOVQD128_128, + ssa.OpAMD64VPMOVQD128_256, + ssa.OpAMD64VPMOVQD256: p = simdV11(s, v) case ssa.OpAMD64VAESDECLAST128, @@ -246,12 +238,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPAVGW128, ssa.OpAMD64VPAVGW256, ssa.OpAMD64VPAVGW512, - ssa.OpAMD64VPACKSSDW128, - ssa.OpAMD64VPACKSSDW256, - ssa.OpAMD64VPACKSSDW512, - ssa.OpAMD64VPACKUSDW128, - ssa.OpAMD64VPACKUSDW256, - ssa.OpAMD64VPACKUSDW512, ssa.OpAMD64VPSIGNB128, ssa.OpAMD64VPSIGNB256, ssa.OpAMD64VPSIGNW128, @@ -425,6 +411,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPRORVQ128, ssa.OpAMD64VPRORVQ256, ssa.OpAMD64VPRORVQ512, + ssa.OpAMD64VPACKSSDW128, + ssa.OpAMD64VPACKSSDW256, + ssa.OpAMD64VPACKSSDW512, + ssa.OpAMD64VPACKUSDW128, + ssa.OpAMD64VPACKUSDW256, + ssa.OpAMD64VPACKUSDW512, ssa.OpAMD64VSCALEFPS128, ssa.OpAMD64VSCALEFPS256, ssa.OpAMD64VSCALEFPS512, @@ -565,12 +557,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPAVGWMasked128, ssa.OpAMD64VPAVGWMasked256, ssa.OpAMD64VPAVGWMasked512, - ssa.OpAMD64VPACKSSDWMasked128, - ssa.OpAMD64VPACKSSDWMasked256, - ssa.OpAMD64VPACKSSDWMasked512, - ssa.OpAMD64VPACKUSDWMasked128, - ssa.OpAMD64VPACKUSDWMasked256, - ssa.OpAMD64VPACKUSDWMasked512, ssa.OpAMD64VDIVPSMasked128, ssa.OpAMD64VDIVPSMasked256, ssa.OpAMD64VDIVPSMasked512, @@ -702,6 +688,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPRORVQMasked128, ssa.OpAMD64VPRORVQMasked256, ssa.OpAMD64VPRORVQMasked512, + ssa.OpAMD64VPACKSSDWMasked128, + ssa.OpAMD64VPACKSSDWMasked256, + 
ssa.OpAMD64VPACKSSDWMasked512, + ssa.OpAMD64VPACKUSDWMasked128, + ssa.OpAMD64VPACKUSDWMasked256, + ssa.OpAMD64VPACKUSDWMasked512, ssa.OpAMD64VSCALEFPSMasked128, ssa.OpAMD64VSCALEFPSMasked256, ssa.OpAMD64VSCALEFPSMasked512, @@ -824,102 +816,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPCOMPRESSQMasked128, ssa.OpAMD64VPCOMPRESSQMasked256, ssa.OpAMD64VPCOMPRESSQMasked512, - ssa.OpAMD64VPMOVWBMasked128_128, - ssa.OpAMD64VPMOVWBMasked128_256, - ssa.OpAMD64VPMOVWBMasked256, - ssa.OpAMD64VPMOVDBMasked128_128, - ssa.OpAMD64VPMOVDBMasked128_256, - ssa.OpAMD64VPMOVDBMasked128_512, - ssa.OpAMD64VPMOVQBMasked128_128, - ssa.OpAMD64VPMOVQBMasked128_256, - ssa.OpAMD64VPMOVQBMasked128_512, - ssa.OpAMD64VPMOVSWBMasked128_128, - ssa.OpAMD64VPMOVSWBMasked128_256, - ssa.OpAMD64VPMOVSWBMasked256, - ssa.OpAMD64VPMOVSDBMasked128_128, - ssa.OpAMD64VPMOVSDBMasked128_256, - ssa.OpAMD64VPMOVSDBMasked128_512, - ssa.OpAMD64VPMOVSQBMasked128_128, - ssa.OpAMD64VPMOVSQBMasked128_256, - ssa.OpAMD64VPMOVSQBMasked128_512, - ssa.OpAMD64VPMOVSXBWMasked256, - ssa.OpAMD64VPMOVSXBWMasked512, - ssa.OpAMD64VPMOVDWMasked128_128, - ssa.OpAMD64VPMOVDWMasked128_256, - ssa.OpAMD64VPMOVDWMasked256, - ssa.OpAMD64VPMOVQWMasked128_128, - ssa.OpAMD64VPMOVQWMasked128_256, - ssa.OpAMD64VPMOVQWMasked128_512, - ssa.OpAMD64VPMOVSDWMasked128_128, - ssa.OpAMD64VPMOVSDWMasked128_256, - ssa.OpAMD64VPMOVSDWMasked256, - ssa.OpAMD64VPMOVSQWMasked128_128, - ssa.OpAMD64VPMOVSQWMasked128_256, - ssa.OpAMD64VPMOVSQWMasked128_512, - ssa.OpAMD64VPMOVSXBWMasked128, ssa.OpAMD64VCVTTPS2DQMasked128, ssa.OpAMD64VCVTTPS2DQMasked256, ssa.OpAMD64VCVTTPS2DQMasked512, - ssa.OpAMD64VPMOVSXBDMasked512, - ssa.OpAMD64VPMOVSXWDMasked256, - ssa.OpAMD64VPMOVSXWDMasked512, - ssa.OpAMD64VPMOVQDMasked128_128, - ssa.OpAMD64VPMOVQDMasked128_256, - ssa.OpAMD64VPMOVQDMasked256, - ssa.OpAMD64VPMOVSQDMasked128_128, - ssa.OpAMD64VPMOVSQDMasked128_256, - ssa.OpAMD64VPMOVSQDMasked256, - ssa.OpAMD64VPMOVSXBDMasked128, - ssa.OpAMD64VPMOVSXWDMasked128, - ssa.OpAMD64VPMOVSXBDMasked256, - ssa.OpAMD64VPMOVSXWQMasked512, - ssa.OpAMD64VPMOVSXDQMasked256, - ssa.OpAMD64VPMOVSXDQMasked512, - ssa.OpAMD64VPMOVSXBQMasked128, - ssa.OpAMD64VPMOVSXWQMasked128, - ssa.OpAMD64VPMOVSXDQMasked128, - ssa.OpAMD64VPMOVSXBQMasked256, - ssa.OpAMD64VPMOVSXBQMasked512, - ssa.OpAMD64VPMOVUSWBMasked128_128, - ssa.OpAMD64VPMOVUSWBMasked128_256, - ssa.OpAMD64VPMOVUSWBMasked256, - ssa.OpAMD64VPMOVUSDBMasked128_128, - ssa.OpAMD64VPMOVUSDBMasked128_256, - ssa.OpAMD64VPMOVUSDBMasked128_512, - ssa.OpAMD64VPMOVUSQBMasked128_128, - ssa.OpAMD64VPMOVUSQBMasked128_256, - ssa.OpAMD64VPMOVUSQBMasked128_512, - ssa.OpAMD64VPMOVZXBWMasked256, - ssa.OpAMD64VPMOVZXBWMasked512, - ssa.OpAMD64VPMOVUSDWMasked128_128, - ssa.OpAMD64VPMOVUSDWMasked128_256, - ssa.OpAMD64VPMOVUSDWMasked256, - ssa.OpAMD64VPMOVUSQWMasked128_128, - ssa.OpAMD64VPMOVUSQWMasked128_256, - ssa.OpAMD64VPMOVUSQWMasked128_512, - ssa.OpAMD64VPMOVZXBWMasked128, ssa.OpAMD64VCVTPS2UDQMasked128, ssa.OpAMD64VCVTPS2UDQMasked256, ssa.OpAMD64VCVTPS2UDQMasked512, - ssa.OpAMD64VPMOVZXBDMasked512, - ssa.OpAMD64VPMOVZXWDMasked256, - ssa.OpAMD64VPMOVZXWDMasked512, - ssa.OpAMD64VPMOVUSQDMasked128_128, - ssa.OpAMD64VPMOVUSQDMasked128_256, - ssa.OpAMD64VPMOVUSQDMasked256, - ssa.OpAMD64VPMOVZXBDMasked128, - ssa.OpAMD64VPMOVZXWDMasked128, - ssa.OpAMD64VPMOVZXBDMasked256, - ssa.OpAMD64VPMOVZXWQMasked512, - ssa.OpAMD64VPMOVZXDQMasked256, - ssa.OpAMD64VPMOVZXDQMasked512, - ssa.OpAMD64VPMOVZXBQMasked128, - ssa.OpAMD64VPMOVZXWQMasked128, - 
ssa.OpAMD64VPMOVZXDQMasked128, - ssa.OpAMD64VPMOVSXWQMasked256, - ssa.OpAMD64VPMOVZXBQMasked256, - ssa.OpAMD64VPMOVZXWQMasked256, - ssa.OpAMD64VPMOVZXBQMasked512, ssa.OpAMD64VEXPANDPSMasked128, ssa.OpAMD64VEXPANDPSMasked256, ssa.OpAMD64VEXPANDPSMasked512, @@ -938,6 +840,42 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPEXPANDQMasked128, ssa.OpAMD64VPEXPANDQMasked256, ssa.OpAMD64VPEXPANDQMasked512, + ssa.OpAMD64VPMOVSXBQMasked128, + ssa.OpAMD64VPMOVSXWQMasked128, + ssa.OpAMD64VPMOVSXDQMasked128, + ssa.OpAMD64VPMOVZXBQMasked128, + ssa.OpAMD64VPMOVZXWQMasked128, + ssa.OpAMD64VPMOVZXDQMasked128, + ssa.OpAMD64VPMOVSXBDMasked128, + ssa.OpAMD64VPMOVSXWDMasked128, + ssa.OpAMD64VPMOVSXBQMasked256, + ssa.OpAMD64VPMOVSXWQMasked256, + ssa.OpAMD64VPMOVZXBDMasked128, + ssa.OpAMD64VPMOVZXWDMasked128, + ssa.OpAMD64VPMOVZXBQMasked256, + ssa.OpAMD64VPMOVZXWQMasked256, + ssa.OpAMD64VPMOVSXBWMasked128, + ssa.OpAMD64VPMOVSXBDMasked256, + ssa.OpAMD64VPMOVSXBQMasked512, + ssa.OpAMD64VPMOVZXBWMasked128, + ssa.OpAMD64VPMOVZXBDMasked256, + ssa.OpAMD64VPMOVZXBQMasked512, + ssa.OpAMD64VPMOVSXBWMasked256, + ssa.OpAMD64VPMOVSXBWMasked512, + ssa.OpAMD64VPMOVSXBDMasked512, + ssa.OpAMD64VPMOVSXWDMasked256, + ssa.OpAMD64VPMOVSXWDMasked512, + ssa.OpAMD64VPMOVSXWQMasked512, + ssa.OpAMD64VPMOVSXDQMasked256, + ssa.OpAMD64VPMOVSXDQMasked512, + ssa.OpAMD64VPMOVZXBWMasked256, + ssa.OpAMD64VPMOVZXBWMasked512, + ssa.OpAMD64VPMOVZXBDMasked512, + ssa.OpAMD64VPMOVZXWDMasked256, + ssa.OpAMD64VPMOVZXWDMasked512, + ssa.OpAMD64VPMOVZXWQMasked512, + ssa.OpAMD64VPMOVZXDQMasked256, + ssa.OpAMD64VPMOVZXDQMasked512, ssa.OpAMD64VPLZCNTDMasked128, ssa.OpAMD64VPLZCNTDMasked256, ssa.OpAMD64VPLZCNTDMasked512, @@ -968,12 +906,58 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VRSQRT14PDMasked128, ssa.OpAMD64VRSQRT14PDMasked256, ssa.OpAMD64VRSQRT14PDMasked512, + ssa.OpAMD64VPMOVSWBMasked128_128, + ssa.OpAMD64VPMOVSWBMasked128_256, + ssa.OpAMD64VPMOVSWBMasked256, + ssa.OpAMD64VPMOVSDBMasked128_128, + ssa.OpAMD64VPMOVSDBMasked128_256, + ssa.OpAMD64VPMOVSDBMasked128_512, + ssa.OpAMD64VPMOVSQBMasked128_128, + ssa.OpAMD64VPMOVSQBMasked128_256, + ssa.OpAMD64VPMOVSQBMasked128_512, + ssa.OpAMD64VPMOVSDWMasked128_128, + ssa.OpAMD64VPMOVSDWMasked128_256, + ssa.OpAMD64VPMOVSDWMasked256, + ssa.OpAMD64VPMOVSQWMasked128_128, + ssa.OpAMD64VPMOVSQWMasked128_256, + ssa.OpAMD64VPMOVSQWMasked128_512, + ssa.OpAMD64VPMOVSQDMasked128_128, + ssa.OpAMD64VPMOVSQDMasked128_256, + ssa.OpAMD64VPMOVSQDMasked256, + ssa.OpAMD64VPMOVUSWBMasked256, + ssa.OpAMD64VPMOVUSDWMasked128_128, + ssa.OpAMD64VPMOVUSDWMasked128_256, + ssa.OpAMD64VPMOVUSDWMasked256, + ssa.OpAMD64VPMOVUSQWMasked128_128, + ssa.OpAMD64VPMOVUSQWMasked128_256, + ssa.OpAMD64VPMOVUSQWMasked128_512, + ssa.OpAMD64VPMOVUSQDMasked128_128, + ssa.OpAMD64VPMOVUSQDMasked128_256, + ssa.OpAMD64VPMOVUSQDMasked256, ssa.OpAMD64VSQRTPSMasked128, ssa.OpAMD64VSQRTPSMasked256, ssa.OpAMD64VSQRTPSMasked512, ssa.OpAMD64VSQRTPDMasked128, ssa.OpAMD64VSQRTPDMasked256, ssa.OpAMD64VSQRTPDMasked512, + ssa.OpAMD64VPMOVWBMasked128_128, + ssa.OpAMD64VPMOVWBMasked128_256, + ssa.OpAMD64VPMOVWBMasked256, + ssa.OpAMD64VPMOVDBMasked128_128, + ssa.OpAMD64VPMOVDBMasked128_256, + ssa.OpAMD64VPMOVDBMasked128_512, + ssa.OpAMD64VPMOVQBMasked128_128, + ssa.OpAMD64VPMOVQBMasked128_256, + ssa.OpAMD64VPMOVQBMasked128_512, + ssa.OpAMD64VPMOVDWMasked128_128, + ssa.OpAMD64VPMOVDWMasked128_256, + ssa.OpAMD64VPMOVDWMasked256, + ssa.OpAMD64VPMOVQWMasked128_128, + ssa.OpAMD64VPMOVQWMasked128_256, + 
ssa.OpAMD64VPMOVQWMasked128_512, + ssa.OpAMD64VPMOVQDMasked128_128, + ssa.OpAMD64VPMOVQDMasked128_256, + ssa.OpAMD64VPMOVQDMasked256, ssa.OpAMD64VMOVDQU8Masked128, ssa.OpAMD64VMOVDQU8Masked256, ssa.OpAMD64VMOVDQU8Masked512, @@ -1345,12 +1329,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPALIGNRMasked256Merging, ssa.OpAMD64VPALIGNRMasked512Merging, ssa.OpAMD64VPALIGNRMasked128Merging, - ssa.OpAMD64VPACKSSDWMasked128Merging, - ssa.OpAMD64VPACKSSDWMasked256Merging, - ssa.OpAMD64VPACKSSDWMasked512Merging, - ssa.OpAMD64VPACKUSDWMasked128Merging, - ssa.OpAMD64VPACKUSDWMasked256Merging, - ssa.OpAMD64VPACKUSDWMasked512Merging, ssa.OpAMD64VDIVPSMasked128Merging, ssa.OpAMD64VDIVPSMasked256Merging, ssa.OpAMD64VDIVPSMasked512Merging, @@ -1492,6 +1470,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPRORVQMasked128Merging, ssa.OpAMD64VPRORVQMasked256Merging, ssa.OpAMD64VPRORVQMasked512Merging, + ssa.OpAMD64VPACKSSDWMasked128Merging, + ssa.OpAMD64VPACKSSDWMasked256Merging, + ssa.OpAMD64VPACKSSDWMasked512Merging, + ssa.OpAMD64VPACKUSDWMasked128Merging, + ssa.OpAMD64VPACKUSDWMasked256Merging, + ssa.OpAMD64VPACKUSDWMasked512Merging, ssa.OpAMD64VSCALEFPSMasked128Merging, ssa.OpAMD64VSCALEFPSMasked256Merging, ssa.OpAMD64VSCALEFPSMasked512Merging, @@ -1750,8 +1734,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPANDQ512load, ssa.OpAMD64VPANDND512load, ssa.OpAMD64VPANDNQ512load, - ssa.OpAMD64VPACKSSDW512load, - ssa.OpAMD64VPACKUSDW512load, ssa.OpAMD64VDIVPS512load, ssa.OpAMD64VDIVPD512load, ssa.OpAMD64VPUNPCKHDQ512load, @@ -1804,6 +1786,8 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPRORVQ128load, ssa.OpAMD64VPRORVQ256load, ssa.OpAMD64VPRORVQ512load, + ssa.OpAMD64VPACKSSDW512load, + ssa.OpAMD64VPACKUSDW512load, ssa.OpAMD64VSCALEFPS128load, ssa.OpAMD64VSCALEFPS256load, ssa.OpAMD64VSCALEFPS512load, @@ -1950,12 +1934,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPANDNQMasked128load, ssa.OpAMD64VPANDNQMasked256load, ssa.OpAMD64VPANDNQMasked512load, - ssa.OpAMD64VPACKSSDWMasked128load, - ssa.OpAMD64VPACKSSDWMasked256load, - ssa.OpAMD64VPACKSSDWMasked512load, - ssa.OpAMD64VPACKUSDWMasked128load, - ssa.OpAMD64VPACKUSDWMasked256load, - ssa.OpAMD64VPACKUSDWMasked512load, ssa.OpAMD64VDIVPSMasked128load, ssa.OpAMD64VDIVPSMasked256load, ssa.OpAMD64VDIVPSMasked512load, @@ -2036,6 +2014,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPRORVQMasked128load, ssa.OpAMD64VPRORVQMasked256load, ssa.OpAMD64VPRORVQMasked512load, + ssa.OpAMD64VPACKSSDWMasked128load, + ssa.OpAMD64VPACKSSDWMasked256load, + ssa.OpAMD64VPACKSSDWMasked512load, + ssa.OpAMD64VPACKUSDWMasked128load, + ssa.OpAMD64VPACKUSDWMasked256load, + ssa.OpAMD64VPACKUSDWMasked512load, ssa.OpAMD64VSCALEFPSMasked128load, ssa.OpAMD64VSCALEFPSMasked256load, ssa.OpAMD64VSCALEFPSMasked512load, @@ -2342,102 +2326,48 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VREDUCEPDMasked128Merging, ssa.OpAMD64VREDUCEPDMasked256Merging, ssa.OpAMD64VREDUCEPDMasked512Merging, - ssa.OpAMD64VPMOVWBMasked128_128Merging, - ssa.OpAMD64VPMOVWBMasked128_256Merging, - ssa.OpAMD64VPMOVWBMasked256Merging, - ssa.OpAMD64VPMOVDBMasked128_128Merging, - ssa.OpAMD64VPMOVDBMasked128_256Merging, - ssa.OpAMD64VPMOVDBMasked128_512Merging, - ssa.OpAMD64VPMOVQBMasked128_128Merging, - ssa.OpAMD64VPMOVQBMasked128_256Merging, - ssa.OpAMD64VPMOVQBMasked128_512Merging, - ssa.OpAMD64VPMOVSWBMasked128_128Merging, - 
ssa.OpAMD64VPMOVSWBMasked128_256Merging, - ssa.OpAMD64VPMOVSWBMasked256Merging, - ssa.OpAMD64VPMOVSDBMasked128_128Merging, - ssa.OpAMD64VPMOVSDBMasked128_256Merging, - ssa.OpAMD64VPMOVSDBMasked128_512Merging, - ssa.OpAMD64VPMOVSQBMasked128_128Merging, - ssa.OpAMD64VPMOVSQBMasked128_256Merging, - ssa.OpAMD64VPMOVSQBMasked128_512Merging, - ssa.OpAMD64VPMOVSXBWMasked256Merging, - ssa.OpAMD64VPMOVSXBWMasked512Merging, - ssa.OpAMD64VPMOVDWMasked128_128Merging, - ssa.OpAMD64VPMOVDWMasked128_256Merging, - ssa.OpAMD64VPMOVDWMasked256Merging, - ssa.OpAMD64VPMOVQWMasked128_128Merging, - ssa.OpAMD64VPMOVQWMasked128_256Merging, - ssa.OpAMD64VPMOVQWMasked128_512Merging, - ssa.OpAMD64VPMOVSDWMasked128_128Merging, - ssa.OpAMD64VPMOVSDWMasked128_256Merging, - ssa.OpAMD64VPMOVSDWMasked256Merging, - ssa.OpAMD64VPMOVSQWMasked128_128Merging, - ssa.OpAMD64VPMOVSQWMasked128_256Merging, - ssa.OpAMD64VPMOVSQWMasked128_512Merging, - ssa.OpAMD64VPMOVSXBWMasked128Merging, ssa.OpAMD64VCVTTPS2DQMasked128Merging, ssa.OpAMD64VCVTTPS2DQMasked256Merging, ssa.OpAMD64VCVTTPS2DQMasked512Merging, - ssa.OpAMD64VPMOVSXBDMasked512Merging, - ssa.OpAMD64VPMOVSXWDMasked256Merging, - ssa.OpAMD64VPMOVSXWDMasked512Merging, - ssa.OpAMD64VPMOVQDMasked128_128Merging, - ssa.OpAMD64VPMOVQDMasked128_256Merging, - ssa.OpAMD64VPMOVQDMasked256Merging, - ssa.OpAMD64VPMOVSQDMasked128_128Merging, - ssa.OpAMD64VPMOVSQDMasked128_256Merging, - ssa.OpAMD64VPMOVSQDMasked256Merging, - ssa.OpAMD64VPMOVSXBDMasked128Merging, - ssa.OpAMD64VPMOVSXWDMasked128Merging, - ssa.OpAMD64VPMOVSXBDMasked256Merging, - ssa.OpAMD64VPMOVSXWQMasked512Merging, - ssa.OpAMD64VPMOVSXDQMasked256Merging, - ssa.OpAMD64VPMOVSXDQMasked512Merging, - ssa.OpAMD64VPMOVSXBQMasked128Merging, - ssa.OpAMD64VPMOVSXWQMasked128Merging, - ssa.OpAMD64VPMOVSXDQMasked128Merging, - ssa.OpAMD64VPMOVSXBQMasked256Merging, - ssa.OpAMD64VPMOVSXBQMasked512Merging, - ssa.OpAMD64VPMOVUSWBMasked128_128Merging, - ssa.OpAMD64VPMOVUSWBMasked128_256Merging, - ssa.OpAMD64VPMOVUSWBMasked256Merging, - ssa.OpAMD64VPMOVUSDBMasked128_128Merging, - ssa.OpAMD64VPMOVUSDBMasked128_256Merging, - ssa.OpAMD64VPMOVUSDBMasked128_512Merging, - ssa.OpAMD64VPMOVUSQBMasked128_128Merging, - ssa.OpAMD64VPMOVUSQBMasked128_256Merging, - ssa.OpAMD64VPMOVUSQBMasked128_512Merging, - ssa.OpAMD64VPMOVZXBWMasked256Merging, - ssa.OpAMD64VPMOVZXBWMasked512Merging, - ssa.OpAMD64VPMOVUSDWMasked128_128Merging, - ssa.OpAMD64VPMOVUSDWMasked128_256Merging, - ssa.OpAMD64VPMOVUSDWMasked256Merging, - ssa.OpAMD64VPMOVUSQWMasked128_128Merging, - ssa.OpAMD64VPMOVUSQWMasked128_256Merging, - ssa.OpAMD64VPMOVUSQWMasked128_512Merging, - ssa.OpAMD64VPMOVZXBWMasked128Merging, ssa.OpAMD64VCVTPS2UDQMasked128Merging, ssa.OpAMD64VCVTPS2UDQMasked256Merging, ssa.OpAMD64VCVTPS2UDQMasked512Merging, - ssa.OpAMD64VPMOVZXBDMasked512Merging, - ssa.OpAMD64VPMOVZXWDMasked256Merging, - ssa.OpAMD64VPMOVZXWDMasked512Merging, - ssa.OpAMD64VPMOVUSQDMasked128_128Merging, - ssa.OpAMD64VPMOVUSQDMasked128_256Merging, - ssa.OpAMD64VPMOVUSQDMasked256Merging, - ssa.OpAMD64VPMOVZXBDMasked128Merging, - ssa.OpAMD64VPMOVZXWDMasked128Merging, - ssa.OpAMD64VPMOVZXBDMasked256Merging, - ssa.OpAMD64VPMOVZXWQMasked512Merging, - ssa.OpAMD64VPMOVZXDQMasked256Merging, - ssa.OpAMD64VPMOVZXDQMasked512Merging, + ssa.OpAMD64VPMOVSXBQMasked128Merging, + ssa.OpAMD64VPMOVSXWQMasked128Merging, + ssa.OpAMD64VPMOVSXDQMasked128Merging, ssa.OpAMD64VPMOVZXBQMasked128Merging, ssa.OpAMD64VPMOVZXWQMasked128Merging, ssa.OpAMD64VPMOVZXDQMasked128Merging, + ssa.OpAMD64VPMOVSXBDMasked128Merging, + 
ssa.OpAMD64VPMOVSXWDMasked128Merging, + ssa.OpAMD64VPMOVSXBQMasked256Merging, ssa.OpAMD64VPMOVSXWQMasked256Merging, + ssa.OpAMD64VPMOVZXBDMasked128Merging, + ssa.OpAMD64VPMOVZXWDMasked128Merging, ssa.OpAMD64VPMOVZXBQMasked256Merging, ssa.OpAMD64VPMOVZXWQMasked256Merging, + ssa.OpAMD64VPMOVSXBWMasked128Merging, + ssa.OpAMD64VPMOVSXBDMasked256Merging, + ssa.OpAMD64VPMOVSXBQMasked512Merging, + ssa.OpAMD64VPMOVZXBWMasked128Merging, + ssa.OpAMD64VPMOVZXBDMasked256Merging, ssa.OpAMD64VPMOVZXBQMasked512Merging, + ssa.OpAMD64VPMOVSXBWMasked256Merging, + ssa.OpAMD64VPMOVSXBWMasked512Merging, + ssa.OpAMD64VPMOVSXBDMasked512Merging, + ssa.OpAMD64VPMOVSXWDMasked256Merging, + ssa.OpAMD64VPMOVSXWDMasked512Merging, + ssa.OpAMD64VPMOVSXWQMasked512Merging, + ssa.OpAMD64VPMOVSXDQMasked256Merging, + ssa.OpAMD64VPMOVSXDQMasked512Merging, + ssa.OpAMD64VPMOVZXBWMasked256Merging, + ssa.OpAMD64VPMOVZXBWMasked512Merging, + ssa.OpAMD64VPMOVZXBDMasked512Merging, + ssa.OpAMD64VPMOVZXWDMasked256Merging, + ssa.OpAMD64VPMOVZXWDMasked512Merging, + ssa.OpAMD64VPMOVZXWQMasked512Merging, + ssa.OpAMD64VPMOVZXDQMasked256Merging, + ssa.OpAMD64VPMOVZXDQMasked512Merging, ssa.OpAMD64VPLZCNTDMasked128Merging, ssa.OpAMD64VPLZCNTDMasked256Merging, ssa.OpAMD64VPLZCNTDMasked512Merging, @@ -2480,12 +2410,58 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPRORQMasked128Merging, ssa.OpAMD64VPRORQMasked256Merging, ssa.OpAMD64VPRORQMasked512Merging, + ssa.OpAMD64VPMOVSWBMasked128_128Merging, + ssa.OpAMD64VPMOVSWBMasked128_256Merging, + ssa.OpAMD64VPMOVSWBMasked256Merging, + ssa.OpAMD64VPMOVSDBMasked128_128Merging, + ssa.OpAMD64VPMOVSDBMasked128_256Merging, + ssa.OpAMD64VPMOVSDBMasked128_512Merging, + ssa.OpAMD64VPMOVSQBMasked128_128Merging, + ssa.OpAMD64VPMOVSQBMasked128_256Merging, + ssa.OpAMD64VPMOVSQBMasked128_512Merging, + ssa.OpAMD64VPMOVSDWMasked128_128Merging, + ssa.OpAMD64VPMOVSDWMasked128_256Merging, + ssa.OpAMD64VPMOVSDWMasked256Merging, + ssa.OpAMD64VPMOVSQWMasked128_128Merging, + ssa.OpAMD64VPMOVSQWMasked128_256Merging, + ssa.OpAMD64VPMOVSQWMasked128_512Merging, + ssa.OpAMD64VPMOVSQDMasked128_128Merging, + ssa.OpAMD64VPMOVSQDMasked128_256Merging, + ssa.OpAMD64VPMOVSQDMasked256Merging, + ssa.OpAMD64VPMOVUSWBMasked256Merging, + ssa.OpAMD64VPMOVUSDWMasked128_128Merging, + ssa.OpAMD64VPMOVUSDWMasked128_256Merging, + ssa.OpAMD64VPMOVUSDWMasked256Merging, + ssa.OpAMD64VPMOVUSQWMasked128_128Merging, + ssa.OpAMD64VPMOVUSQWMasked128_256Merging, + ssa.OpAMD64VPMOVUSQWMasked128_512Merging, + ssa.OpAMD64VPMOVUSQDMasked128_128Merging, + ssa.OpAMD64VPMOVUSQDMasked128_256Merging, + ssa.OpAMD64VPMOVUSQDMasked256Merging, ssa.OpAMD64VSQRTPSMasked128Merging, ssa.OpAMD64VSQRTPSMasked256Merging, ssa.OpAMD64VSQRTPSMasked512Merging, ssa.OpAMD64VSQRTPDMasked128Merging, ssa.OpAMD64VSQRTPDMasked256Merging, ssa.OpAMD64VSQRTPDMasked512Merging, + ssa.OpAMD64VPMOVWBMasked128_128Merging, + ssa.OpAMD64VPMOVWBMasked128_256Merging, + ssa.OpAMD64VPMOVWBMasked256Merging, + ssa.OpAMD64VPMOVDBMasked128_128Merging, + ssa.OpAMD64VPMOVDBMasked128_256Merging, + ssa.OpAMD64VPMOVDBMasked128_512Merging, + ssa.OpAMD64VPMOVQBMasked128_128Merging, + ssa.OpAMD64VPMOVQBMasked128_256Merging, + ssa.OpAMD64VPMOVQBMasked128_512Merging, + ssa.OpAMD64VPMOVDWMasked128_128Merging, + ssa.OpAMD64VPMOVDWMasked128_256Merging, + ssa.OpAMD64VPMOVDWMasked256Merging, + ssa.OpAMD64VPMOVQWMasked128_128Merging, + ssa.OpAMD64VPMOVQWMasked128_256Merging, + ssa.OpAMD64VPMOVQWMasked128_512Merging, + ssa.OpAMD64VPMOVQDMasked128_128Merging, + 
ssa.OpAMD64VPMOVQDMasked128_256Merging, + ssa.OpAMD64VPMOVQDMasked256Merging, ssa.OpAMD64VPSHUFDMasked256Merging, ssa.OpAMD64VPSHUFDMasked512Merging, ssa.OpAMD64VPSHUFHWMasked256Merging, @@ -2719,120 +2695,18 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPALIGNRMasked256, ssa.OpAMD64VPALIGNRMasked512, ssa.OpAMD64VPALIGNRMasked128, - ssa.OpAMD64VPMOVWBMasked128_128, - ssa.OpAMD64VPMOVWBMasked128_256, - ssa.OpAMD64VPMOVWBMasked256, - ssa.OpAMD64VPMOVDBMasked128_128, - ssa.OpAMD64VPMOVDBMasked128_256, - ssa.OpAMD64VPMOVDBMasked128_512, - ssa.OpAMD64VPMOVQBMasked128_128, - ssa.OpAMD64VPMOVQBMasked128_256, - ssa.OpAMD64VPMOVQBMasked128_512, - ssa.OpAMD64VPMOVSWBMasked128_128, - ssa.OpAMD64VPMOVSWBMasked128_256, - ssa.OpAMD64VPMOVSWBMasked256, - ssa.OpAMD64VPMOVSDBMasked128_128, - ssa.OpAMD64VPMOVSDBMasked128_256, - ssa.OpAMD64VPMOVSDBMasked128_512, - ssa.OpAMD64VPMOVSQBMasked128_128, - ssa.OpAMD64VPMOVSQBMasked128_256, - ssa.OpAMD64VPMOVSQBMasked128_512, - ssa.OpAMD64VPMOVSXBWMasked256, - ssa.OpAMD64VPMOVSXBWMasked512, - ssa.OpAMD64VPMOVDWMasked128_128, - ssa.OpAMD64VPMOVDWMasked128_256, - ssa.OpAMD64VPMOVDWMasked256, - ssa.OpAMD64VPMOVQWMasked128_128, - ssa.OpAMD64VPMOVQWMasked128_256, - ssa.OpAMD64VPMOVQWMasked128_512, - ssa.OpAMD64VPMOVSDWMasked128_128, - ssa.OpAMD64VPMOVSDWMasked128_256, - ssa.OpAMD64VPMOVSDWMasked256, - ssa.OpAMD64VPMOVSQWMasked128_128, - ssa.OpAMD64VPMOVSQWMasked128_256, - ssa.OpAMD64VPMOVSQWMasked128_512, - ssa.OpAMD64VPACKSSDWMasked128, - ssa.OpAMD64VPACKSSDWMasked128load, - ssa.OpAMD64VPACKSSDWMasked256, - ssa.OpAMD64VPACKSSDWMasked256load, - ssa.OpAMD64VPACKSSDWMasked512, - ssa.OpAMD64VPACKSSDWMasked512load, - ssa.OpAMD64VPMOVSXBWMasked128, ssa.OpAMD64VCVTTPS2DQMasked128, ssa.OpAMD64VCVTTPS2DQMasked128load, ssa.OpAMD64VCVTTPS2DQMasked256, ssa.OpAMD64VCVTTPS2DQMasked256load, ssa.OpAMD64VCVTTPS2DQMasked512, ssa.OpAMD64VCVTTPS2DQMasked512load, - ssa.OpAMD64VPMOVSXBDMasked512, - ssa.OpAMD64VPMOVSXWDMasked256, - ssa.OpAMD64VPMOVSXWDMasked512, - ssa.OpAMD64VPMOVQDMasked128_128, - ssa.OpAMD64VPMOVQDMasked128_256, - ssa.OpAMD64VPMOVQDMasked256, - ssa.OpAMD64VPMOVSQDMasked128_128, - ssa.OpAMD64VPMOVSQDMasked128_256, - ssa.OpAMD64VPMOVSQDMasked256, - ssa.OpAMD64VPMOVSXBDMasked128, - ssa.OpAMD64VPMOVSXWDMasked128, - ssa.OpAMD64VPMOVSXBDMasked256, - ssa.OpAMD64VPMOVSXWQMasked512, - ssa.OpAMD64VPMOVSXDQMasked256, - ssa.OpAMD64VPMOVSXDQMasked512, - ssa.OpAMD64VPMOVSXBQMasked128, - ssa.OpAMD64VPMOVSXWQMasked128, - ssa.OpAMD64VPMOVSXDQMasked128, - ssa.OpAMD64VPMOVSXBQMasked256, - ssa.OpAMD64VPMOVSXBQMasked512, - ssa.OpAMD64VPMOVUSWBMasked128_128, - ssa.OpAMD64VPMOVUSWBMasked128_256, - ssa.OpAMD64VPMOVUSWBMasked256, - ssa.OpAMD64VPMOVUSDBMasked128_128, - ssa.OpAMD64VPMOVUSDBMasked128_256, - ssa.OpAMD64VPMOVUSDBMasked128_512, - ssa.OpAMD64VPMOVUSQBMasked128_128, - ssa.OpAMD64VPMOVUSQBMasked128_256, - ssa.OpAMD64VPMOVUSQBMasked128_512, - ssa.OpAMD64VPMOVZXBWMasked256, - ssa.OpAMD64VPMOVZXBWMasked512, - ssa.OpAMD64VPMOVUSDWMasked128_128, - ssa.OpAMD64VPMOVUSDWMasked128_256, - ssa.OpAMD64VPMOVUSDWMasked256, - ssa.OpAMD64VPMOVUSQWMasked128_128, - ssa.OpAMD64VPMOVUSQWMasked128_256, - ssa.OpAMD64VPMOVUSQWMasked128_512, - ssa.OpAMD64VPACKUSDWMasked128, - ssa.OpAMD64VPACKUSDWMasked128load, - ssa.OpAMD64VPACKUSDWMasked256, - ssa.OpAMD64VPACKUSDWMasked256load, - ssa.OpAMD64VPACKUSDWMasked512, - ssa.OpAMD64VPACKUSDWMasked512load, - ssa.OpAMD64VPMOVZXBWMasked128, ssa.OpAMD64VCVTPS2UDQMasked128, ssa.OpAMD64VCVTPS2UDQMasked128load, ssa.OpAMD64VCVTPS2UDQMasked256, 
ssa.OpAMD64VCVTPS2UDQMasked256load, ssa.OpAMD64VCVTPS2UDQMasked512, ssa.OpAMD64VCVTPS2UDQMasked512load, - ssa.OpAMD64VPMOVZXBDMasked512, - ssa.OpAMD64VPMOVZXWDMasked256, - ssa.OpAMD64VPMOVZXWDMasked512, - ssa.OpAMD64VPMOVUSQDMasked128_128, - ssa.OpAMD64VPMOVUSQDMasked128_256, - ssa.OpAMD64VPMOVUSQDMasked256, - ssa.OpAMD64VPMOVZXBDMasked128, - ssa.OpAMD64VPMOVZXWDMasked128, - ssa.OpAMD64VPMOVZXBDMasked256, - ssa.OpAMD64VPMOVZXWQMasked512, - ssa.OpAMD64VPMOVZXDQMasked256, - ssa.OpAMD64VPMOVZXDQMasked512, - ssa.OpAMD64VPMOVZXBQMasked128, - ssa.OpAMD64VPMOVZXWQMasked128, - ssa.OpAMD64VPMOVZXDQMasked128, - ssa.OpAMD64VPMOVSXWQMasked256, - ssa.OpAMD64VPMOVZXBQMasked256, - ssa.OpAMD64VPMOVZXWQMasked256, - ssa.OpAMD64VPMOVZXBQMasked512, ssa.OpAMD64VDIVPSMasked128, ssa.OpAMD64VDIVPSMasked128load, ssa.OpAMD64VDIVPSMasked256, @@ -2881,6 +2755,42 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPEXPANDQMasked128, ssa.OpAMD64VPEXPANDQMasked256, ssa.OpAMD64VPEXPANDQMasked512, + ssa.OpAMD64VPMOVSXBQMasked128, + ssa.OpAMD64VPMOVSXWQMasked128, + ssa.OpAMD64VPMOVSXDQMasked128, + ssa.OpAMD64VPMOVZXBQMasked128, + ssa.OpAMD64VPMOVZXWQMasked128, + ssa.OpAMD64VPMOVZXDQMasked128, + ssa.OpAMD64VPMOVSXBDMasked128, + ssa.OpAMD64VPMOVSXWDMasked128, + ssa.OpAMD64VPMOVSXBQMasked256, + ssa.OpAMD64VPMOVSXWQMasked256, + ssa.OpAMD64VPMOVZXBDMasked128, + ssa.OpAMD64VPMOVZXWDMasked128, + ssa.OpAMD64VPMOVZXBQMasked256, + ssa.OpAMD64VPMOVZXWQMasked256, + ssa.OpAMD64VPMOVSXBWMasked128, + ssa.OpAMD64VPMOVSXBDMasked256, + ssa.OpAMD64VPMOVSXBQMasked512, + ssa.OpAMD64VPMOVZXBWMasked128, + ssa.OpAMD64VPMOVZXBDMasked256, + ssa.OpAMD64VPMOVZXBQMasked512, + ssa.OpAMD64VPMOVSXBWMasked256, + ssa.OpAMD64VPMOVSXBWMasked512, + ssa.OpAMD64VPMOVSXBDMasked512, + ssa.OpAMD64VPMOVSXWDMasked256, + ssa.OpAMD64VPMOVSXWDMasked512, + ssa.OpAMD64VPMOVSXWQMasked512, + ssa.OpAMD64VPMOVSXDQMasked256, + ssa.OpAMD64VPMOVSXDQMasked512, + ssa.OpAMD64VPMOVZXBWMasked256, + ssa.OpAMD64VPMOVZXBWMasked512, + ssa.OpAMD64VPMOVZXBDMasked512, + ssa.OpAMD64VPMOVZXWDMasked256, + ssa.OpAMD64VPMOVZXWDMasked512, + ssa.OpAMD64VPMOVZXWQMasked512, + ssa.OpAMD64VPMOVZXDQMasked256, + ssa.OpAMD64VPMOVZXDQMasked512, ssa.OpAMD64VGF2P8AFFINEINVQBMasked128, ssa.OpAMD64VGF2P8AFFINEINVQBMasked128load, ssa.OpAMD64VGF2P8AFFINEINVQBMasked256, @@ -3200,6 +3110,46 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPRORVQMasked256load, ssa.OpAMD64VPRORVQMasked512, ssa.OpAMD64VPRORVQMasked512load, + ssa.OpAMD64VPMOVSWBMasked128_128, + ssa.OpAMD64VPMOVSWBMasked128_256, + ssa.OpAMD64VPMOVSWBMasked256, + ssa.OpAMD64VPMOVSDBMasked128_128, + ssa.OpAMD64VPMOVSDBMasked128_256, + ssa.OpAMD64VPMOVSDBMasked128_512, + ssa.OpAMD64VPMOVSQBMasked128_128, + ssa.OpAMD64VPMOVSQBMasked128_256, + ssa.OpAMD64VPMOVSQBMasked128_512, + ssa.OpAMD64VPACKSSDWMasked128, + ssa.OpAMD64VPACKSSDWMasked128load, + ssa.OpAMD64VPACKSSDWMasked256, + ssa.OpAMD64VPACKSSDWMasked256load, + ssa.OpAMD64VPACKSSDWMasked512, + ssa.OpAMD64VPACKSSDWMasked512load, + ssa.OpAMD64VPMOVSDWMasked128_128, + ssa.OpAMD64VPMOVSDWMasked128_256, + ssa.OpAMD64VPMOVSDWMasked256, + ssa.OpAMD64VPMOVSQWMasked128_128, + ssa.OpAMD64VPMOVSQWMasked128_256, + ssa.OpAMD64VPMOVSQWMasked128_512, + ssa.OpAMD64VPMOVSQDMasked128_128, + ssa.OpAMD64VPMOVSQDMasked128_256, + ssa.OpAMD64VPMOVSQDMasked256, + ssa.OpAMD64VPMOVUSWBMasked256, + ssa.OpAMD64VPACKUSDWMasked128, + ssa.OpAMD64VPACKUSDWMasked128load, + ssa.OpAMD64VPACKUSDWMasked256, + ssa.OpAMD64VPACKUSDWMasked256load, + ssa.OpAMD64VPACKUSDWMasked512, + 
ssa.OpAMD64VPACKUSDWMasked512load, + ssa.OpAMD64VPMOVUSDWMasked128_128, + ssa.OpAMD64VPMOVUSDWMasked128_256, + ssa.OpAMD64VPMOVUSDWMasked256, + ssa.OpAMD64VPMOVUSQWMasked128_128, + ssa.OpAMD64VPMOVUSQWMasked128_256, + ssa.OpAMD64VPMOVUSQWMasked128_512, + ssa.OpAMD64VPMOVUSQDMasked128_128, + ssa.OpAMD64VPMOVUSQDMasked128_256, + ssa.OpAMD64VPMOVUSQDMasked256, ssa.OpAMD64VSCALEFPSMasked128, ssa.OpAMD64VSCALEFPSMasked128load, ssa.OpAMD64VSCALEFPSMasked256, @@ -3398,6 +3348,24 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPSUBUSWMasked128, ssa.OpAMD64VPSUBUSWMasked256, ssa.OpAMD64VPSUBUSWMasked512, + ssa.OpAMD64VPMOVWBMasked128_128, + ssa.OpAMD64VPMOVWBMasked128_256, + ssa.OpAMD64VPMOVWBMasked256, + ssa.OpAMD64VPMOVDBMasked128_128, + ssa.OpAMD64VPMOVDBMasked128_256, + ssa.OpAMD64VPMOVDBMasked128_512, + ssa.OpAMD64VPMOVQBMasked128_128, + ssa.OpAMD64VPMOVQBMasked128_256, + ssa.OpAMD64VPMOVQBMasked128_512, + ssa.OpAMD64VPMOVDWMasked128_128, + ssa.OpAMD64VPMOVDWMasked128_256, + ssa.OpAMD64VPMOVDWMasked256, + ssa.OpAMD64VPMOVQWMasked128_128, + ssa.OpAMD64VPMOVQWMasked128_256, + ssa.OpAMD64VPMOVQWMasked128_512, + ssa.OpAMD64VPMOVQDMasked128_128, + ssa.OpAMD64VPMOVQDMasked128_256, + ssa.OpAMD64VPMOVQDMasked256, ssa.OpAMD64VPXORDMasked128, ssa.OpAMD64VPXORDMasked128load, ssa.OpAMD64VPXORDMasked256, diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules index 24d9f1a3d30..464db33d3b2 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules @@ -249,126 +249,12 @@ (ConcatShiftBytesRightUint8x16 ...) => (VPALIGNR128 ...) (ConcatShiftBytesRightGroupedUint8x32 ...) => (VPALIGNR256 ...) (ConcatShiftBytesRightGroupedUint8x64 ...) => (VPALIGNR512 ...) -(ConvertToInt8Int16x8 ...) => (VPMOVWB128_128 ...) -(ConvertToInt8Int16x16 ...) => (VPMOVWB128_256 ...) -(ConvertToInt8Int16x32 ...) => (VPMOVWB256 ...) -(ConvertToInt8Int32x4 ...) => (VPMOVDB128_128 ...) -(ConvertToInt8Int32x8 ...) => (VPMOVDB128_256 ...) -(ConvertToInt8Int32x16 ...) => (VPMOVDB128_512 ...) -(ConvertToInt8Int64x2 ...) => (VPMOVQB128_128 ...) -(ConvertToInt8Int64x4 ...) => (VPMOVQB128_256 ...) -(ConvertToInt8Int64x8 ...) => (VPMOVQB128_512 ...) -(ConvertToInt8SaturatedInt16x8 ...) => (VPMOVSWB128_128 ...) -(ConvertToInt8SaturatedInt16x16 ...) => (VPMOVSWB128_256 ...) -(ConvertToInt8SaturatedInt16x32 ...) => (VPMOVSWB256 ...) -(ConvertToInt8SaturatedInt32x4 ...) => (VPMOVSDB128_128 ...) -(ConvertToInt8SaturatedInt32x8 ...) => (VPMOVSDB128_256 ...) -(ConvertToInt8SaturatedInt32x16 ...) => (VPMOVSDB128_512 ...) -(ConvertToInt8SaturatedInt64x2 ...) => (VPMOVSQB128_128 ...) -(ConvertToInt8SaturatedInt64x4 ...) => (VPMOVSQB128_256 ...) -(ConvertToInt8SaturatedInt64x8 ...) => (VPMOVSQB128_512 ...) -(ConvertToInt16Int8x16 ...) => (VPMOVSXBW256 ...) -(ConvertToInt16Int8x32 ...) => (VPMOVSXBW512 ...) -(ConvertToInt16Int32x4 ...) => (VPMOVDW128_128 ...) -(ConvertToInt16Int32x8 ...) => (VPMOVDW128_256 ...) -(ConvertToInt16Int32x16 ...) => (VPMOVDW256 ...) -(ConvertToInt16Int64x2 ...) => (VPMOVQW128_128 ...) -(ConvertToInt16Int64x4 ...) => (VPMOVQW128_256 ...) -(ConvertToInt16Int64x8 ...) => (VPMOVQW128_512 ...) -(ConvertToInt16SaturatedInt32x4 ...) => (VPMOVSDW128_128 ...) -(ConvertToInt16SaturatedInt32x8 ...) => (VPMOVSDW128_256 ...) -(ConvertToInt16SaturatedInt32x16 ...) => (VPMOVSDW256 ...) -(ConvertToInt16SaturatedInt64x2 ...) => (VPMOVSQW128_128 ...) -(ConvertToInt16SaturatedInt64x4 ...) 
=> (VPMOVSQW128_256 ...) -(ConvertToInt16SaturatedInt64x8 ...) => (VPMOVSQW128_512 ...) -(ConvertToInt16SaturatedPackedInt32x4 ...) => (VPACKSSDW128 ...) -(ConvertToInt16SaturatedPackedInt32x8 ...) => (VPACKSSDW256 ...) -(ConvertToInt16SaturatedPackedInt32x16 ...) => (VPACKSSDW512 ...) -(ConvertToInt16x8Int8x16 ...) => (VPMOVSXBW128 ...) (ConvertToInt32Float32x4 ...) => (VCVTTPS2DQ128 ...) (ConvertToInt32Float32x8 ...) => (VCVTTPS2DQ256 ...) (ConvertToInt32Float32x16 ...) => (VCVTTPS2DQ512 ...) -(ConvertToInt32Int8x16 ...) => (VPMOVSXBD512 ...) -(ConvertToInt32Int16x8 ...) => (VPMOVSXWD256 ...) -(ConvertToInt32Int16x16 ...) => (VPMOVSXWD512 ...) -(ConvertToInt32Int64x2 ...) => (VPMOVQD128_128 ...) -(ConvertToInt32Int64x4 ...) => (VPMOVQD128_256 ...) -(ConvertToInt32Int64x8 ...) => (VPMOVQD256 ...) -(ConvertToInt32SaturatedInt64x2 ...) => (VPMOVSQD128_128 ...) -(ConvertToInt32SaturatedInt64x4 ...) => (VPMOVSQD128_256 ...) -(ConvertToInt32SaturatedInt64x8 ...) => (VPMOVSQD256 ...) -(ConvertToInt32x4Int8x16 ...) => (VPMOVSXBD128 ...) -(ConvertToInt32x4Int16x8 ...) => (VPMOVSXWD128 ...) -(ConvertToInt32x8Int8x16 ...) => (VPMOVSXBD256 ...) -(ConvertToInt64Int16x8 ...) => (VPMOVSXWQ512 ...) -(ConvertToInt64Int32x4 ...) => (VPMOVSXDQ256 ...) -(ConvertToInt64Int32x8 ...) => (VPMOVSXDQ512 ...) -(ConvertToInt64x2Int8x16 ...) => (VPMOVSXBQ128 ...) -(ConvertToInt64x2Int16x8 ...) => (VPMOVSXWQ128 ...) -(ConvertToInt64x2Int32x4 ...) => (VPMOVSXDQ128 ...) -(ConvertToInt64x4Int8x16 ...) => (VPMOVSXBQ256 ...) -(ConvertToInt64x8Int8x16 ...) => (VPMOVSXBQ512 ...) -(ConvertToUint8Uint16x8 ...) => (VPMOVWB128_128 ...) -(ConvertToUint8Uint16x16 ...) => (VPMOVWB128_256 ...) -(ConvertToUint8Uint16x32 ...) => (VPMOVWB256 ...) -(ConvertToUint8Uint32x4 ...) => (VPMOVDB128_128 ...) -(ConvertToUint8Uint32x8 ...) => (VPMOVDB128_256 ...) -(ConvertToUint8Uint32x16 ...) => (VPMOVDB128_512 ...) -(ConvertToUint8Uint64x2 ...) => (VPMOVQB128_128 ...) -(ConvertToUint8Uint64x4 ...) => (VPMOVQB128_256 ...) -(ConvertToUint8Uint64x8 ...) => (VPMOVQB128_512 ...) -(ConvertToUint8SaturatedUint16x8 ...) => (VPMOVUSWB128_128 ...) -(ConvertToUint8SaturatedUint16x16 ...) => (VPMOVUSWB128_256 ...) -(ConvertToUint8SaturatedUint16x32 ...) => (VPMOVUSWB256 ...) -(ConvertToUint8SaturatedUint32x4 ...) => (VPMOVUSDB128_128 ...) -(ConvertToUint8SaturatedUint32x8 ...) => (VPMOVUSDB128_256 ...) -(ConvertToUint8SaturatedUint32x16 ...) => (VPMOVUSDB128_512 ...) -(ConvertToUint8SaturatedUint64x2 ...) => (VPMOVUSQB128_128 ...) -(ConvertToUint8SaturatedUint64x4 ...) => (VPMOVUSQB128_256 ...) -(ConvertToUint8SaturatedUint64x8 ...) => (VPMOVUSQB128_512 ...) -(ConvertToUint16Uint8x16 ...) => (VPMOVZXBW256 ...) -(ConvertToUint16Uint8x32 ...) => (VPMOVZXBW512 ...) -(ConvertToUint16Uint32x4 ...) => (VPMOVDW128_128 ...) -(ConvertToUint16Uint32x8 ...) => (VPMOVDW128_256 ...) -(ConvertToUint16Uint32x16 ...) => (VPMOVDW256 ...) -(ConvertToUint16Uint64x2 ...) => (VPMOVQW128_128 ...) -(ConvertToUint16Uint64x4 ...) => (VPMOVQW128_256 ...) -(ConvertToUint16Uint64x8 ...) => (VPMOVQW128_512 ...) -(ConvertToUint16SaturatedUint32x4 ...) => (VPMOVUSDW128_128 ...) -(ConvertToUint16SaturatedUint32x8 ...) => (VPMOVUSDW128_256 ...) -(ConvertToUint16SaturatedUint32x16 ...) => (VPMOVUSDW256 ...) -(ConvertToUint16SaturatedUint64x2 ...) => (VPMOVUSQW128_128 ...) -(ConvertToUint16SaturatedUint64x4 ...) => (VPMOVUSQW128_256 ...) -(ConvertToUint16SaturatedUint64x8 ...) => (VPMOVUSQW128_512 ...) -(ConvertToUint16SaturatedPackedUint32x4 ...) => (VPACKUSDW128 ...) 
-(ConvertToUint16SaturatedPackedUint32x8 ...) => (VPACKUSDW256 ...) -(ConvertToUint16SaturatedPackedUint32x16 ...) => (VPACKUSDW512 ...) -(ConvertToUint16x8Uint8x16 ...) => (VPMOVZXBW128 ...) (ConvertToUint32Float32x4 ...) => (VCVTPS2UDQ128 ...) (ConvertToUint32Float32x8 ...) => (VCVTPS2UDQ256 ...) (ConvertToUint32Float32x16 ...) => (VCVTPS2UDQ512 ...) -(ConvertToUint32Uint8x16 ...) => (VPMOVZXBD512 ...) -(ConvertToUint32Uint16x8 ...) => (VPMOVZXWD256 ...) -(ConvertToUint32Uint16x16 ...) => (VPMOVZXWD512 ...) -(ConvertToUint32Uint64x2 ...) => (VPMOVQD128_128 ...) -(ConvertToUint32Uint64x4 ...) => (VPMOVQD128_256 ...) -(ConvertToUint32Uint64x8 ...) => (VPMOVQD256 ...) -(ConvertToUint32SaturatedUint64x2 ...) => (VPMOVUSQD128_128 ...) -(ConvertToUint32SaturatedUint64x4 ...) => (VPMOVUSQD128_256 ...) -(ConvertToUint32SaturatedUint64x8 ...) => (VPMOVUSQD256 ...) -(ConvertToUint32x4Uint8x16 ...) => (VPMOVZXBD128 ...) -(ConvertToUint32x4Uint16x8 ...) => (VPMOVZXWD128 ...) -(ConvertToUint32x8Uint8x16 ...) => (VPMOVZXBD256 ...) -(ConvertToUint64Uint16x8 ...) => (VPMOVZXWQ512 ...) -(ConvertToUint64Uint32x4 ...) => (VPMOVZXDQ256 ...) -(ConvertToUint64Uint32x8 ...) => (VPMOVZXDQ512 ...) -(ConvertToUint64x2Uint8x16 ...) => (VPMOVZXBQ128 ...) -(ConvertToUint64x2Uint16x8 ...) => (VPMOVZXWQ128 ...) -(ConvertToUint64x2Uint32x4 ...) => (VPMOVZXDQ128 ...) -(ConvertToUint64x4Int16x8 ...) => (VPMOVSXWQ256 ...) -(ConvertToUint64x4Uint8x16 ...) => (VPMOVZXBQ256 ...) -(ConvertToUint64x4Uint16x8 ...) => (VPMOVZXWQ256 ...) -(ConvertToUint64x8Uint8x16 ...) => (VPMOVZXBQ512 ...) (CopySignInt8x16 ...) => (VPSIGNB128 ...) (CopySignInt8x32 ...) => (VPSIGNB256 ...) (CopySignInt16x8 ...) => (VPSIGNW128 ...) @@ -453,6 +339,42 @@ (ExpandUint64x2 x mask) => (VPEXPANDQMasked128 x (VPMOVVec64x2ToM mask)) (ExpandUint64x4 x mask) => (VPEXPANDQMasked256 x (VPMOVVec64x4ToM mask)) (ExpandUint64x8 x mask) => (VPEXPANDQMasked512 x (VPMOVVec64x8ToM mask)) +(ExtendLo2ToInt64x2Int8x16 ...) => (VPMOVSXBQ128 ...) +(ExtendLo2ToInt64x2Int16x8 ...) => (VPMOVSXWQ128 ...) +(ExtendLo2ToInt64x2Int32x4 ...) => (VPMOVSXDQ128 ...) +(ExtendLo2ToUint64x2Uint8x16 ...) => (VPMOVZXBQ128 ...) +(ExtendLo2ToUint64x2Uint16x8 ...) => (VPMOVZXWQ128 ...) +(ExtendLo2ToUint64x2Uint32x4 ...) => (VPMOVZXDQ128 ...) +(ExtendLo4ToInt32x4Int8x16 ...) => (VPMOVSXBD128 ...) +(ExtendLo4ToInt32x4Int16x8 ...) => (VPMOVSXWD128 ...) +(ExtendLo4ToInt64x4Int8x16 ...) => (VPMOVSXBQ256 ...) +(ExtendLo4ToInt64x4Int16x8 ...) => (VPMOVSXWQ256 ...) +(ExtendLo4ToUint32x4Uint8x16 ...) => (VPMOVZXBD128 ...) +(ExtendLo4ToUint32x4Uint16x8 ...) => (VPMOVZXWD128 ...) +(ExtendLo4ToUint64x4Uint8x16 ...) => (VPMOVZXBQ256 ...) +(ExtendLo4ToUint64x4Uint16x8 ...) => (VPMOVZXWQ256 ...) +(ExtendLo8ToInt16x8Int8x16 ...) => (VPMOVSXBW128 ...) +(ExtendLo8ToInt32x8Int8x16 ...) => (VPMOVSXBD256 ...) +(ExtendLo8ToInt64x8Int8x16 ...) => (VPMOVSXBQ512 ...) +(ExtendLo8ToUint16x8Uint8x16 ...) => (VPMOVZXBW128 ...) +(ExtendLo8ToUint32x8Uint8x16 ...) => (VPMOVZXBD256 ...) +(ExtendLo8ToUint64x8Uint8x16 ...) => (VPMOVZXBQ512 ...) +(ExtendToInt16Int8x16 ...) => (VPMOVSXBW256 ...) +(ExtendToInt16Int8x32 ...) => (VPMOVSXBW512 ...) +(ExtendToInt32Int8x16 ...) => (VPMOVSXBD512 ...) +(ExtendToInt32Int16x8 ...) => (VPMOVSXWD256 ...) +(ExtendToInt32Int16x16 ...) => (VPMOVSXWD512 ...) +(ExtendToInt64Int16x8 ...) => (VPMOVSXWQ512 ...) +(ExtendToInt64Int32x4 ...) => (VPMOVSXDQ256 ...) +(ExtendToInt64Int32x8 ...) => (VPMOVSXDQ512 ...) +(ExtendToUint16Uint8x16 ...) => (VPMOVZXBW256 ...) +(ExtendToUint16Uint8x32 ...) 
=> (VPMOVZXBW512 ...) +(ExtendToUint32Uint8x16 ...) => (VPMOVZXBD512 ...) +(ExtendToUint32Uint16x8 ...) => (VPMOVZXWD256 ...) +(ExtendToUint32Uint16x16 ...) => (VPMOVZXWD512 ...) +(ExtendToUint64Uint16x8 ...) => (VPMOVZXWQ512 ...) +(ExtendToUint64Uint32x4 ...) => (VPMOVZXDQ256 ...) +(ExtendToUint64Uint32x8 ...) => (VPMOVZXDQ512 ...) (FloorFloat32x4 x) => (VROUNDPS128 [1] x) (FloorFloat32x8 x) => (VROUNDPS256 [1] x) (FloorFloat64x2 x) => (VROUNDPD128 [1] x) @@ -933,6 +855,48 @@ (SHA256Message1Uint32x4 ...) => (SHA256MSG1128 ...) (SHA256Message2Uint32x4 ...) => (SHA256MSG2128 ...) (SHA256TwoRoundsUint32x4 ...) => (SHA256RNDS2128 ...) +(SaturateToInt8Int16x8 ...) => (VPMOVSWB128_128 ...) +(SaturateToInt8Int16x16 ...) => (VPMOVSWB128_256 ...) +(SaturateToInt8Int16x32 ...) => (VPMOVSWB256 ...) +(SaturateToInt8Int32x4 ...) => (VPMOVSDB128_128 ...) +(SaturateToInt8Int32x8 ...) => (VPMOVSDB128_256 ...) +(SaturateToInt8Int32x16 ...) => (VPMOVSDB128_512 ...) +(SaturateToInt8Int64x2 ...) => (VPMOVSQB128_128 ...) +(SaturateToInt8Int64x4 ...) => (VPMOVSQB128_256 ...) +(SaturateToInt8Int64x8 ...) => (VPMOVSQB128_512 ...) +(SaturateToInt16Int32x4 ...) => (VPMOVSDW128_128 ...) +(SaturateToInt16Int32x8 ...) => (VPMOVSDW128_256 ...) +(SaturateToInt16Int32x16 ...) => (VPMOVSDW256 ...) +(SaturateToInt16Int64x2 ...) => (VPMOVSQW128_128 ...) +(SaturateToInt16Int64x4 ...) => (VPMOVSQW128_256 ...) +(SaturateToInt16Int64x8 ...) => (VPMOVSQW128_512 ...) +(SaturateToInt16ConcatInt32x4 ...) => (VPACKSSDW128 ...) +(SaturateToInt16ConcatInt32x8 ...) => (VPACKSSDW256 ...) +(SaturateToInt16ConcatInt32x16 ...) => (VPACKSSDW512 ...) +(SaturateToInt32Int64x2 ...) => (VPMOVSQD128_128 ...) +(SaturateToInt32Int64x4 ...) => (VPMOVSQD128_256 ...) +(SaturateToInt32Int64x8 ...) => (VPMOVSQD256 ...) +(SaturateToUint8Int16x8 ...) => (VPMOVSWB128_128 ...) +(SaturateToUint8Int16x16 ...) => (VPMOVSWB128_256 ...) +(SaturateToUint8Int32x4 ...) => (VPMOVSDB128_128 ...) +(SaturateToUint8Int32x8 ...) => (VPMOVSDB128_256 ...) +(SaturateToUint8Int32x16 ...) => (VPMOVSDB128_512 ...) +(SaturateToUint8Int64x2 ...) => (VPMOVSQB128_128 ...) +(SaturateToUint8Int64x4 ...) => (VPMOVSQB128_256 ...) +(SaturateToUint8Int64x8 ...) => (VPMOVSQB128_512 ...) +(SaturateToUint8Uint16x32 ...) => (VPMOVUSWB256 ...) +(SaturateToUint16Uint32x4 ...) => (VPMOVUSDW128_128 ...) +(SaturateToUint16Uint32x8 ...) => (VPMOVUSDW128_256 ...) +(SaturateToUint16Uint32x16 ...) => (VPMOVUSDW256 ...) +(SaturateToUint16Uint64x2 ...) => (VPMOVUSQW128_128 ...) +(SaturateToUint16Uint64x4 ...) => (VPMOVUSQW128_256 ...) +(SaturateToUint16Uint64x8 ...) => (VPMOVUSQW128_512 ...) +(SaturateToUint16ConcatUint32x4 ...) => (VPACKUSDW128 ...) +(SaturateToUint16ConcatUint32x8 ...) => (VPACKUSDW256 ...) +(SaturateToUint16ConcatUint32x16 ...) => (VPACKUSDW512 ...) +(SaturateToUint32Uint64x2 ...) => (VPMOVUSQD128_128 ...) +(SaturateToUint32Uint64x4 ...) => (VPMOVUSQD128_256 ...) +(SaturateToUint32Uint64x8 ...) => (VPMOVUSQD256 ...) (ScaleFloat32x4 ...) => (VSCALEFPS128 ...) (ScaleFloat32x8 ...) => (VSCALEFPS256 ...) (ScaleFloat32x16 ...) => (VSCALEFPS512 ...) @@ -1260,6 +1224,42 @@ (TruncScaledResidueFloat64x2 [a] x) => (VREDUCEPD128 [a+3] x) (TruncScaledResidueFloat64x4 [a] x) => (VREDUCEPD256 [a+3] x) (TruncScaledResidueFloat64x8 [a] x) => (VREDUCEPD512 [a+3] x) +(TruncateToInt8Int16x8 ...) => (VPMOVWB128_128 ...) +(TruncateToInt8Int16x16 ...) => (VPMOVWB128_256 ...) +(TruncateToInt8Int16x32 ...) => (VPMOVWB256 ...) +(TruncateToInt8Int32x4 ...) => (VPMOVDB128_128 ...) 
+(TruncateToInt8Int32x8 ...) => (VPMOVDB128_256 ...) +(TruncateToInt8Int32x16 ...) => (VPMOVDB128_512 ...) +(TruncateToInt8Int64x2 ...) => (VPMOVQB128_128 ...) +(TruncateToInt8Int64x4 ...) => (VPMOVQB128_256 ...) +(TruncateToInt8Int64x8 ...) => (VPMOVQB128_512 ...) +(TruncateToInt16Int32x4 ...) => (VPMOVDW128_128 ...) +(TruncateToInt16Int32x8 ...) => (VPMOVDW128_256 ...) +(TruncateToInt16Int32x16 ...) => (VPMOVDW256 ...) +(TruncateToInt16Int64x2 ...) => (VPMOVQW128_128 ...) +(TruncateToInt16Int64x4 ...) => (VPMOVQW128_256 ...) +(TruncateToInt16Int64x8 ...) => (VPMOVQW128_512 ...) +(TruncateToInt32Int64x2 ...) => (VPMOVQD128_128 ...) +(TruncateToInt32Int64x4 ...) => (VPMOVQD128_256 ...) +(TruncateToInt32Int64x8 ...) => (VPMOVQD256 ...) +(TruncateToUint8Uint16x8 ...) => (VPMOVWB128_128 ...) +(TruncateToUint8Uint16x16 ...) => (VPMOVWB128_256 ...) +(TruncateToUint8Uint16x32 ...) => (VPMOVWB256 ...) +(TruncateToUint8Uint32x4 ...) => (VPMOVDB128_128 ...) +(TruncateToUint8Uint32x8 ...) => (VPMOVDB128_256 ...) +(TruncateToUint8Uint32x16 ...) => (VPMOVDB128_512 ...) +(TruncateToUint8Uint64x2 ...) => (VPMOVQB128_128 ...) +(TruncateToUint8Uint64x4 ...) => (VPMOVQB128_256 ...) +(TruncateToUint8Uint64x8 ...) => (VPMOVQB128_512 ...) +(TruncateToUint16Uint32x4 ...) => (VPMOVDW128_128 ...) +(TruncateToUint16Uint32x8 ...) => (VPMOVDW128_256 ...) +(TruncateToUint16Uint32x16 ...) => (VPMOVDW256 ...) +(TruncateToUint16Uint64x2 ...) => (VPMOVQW128_128 ...) +(TruncateToUint16Uint64x4 ...) => (VPMOVQW128_256 ...) +(TruncateToUint16Uint64x8 ...) => (VPMOVQW128_512 ...) +(TruncateToUint32Uint64x2 ...) => (VPMOVQD128_128 ...) +(TruncateToUint32Uint64x4 ...) => (VPMOVQD128_256 ...) +(TruncateToUint32Uint64x8 ...) => (VPMOVQD256 ...) (XorInt8x16 ...) => (VPXOR128 ...) (XorInt8x32 ...) => (VPXOR256 ...) (XorInt8x64 ...) => (VPXORD512 ...) 
@@ -1440,108 +1440,12 @@ (VMOVDQU8Masked256 (VPALIGNR256 [a] x y) mask) => (VPALIGNRMasked256 [a] x y mask) (VMOVDQU8Masked512 (VPALIGNR512 [a] x y) mask) => (VPALIGNRMasked512 [a] x y mask) (VMOVDQU8Masked128 (VPALIGNR128 [a] x y) mask) => (VPALIGNRMasked128 [a] x y mask) -(VMOVDQU16Masked128 (VPMOVWB128_128 x) mask) => (VPMOVWBMasked128_128 x mask) -(VMOVDQU16Masked256 (VPMOVWB128_256 x) mask) => (VPMOVWBMasked128_256 x mask) -(VMOVDQU16Masked256 (VPMOVWB256 x) mask) => (VPMOVWBMasked256 x mask) -(VMOVDQU32Masked128 (VPMOVDB128_128 x) mask) => (VPMOVDBMasked128_128 x mask) -(VMOVDQU32Masked256 (VPMOVDB128_256 x) mask) => (VPMOVDBMasked128_256 x mask) -(VMOVDQU32Masked512 (VPMOVDB128_512 x) mask) => (VPMOVDBMasked128_512 x mask) -(VMOVDQU64Masked128 (VPMOVQB128_128 x) mask) => (VPMOVQBMasked128_128 x mask) -(VMOVDQU64Masked256 (VPMOVQB128_256 x) mask) => (VPMOVQBMasked128_256 x mask) -(VMOVDQU64Masked512 (VPMOVQB128_512 x) mask) => (VPMOVQBMasked128_512 x mask) -(VMOVDQU16Masked128 (VPMOVSWB128_128 x) mask) => (VPMOVSWBMasked128_128 x mask) -(VMOVDQU16Masked256 (VPMOVSWB128_256 x) mask) => (VPMOVSWBMasked128_256 x mask) -(VMOVDQU16Masked256 (VPMOVSWB256 x) mask) => (VPMOVSWBMasked256 x mask) -(VMOVDQU32Masked128 (VPMOVSDB128_128 x) mask) => (VPMOVSDBMasked128_128 x mask) -(VMOVDQU32Masked256 (VPMOVSDB128_256 x) mask) => (VPMOVSDBMasked128_256 x mask) -(VMOVDQU32Masked512 (VPMOVSDB128_512 x) mask) => (VPMOVSDBMasked128_512 x mask) -(VMOVDQU64Masked128 (VPMOVSQB128_128 x) mask) => (VPMOVSQBMasked128_128 x mask) -(VMOVDQU64Masked256 (VPMOVSQB128_256 x) mask) => (VPMOVSQBMasked128_256 x mask) -(VMOVDQU64Masked512 (VPMOVSQB128_512 x) mask) => (VPMOVSQBMasked128_512 x mask) -(VMOVDQU8Masked256 (VPMOVSXBW256 x) mask) => (VPMOVSXBWMasked256 x mask) -(VMOVDQU8Masked512 (VPMOVSXBW512 x) mask) => (VPMOVSXBWMasked512 x mask) -(VMOVDQU32Masked128 (VPMOVDW128_128 x) mask) => (VPMOVDWMasked128_128 x mask) -(VMOVDQU32Masked256 (VPMOVDW128_256 x) mask) => (VPMOVDWMasked128_256 x mask) -(VMOVDQU32Masked256 (VPMOVDW256 x) mask) => (VPMOVDWMasked256 x mask) -(VMOVDQU64Masked128 (VPMOVQW128_128 x) mask) => (VPMOVQWMasked128_128 x mask) -(VMOVDQU64Masked256 (VPMOVQW128_256 x) mask) => (VPMOVQWMasked128_256 x mask) -(VMOVDQU64Masked512 (VPMOVQW128_512 x) mask) => (VPMOVQWMasked128_512 x mask) -(VMOVDQU32Masked128 (VPMOVSDW128_128 x) mask) => (VPMOVSDWMasked128_128 x mask) -(VMOVDQU32Masked256 (VPMOVSDW128_256 x) mask) => (VPMOVSDWMasked128_256 x mask) -(VMOVDQU32Masked256 (VPMOVSDW256 x) mask) => (VPMOVSDWMasked256 x mask) -(VMOVDQU64Masked128 (VPMOVSQW128_128 x) mask) => (VPMOVSQWMasked128_128 x mask) -(VMOVDQU64Masked256 (VPMOVSQW128_256 x) mask) => (VPMOVSQWMasked128_256 x mask) -(VMOVDQU64Masked512 (VPMOVSQW128_512 x) mask) => (VPMOVSQWMasked128_512 x mask) -(VMOVDQU32Masked128 (VPACKSSDW128 x y) mask) => (VPACKSSDWMasked128 x y mask) -(VMOVDQU32Masked256 (VPACKSSDW256 x y) mask) => (VPACKSSDWMasked256 x y mask) -(VMOVDQU32Masked512 (VPACKSSDW512 x y) mask) => (VPACKSSDWMasked512 x y mask) -(VMOVDQU8Masked128 (VPMOVSXBW128 x) mask) => (VPMOVSXBWMasked128 x mask) (VMOVDQU32Masked128 (VCVTTPS2DQ128 x) mask) => (VCVTTPS2DQMasked128 x mask) (VMOVDQU32Masked256 (VCVTTPS2DQ256 x) mask) => (VCVTTPS2DQMasked256 x mask) (VMOVDQU32Masked512 (VCVTTPS2DQ512 x) mask) => (VCVTTPS2DQMasked512 x mask) -(VMOVDQU8Masked512 (VPMOVSXBD512 x) mask) => (VPMOVSXBDMasked512 x mask) -(VMOVDQU16Masked256 (VPMOVSXWD256 x) mask) => (VPMOVSXWDMasked256 x mask) -(VMOVDQU16Masked512 (VPMOVSXWD512 x) mask) => (VPMOVSXWDMasked512 x mask) 
-(VMOVDQU64Masked128 (VPMOVQD128_128 x) mask) => (VPMOVQDMasked128_128 x mask) -(VMOVDQU64Masked256 (VPMOVQD128_256 x) mask) => (VPMOVQDMasked128_256 x mask) -(VMOVDQU64Masked256 (VPMOVQD256 x) mask) => (VPMOVQDMasked256 x mask) -(VMOVDQU64Masked128 (VPMOVSQD128_128 x) mask) => (VPMOVSQDMasked128_128 x mask) -(VMOVDQU64Masked256 (VPMOVSQD128_256 x) mask) => (VPMOVSQDMasked128_256 x mask) -(VMOVDQU64Masked256 (VPMOVSQD256 x) mask) => (VPMOVSQDMasked256 x mask) -(VMOVDQU8Masked128 (VPMOVSXBD128 x) mask) => (VPMOVSXBDMasked128 x mask) -(VMOVDQU16Masked128 (VPMOVSXWD128 x) mask) => (VPMOVSXWDMasked128 x mask) -(VMOVDQU8Masked256 (VPMOVSXBD256 x) mask) => (VPMOVSXBDMasked256 x mask) -(VMOVDQU16Masked512 (VPMOVSXWQ512 x) mask) => (VPMOVSXWQMasked512 x mask) -(VMOVDQU32Masked256 (VPMOVSXDQ256 x) mask) => (VPMOVSXDQMasked256 x mask) -(VMOVDQU32Masked512 (VPMOVSXDQ512 x) mask) => (VPMOVSXDQMasked512 x mask) -(VMOVDQU8Masked128 (VPMOVSXBQ128 x) mask) => (VPMOVSXBQMasked128 x mask) -(VMOVDQU16Masked128 (VPMOVSXWQ128 x) mask) => (VPMOVSXWQMasked128 x mask) -(VMOVDQU32Masked128 (VPMOVSXDQ128 x) mask) => (VPMOVSXDQMasked128 x mask) -(VMOVDQU8Masked256 (VPMOVSXBQ256 x) mask) => (VPMOVSXBQMasked256 x mask) -(VMOVDQU8Masked512 (VPMOVSXBQ512 x) mask) => (VPMOVSXBQMasked512 x mask) -(VMOVDQU16Masked128 (VPMOVUSWB128_128 x) mask) => (VPMOVUSWBMasked128_128 x mask) -(VMOVDQU16Masked256 (VPMOVUSWB128_256 x) mask) => (VPMOVUSWBMasked128_256 x mask) -(VMOVDQU16Masked256 (VPMOVUSWB256 x) mask) => (VPMOVUSWBMasked256 x mask) -(VMOVDQU32Masked128 (VPMOVUSDB128_128 x) mask) => (VPMOVUSDBMasked128_128 x mask) -(VMOVDQU32Masked256 (VPMOVUSDB128_256 x) mask) => (VPMOVUSDBMasked128_256 x mask) -(VMOVDQU32Masked512 (VPMOVUSDB128_512 x) mask) => (VPMOVUSDBMasked128_512 x mask) -(VMOVDQU64Masked128 (VPMOVUSQB128_128 x) mask) => (VPMOVUSQBMasked128_128 x mask) -(VMOVDQU64Masked256 (VPMOVUSQB128_256 x) mask) => (VPMOVUSQBMasked128_256 x mask) -(VMOVDQU64Masked512 (VPMOVUSQB128_512 x) mask) => (VPMOVUSQBMasked128_512 x mask) -(VMOVDQU8Masked256 (VPMOVZXBW256 x) mask) => (VPMOVZXBWMasked256 x mask) -(VMOVDQU8Masked512 (VPMOVZXBW512 x) mask) => (VPMOVZXBWMasked512 x mask) -(VMOVDQU32Masked128 (VPMOVUSDW128_128 x) mask) => (VPMOVUSDWMasked128_128 x mask) -(VMOVDQU32Masked256 (VPMOVUSDW128_256 x) mask) => (VPMOVUSDWMasked128_256 x mask) -(VMOVDQU32Masked256 (VPMOVUSDW256 x) mask) => (VPMOVUSDWMasked256 x mask) -(VMOVDQU64Masked128 (VPMOVUSQW128_128 x) mask) => (VPMOVUSQWMasked128_128 x mask) -(VMOVDQU64Masked256 (VPMOVUSQW128_256 x) mask) => (VPMOVUSQWMasked128_256 x mask) -(VMOVDQU64Masked512 (VPMOVUSQW128_512 x) mask) => (VPMOVUSQWMasked128_512 x mask) -(VMOVDQU32Masked128 (VPACKUSDW128 x y) mask) => (VPACKUSDWMasked128 x y mask) -(VMOVDQU32Masked256 (VPACKUSDW256 x y) mask) => (VPACKUSDWMasked256 x y mask) -(VMOVDQU32Masked512 (VPACKUSDW512 x y) mask) => (VPACKUSDWMasked512 x y mask) -(VMOVDQU8Masked128 (VPMOVZXBW128 x) mask) => (VPMOVZXBWMasked128 x mask) (VMOVDQU32Masked128 (VCVTPS2UDQ128 x) mask) => (VCVTPS2UDQMasked128 x mask) (VMOVDQU32Masked256 (VCVTPS2UDQ256 x) mask) => (VCVTPS2UDQMasked256 x mask) (VMOVDQU32Masked512 (VCVTPS2UDQ512 x) mask) => (VCVTPS2UDQMasked512 x mask) -(VMOVDQU8Masked512 (VPMOVZXBD512 x) mask) => (VPMOVZXBDMasked512 x mask) -(VMOVDQU16Masked256 (VPMOVZXWD256 x) mask) => (VPMOVZXWDMasked256 x mask) -(VMOVDQU16Masked512 (VPMOVZXWD512 x) mask) => (VPMOVZXWDMasked512 x mask) -(VMOVDQU64Masked128 (VPMOVUSQD128_128 x) mask) => (VPMOVUSQDMasked128_128 x mask) -(VMOVDQU64Masked256 (VPMOVUSQD128_256 x) mask) 
=> (VPMOVUSQDMasked128_256 x mask) -(VMOVDQU64Masked256 (VPMOVUSQD256 x) mask) => (VPMOVUSQDMasked256 x mask) -(VMOVDQU8Masked128 (VPMOVZXBD128 x) mask) => (VPMOVZXBDMasked128 x mask) -(VMOVDQU16Masked128 (VPMOVZXWD128 x) mask) => (VPMOVZXWDMasked128 x mask) -(VMOVDQU8Masked256 (VPMOVZXBD256 x) mask) => (VPMOVZXBDMasked256 x mask) -(VMOVDQU16Masked512 (VPMOVZXWQ512 x) mask) => (VPMOVZXWQMasked512 x mask) -(VMOVDQU32Masked256 (VPMOVZXDQ256 x) mask) => (VPMOVZXDQMasked256 x mask) -(VMOVDQU32Masked512 (VPMOVZXDQ512 x) mask) => (VPMOVZXDQMasked512 x mask) -(VMOVDQU8Masked128 (VPMOVZXBQ128 x) mask) => (VPMOVZXBQMasked128 x mask) -(VMOVDQU16Masked128 (VPMOVZXWQ128 x) mask) => (VPMOVZXWQMasked128 x mask) -(VMOVDQU32Masked128 (VPMOVZXDQ128 x) mask) => (VPMOVZXDQMasked128 x mask) -(VMOVDQU16Masked256 (VPMOVSXWQ256 x) mask) => (VPMOVSXWQMasked256 x mask) -(VMOVDQU8Masked256 (VPMOVZXBQ256 x) mask) => (VPMOVZXBQMasked256 x mask) -(VMOVDQU16Masked256 (VPMOVZXWQ256 x) mask) => (VPMOVZXWQMasked256 x mask) -(VMOVDQU8Masked512 (VPMOVZXBQ512 x) mask) => (VPMOVZXBQMasked512 x mask) (VMOVDQU32Masked128 (VDIVPS128 x y) mask) => (VDIVPSMasked128 x y mask) (VMOVDQU32Masked256 (VDIVPS256 x y) mask) => (VDIVPSMasked256 x y mask) (VMOVDQU32Masked512 (VDIVPS512 x y) mask) => (VDIVPSMasked512 x y mask) @@ -1560,6 +1464,42 @@ (VMOVDQU32Masked128 (VPDPBUSDS128 x y z) mask) => (VPDPBUSDSMasked128 x y z mask) (VMOVDQU32Masked256 (VPDPBUSDS256 x y z) mask) => (VPDPBUSDSMasked256 x y z mask) (VMOVDQU32Masked512 (VPDPBUSDS512 x y z) mask) => (VPDPBUSDSMasked512 x y z mask) +(VMOVDQU8Masked128 (VPMOVSXBQ128 x) mask) => (VPMOVSXBQMasked128 x mask) +(VMOVDQU16Masked128 (VPMOVSXWQ128 x) mask) => (VPMOVSXWQMasked128 x mask) +(VMOVDQU32Masked128 (VPMOVSXDQ128 x) mask) => (VPMOVSXDQMasked128 x mask) +(VMOVDQU8Masked128 (VPMOVZXBQ128 x) mask) => (VPMOVZXBQMasked128 x mask) +(VMOVDQU16Masked128 (VPMOVZXWQ128 x) mask) => (VPMOVZXWQMasked128 x mask) +(VMOVDQU32Masked128 (VPMOVZXDQ128 x) mask) => (VPMOVZXDQMasked128 x mask) +(VMOVDQU8Masked128 (VPMOVSXBD128 x) mask) => (VPMOVSXBDMasked128 x mask) +(VMOVDQU16Masked128 (VPMOVSXWD128 x) mask) => (VPMOVSXWDMasked128 x mask) +(VMOVDQU8Masked256 (VPMOVSXBQ256 x) mask) => (VPMOVSXBQMasked256 x mask) +(VMOVDQU16Masked256 (VPMOVSXWQ256 x) mask) => (VPMOVSXWQMasked256 x mask) +(VMOVDQU8Masked128 (VPMOVZXBD128 x) mask) => (VPMOVZXBDMasked128 x mask) +(VMOVDQU16Masked128 (VPMOVZXWD128 x) mask) => (VPMOVZXWDMasked128 x mask) +(VMOVDQU8Masked256 (VPMOVZXBQ256 x) mask) => (VPMOVZXBQMasked256 x mask) +(VMOVDQU16Masked256 (VPMOVZXWQ256 x) mask) => (VPMOVZXWQMasked256 x mask) +(VMOVDQU8Masked128 (VPMOVSXBW128 x) mask) => (VPMOVSXBWMasked128 x mask) +(VMOVDQU8Masked256 (VPMOVSXBD256 x) mask) => (VPMOVSXBDMasked256 x mask) +(VMOVDQU8Masked512 (VPMOVSXBQ512 x) mask) => (VPMOVSXBQMasked512 x mask) +(VMOVDQU8Masked128 (VPMOVZXBW128 x) mask) => (VPMOVZXBWMasked128 x mask) +(VMOVDQU8Masked256 (VPMOVZXBD256 x) mask) => (VPMOVZXBDMasked256 x mask) +(VMOVDQU8Masked512 (VPMOVZXBQ512 x) mask) => (VPMOVZXBQMasked512 x mask) +(VMOVDQU8Masked256 (VPMOVSXBW256 x) mask) => (VPMOVSXBWMasked256 x mask) +(VMOVDQU8Masked512 (VPMOVSXBW512 x) mask) => (VPMOVSXBWMasked512 x mask) +(VMOVDQU8Masked512 (VPMOVSXBD512 x) mask) => (VPMOVSXBDMasked512 x mask) +(VMOVDQU16Masked256 (VPMOVSXWD256 x) mask) => (VPMOVSXWDMasked256 x mask) +(VMOVDQU16Masked512 (VPMOVSXWD512 x) mask) => (VPMOVSXWDMasked512 x mask) +(VMOVDQU16Masked512 (VPMOVSXWQ512 x) mask) => (VPMOVSXWQMasked512 x mask) +(VMOVDQU32Masked256 (VPMOVSXDQ256 x) mask) => 
(VPMOVSXDQMasked256 x mask) +(VMOVDQU32Masked512 (VPMOVSXDQ512 x) mask) => (VPMOVSXDQMasked512 x mask) +(VMOVDQU8Masked256 (VPMOVZXBW256 x) mask) => (VPMOVZXBWMasked256 x mask) +(VMOVDQU8Masked512 (VPMOVZXBW512 x) mask) => (VPMOVZXBWMasked512 x mask) +(VMOVDQU8Masked512 (VPMOVZXBD512 x) mask) => (VPMOVZXBDMasked512 x mask) +(VMOVDQU16Masked256 (VPMOVZXWD256 x) mask) => (VPMOVZXWDMasked256 x mask) +(VMOVDQU16Masked512 (VPMOVZXWD512 x) mask) => (VPMOVZXWDMasked512 x mask) +(VMOVDQU16Masked512 (VPMOVZXWQ512 x) mask) => (VPMOVZXWQMasked512 x mask) +(VMOVDQU32Masked256 (VPMOVZXDQ256 x) mask) => (VPMOVZXDQMasked256 x mask) +(VMOVDQU32Masked512 (VPMOVZXDQ512 x) mask) => (VPMOVZXDQMasked512 x mask) (VMOVDQU8Masked128 (VGF2P8AFFINEINVQB128 [a] x y) mask) => (VGF2P8AFFINEINVQBMasked128 [a] x y mask) (VMOVDQU8Masked256 (VGF2P8AFFINEINVQB256 [a] x y) mask) => (VGF2P8AFFINEINVQBMasked256 [a] x y mask) (VMOVDQU8Masked512 (VGF2P8AFFINEINVQB512 [a] x y) mask) => (VGF2P8AFFINEINVQBMasked512 [a] x y mask) @@ -1737,6 +1677,40 @@ (VMOVDQU64Masked128 (VPRORVQ128 x y) mask) => (VPRORVQMasked128 x y mask) (VMOVDQU64Masked256 (VPRORVQ256 x y) mask) => (VPRORVQMasked256 x y mask) (VMOVDQU64Masked512 (VPRORVQ512 x y) mask) => (VPRORVQMasked512 x y mask) +(VMOVDQU16Masked128 (VPMOVSWB128_128 x) mask) => (VPMOVSWBMasked128_128 x mask) +(VMOVDQU16Masked256 (VPMOVSWB128_256 x) mask) => (VPMOVSWBMasked128_256 x mask) +(VMOVDQU16Masked256 (VPMOVSWB256 x) mask) => (VPMOVSWBMasked256 x mask) +(VMOVDQU32Masked128 (VPMOVSDB128_128 x) mask) => (VPMOVSDBMasked128_128 x mask) +(VMOVDQU32Masked256 (VPMOVSDB128_256 x) mask) => (VPMOVSDBMasked128_256 x mask) +(VMOVDQU32Masked512 (VPMOVSDB128_512 x) mask) => (VPMOVSDBMasked128_512 x mask) +(VMOVDQU64Masked128 (VPMOVSQB128_128 x) mask) => (VPMOVSQBMasked128_128 x mask) +(VMOVDQU64Masked256 (VPMOVSQB128_256 x) mask) => (VPMOVSQBMasked128_256 x mask) +(VMOVDQU64Masked512 (VPMOVSQB128_512 x) mask) => (VPMOVSQBMasked128_512 x mask) +(VMOVDQU32Masked128 (VPACKSSDW128 x y) mask) => (VPACKSSDWMasked128 x y mask) +(VMOVDQU32Masked256 (VPACKSSDW256 x y) mask) => (VPACKSSDWMasked256 x y mask) +(VMOVDQU32Masked512 (VPACKSSDW512 x y) mask) => (VPACKSSDWMasked512 x y mask) +(VMOVDQU32Masked128 (VPMOVSDW128_128 x) mask) => (VPMOVSDWMasked128_128 x mask) +(VMOVDQU32Masked256 (VPMOVSDW128_256 x) mask) => (VPMOVSDWMasked128_256 x mask) +(VMOVDQU32Masked256 (VPMOVSDW256 x) mask) => (VPMOVSDWMasked256 x mask) +(VMOVDQU64Masked128 (VPMOVSQW128_128 x) mask) => (VPMOVSQWMasked128_128 x mask) +(VMOVDQU64Masked256 (VPMOVSQW128_256 x) mask) => (VPMOVSQWMasked128_256 x mask) +(VMOVDQU64Masked512 (VPMOVSQW128_512 x) mask) => (VPMOVSQWMasked128_512 x mask) +(VMOVDQU64Masked128 (VPMOVSQD128_128 x) mask) => (VPMOVSQDMasked128_128 x mask) +(VMOVDQU64Masked256 (VPMOVSQD128_256 x) mask) => (VPMOVSQDMasked128_256 x mask) +(VMOVDQU64Masked256 (VPMOVSQD256 x) mask) => (VPMOVSQDMasked256 x mask) +(VMOVDQU16Masked256 (VPMOVUSWB256 x) mask) => (VPMOVUSWBMasked256 x mask) +(VMOVDQU32Masked128 (VPACKUSDW128 x y) mask) => (VPACKUSDWMasked128 x y mask) +(VMOVDQU32Masked256 (VPACKUSDW256 x y) mask) => (VPACKUSDWMasked256 x y mask) +(VMOVDQU32Masked512 (VPACKUSDW512 x y) mask) => (VPACKUSDWMasked512 x y mask) +(VMOVDQU32Masked128 (VPMOVUSDW128_128 x) mask) => (VPMOVUSDWMasked128_128 x mask) +(VMOVDQU32Masked256 (VPMOVUSDW128_256 x) mask) => (VPMOVUSDWMasked128_256 x mask) +(VMOVDQU32Masked256 (VPMOVUSDW256 x) mask) => (VPMOVUSDWMasked256 x mask) +(VMOVDQU64Masked128 (VPMOVUSQW128_128 x) mask) => (VPMOVUSQWMasked128_128 x mask) 
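
Note on the rule shape above: every rule in this block folds a zeroing masked move (VMOVDQU*Masked) applied to the result of an unmasked extension or narrowing into that instruction's own masked form, so the extend and the mask-zeroing retire as one AVX-512 instruction instead of two. A rough scalar model of the lane semantics the fold must preserve follows; the function name and the scalar framing are illustrative only, not compiler or simd-package API.

	package main

	import "fmt"

	// maskedSignExtend8to32 models zeroing-masked VPMOVSXBD on one group of
	// four lanes: each int8 is sign-extended to int32 where the mask bit is
	// set, and unselected lanes are zeroed. This is the same result the
	// pre-fold pair (unmasked VPMOVSXBD, then VMOVDQU32Masked of the fresh
	// value) produced in two instructions.
	func maskedSignExtend8to32(src [4]int8, mask [4]bool) [4]int32 {
		var dst [4]int32 // the zero value supplies the zeroed lanes
		for i, b := range src {
			if mask[i] {
				dst[i] = int32(b) // Go's int8->int32 conversion sign-extends
			}
		}
		return dst
	}

	func main() {
		fmt.Println(maskedSignExtend8to32(
			[4]int8{-1, 2, -3, 4},
			[4]bool{true, false, true, false},
		)) // prints [-1 0 -3 0]
	}

The fold is only valid because the masked move here is the zeroing form; a merging move (the VPBLENDM rules later in this file) instead rewrites to the separate *Merging ops, which carry the destination as an extra argument.
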
+(VMOVDQU64Masked256 (VPMOVUSQW128_256 x) mask) => (VPMOVUSQWMasked128_256 x mask) +(VMOVDQU64Masked512 (VPMOVUSQW128_512 x) mask) => (VPMOVUSQWMasked128_512 x mask) +(VMOVDQU64Masked128 (VPMOVUSQD128_128 x) mask) => (VPMOVUSQDMasked128_128 x mask) +(VMOVDQU64Masked256 (VPMOVUSQD128_256 x) mask) => (VPMOVUSQDMasked128_256 x mask) +(VMOVDQU64Masked256 (VPMOVUSQD256 x) mask) => (VPMOVUSQDMasked256 x mask) (VMOVDQU32Masked128 (VSCALEFPS128 x y) mask) => (VSCALEFPSMasked128 x y mask) (VMOVDQU32Masked256 (VSCALEFPS256 x y) mask) => (VSCALEFPSMasked256 x y mask) (VMOVDQU32Masked512 (VSCALEFPS512 x y) mask) => (VSCALEFPSMasked512 x y mask) @@ -1869,6 +1843,24 @@ (VMOVDQU16Masked128 (VPSUBUSW128 x y) mask) => (VPSUBUSWMasked128 x y mask) (VMOVDQU16Masked256 (VPSUBUSW256 x y) mask) => (VPSUBUSWMasked256 x y mask) (VMOVDQU16Masked512 (VPSUBUSW512 x y) mask) => (VPSUBUSWMasked512 x y mask) +(VMOVDQU16Masked128 (VPMOVWB128_128 x) mask) => (VPMOVWBMasked128_128 x mask) +(VMOVDQU16Masked256 (VPMOVWB128_256 x) mask) => (VPMOVWBMasked128_256 x mask) +(VMOVDQU16Masked256 (VPMOVWB256 x) mask) => (VPMOVWBMasked256 x mask) +(VMOVDQU32Masked128 (VPMOVDB128_128 x) mask) => (VPMOVDBMasked128_128 x mask) +(VMOVDQU32Masked256 (VPMOVDB128_256 x) mask) => (VPMOVDBMasked128_256 x mask) +(VMOVDQU32Masked512 (VPMOVDB128_512 x) mask) => (VPMOVDBMasked128_512 x mask) +(VMOVDQU64Masked128 (VPMOVQB128_128 x) mask) => (VPMOVQBMasked128_128 x mask) +(VMOVDQU64Masked256 (VPMOVQB128_256 x) mask) => (VPMOVQBMasked128_256 x mask) +(VMOVDQU64Masked512 (VPMOVQB128_512 x) mask) => (VPMOVQBMasked128_512 x mask) +(VMOVDQU32Masked128 (VPMOVDW128_128 x) mask) => (VPMOVDWMasked128_128 x mask) +(VMOVDQU32Masked256 (VPMOVDW128_256 x) mask) => (VPMOVDWMasked128_256 x mask) +(VMOVDQU32Masked256 (VPMOVDW256 x) mask) => (VPMOVDWMasked256 x mask) +(VMOVDQU64Masked128 (VPMOVQW128_128 x) mask) => (VPMOVQWMasked128_128 x mask) +(VMOVDQU64Masked256 (VPMOVQW128_256 x) mask) => (VPMOVQWMasked128_256 x mask) +(VMOVDQU64Masked512 (VPMOVQW128_512 x) mask) => (VPMOVQWMasked128_512 x mask) +(VMOVDQU64Masked128 (VPMOVQD128_128 x) mask) => (VPMOVQDMasked128_128 x mask) +(VMOVDQU64Masked256 (VPMOVQD128_256 x) mask) => (VPMOVQDMasked128_256 x mask) +(VMOVDQU64Masked256 (VPMOVQD256 x) mask) => (VPMOVQDMasked256 x mask) (VMOVDQU32Masked512 (VPXORD512 x y) mask) => (VPXORDMasked512 x y mask) (VMOVDQU64Masked512 (VPXORQ512 x y) mask) => (VPXORQMasked512 x y mask) (VMOVDQU32Masked256 (VPSHUFD256 [a] x) mask) => (VPSHUFDMasked256 [a] x mask) @@ -1935,7 +1927,6 @@ (VPBLENDMDMasked512 dst (VPMOVDW256 x) mask) => (VPMOVDWMasked256Merging dst x mask) (VPBLENDMDMasked512 dst (VPMOVSDB128_512 x) mask) => (VPMOVSDBMasked128_512Merging dst x mask) (VPBLENDMDMasked512 dst (VPMOVSDW256 x) mask) => (VPMOVSDWMasked256Merging dst x mask) -(VPBLENDMDMasked512 dst (VPMOVUSDB128_512 x) mask) => (VPMOVUSDBMasked128_512Merging dst x mask) (VPBLENDMDMasked512 dst (VPMOVUSDW256 x) mask) => (VPMOVUSDWMasked256Merging dst x mask) (VPBLENDMDMasked512 dst (VPMULLD512 x y) mask) => (VPMULLDMasked512Merging dst x y mask) (VPBLENDMDMasked512 dst (VPOPCNTD512 x) mask) => (VPOPCNTDMasked512Merging dst x mask) @@ -1980,7 +1971,6 @@ (VPBLENDMQMasked512 dst (VPMOVSQB128_512 x) mask) => (VPMOVSQBMasked128_512Merging dst x mask) (VPBLENDMQMasked512 dst (VPMOVSQD256 x) mask) => (VPMOVSQDMasked256Merging dst x mask) (VPBLENDMQMasked512 dst (VPMOVSQW128_512 x) mask) => (VPMOVSQWMasked128_512Merging dst x mask) -(VPBLENDMQMasked512 dst (VPMOVUSQB128_512 x) mask) => (VPMOVUSQBMasked128_512Merging dst x 
mask) (VPBLENDMQMasked512 dst (VPMOVUSQD256 x) mask) => (VPMOVUSQDMasked256Merging dst x mask) (VPBLENDMQMasked512 dst (VPMOVUSQW128_512 x) mask) => (VPMOVUSQWMasked128_512Merging dst x mask) (VPBLENDMQMasked512 dst (VPMULLQ512 x y) mask) => (VPMULLQMasked512Merging dst x y mask) @@ -2129,12 +2119,9 @@ (VPBLENDVB128 dst (VPMOVSXWQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWQMasked128Merging dst x (VPMOVVec16x8ToM mask)) (VPBLENDVB128 dst (VPMOVSXWQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWQMasked256Merging dst x (VPMOVVec16x8ToM mask)) (VPBLENDVB128 dst (VPMOVSXWQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWQMasked512Merging dst x (VPMOVVec16x8ToM mask)) -(VPBLENDVB128 dst (VPMOVUSDB128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSDBMasked128_128Merging dst x (VPMOVVec32x4ToM mask)) (VPBLENDVB128 dst (VPMOVUSDW128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSDWMasked128_128Merging dst x (VPMOVVec32x4ToM mask)) -(VPBLENDVB128 dst (VPMOVUSQB128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQBMasked128_128Merging dst x (VPMOVVec64x2ToM mask)) (VPBLENDVB128 dst (VPMOVUSQD128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQDMasked128_128Merging dst x (VPMOVVec64x2ToM mask)) (VPBLENDVB128 dst (VPMOVUSQW128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQWMasked128_128Merging dst x (VPMOVVec64x2ToM mask)) -(VPBLENDVB128 dst (VPMOVUSWB128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSWBMasked128_128Merging dst x (VPMOVVec16x8ToM mask)) (VPBLENDVB128 dst (VPMOVWB128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVWBMasked128_128Merging dst x (VPMOVVec16x8ToM mask)) (VPBLENDVB128 dst (VPMOVZXBD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBDMasked128Merging dst x (VPMOVVec8x16ToM mask)) (VPBLENDVB128 dst (VPMOVZXBD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBDMasked256Merging dst x (VPMOVVec8x16ToM mask)) @@ -2277,12 +2264,9 @@ (VPBLENDVB256 dst (VPMOVSXBW512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBWMasked512Merging dst x (VPMOVVec8x32ToM mask)) (VPBLENDVB256 dst (VPMOVSXDQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXDQMasked512Merging dst x (VPMOVVec32x8ToM mask)) (VPBLENDVB256 dst (VPMOVSXWD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWDMasked512Merging dst x (VPMOVVec16x16ToM mask)) -(VPBLENDVB256 dst (VPMOVUSDB128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSDBMasked128_256Merging dst x (VPMOVVec32x8ToM mask)) (VPBLENDVB256 dst (VPMOVUSDW128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSDWMasked128_256Merging dst x (VPMOVVec32x8ToM mask)) -(VPBLENDVB256 dst (VPMOVUSQB128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQBMasked128_256Merging dst x (VPMOVVec64x4ToM mask)) (VPBLENDVB256 dst (VPMOVUSQD128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQDMasked128_256Merging dst x (VPMOVVec64x4ToM mask)) (VPBLENDVB256 dst (VPMOVUSQW128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQWMasked128_256Merging dst x (VPMOVVec64x4ToM mask)) -(VPBLENDVB256 dst (VPMOVUSWB128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSWBMasked128_256Merging dst x (VPMOVVec16x16ToM mask)) (VPBLENDVB256 dst (VPMOVWB128_256 x) mask) && 
v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVWBMasked128_256Merging dst x (VPMOVVec16x16ToM mask)) (VPBLENDVB256 dst (VPMOVZXBW512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBWMasked512Merging dst x (VPMOVVec8x32ToM mask)) (VPBLENDVB256 dst (VPMOVZXDQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXDQMasked512Merging dst x (VPMOVVec32x8ToM mask)) @@ -2443,18 +2427,10 @@ (VPERMI2QMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2QMasked256load {sym} [off] x y ptr mask mem) (VPERMI2PDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PDMasked512load {sym} [off] x y ptr mask mem) (VPERMI2QMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2QMasked512load {sym} [off] x y ptr mask mem) -(VPACKSSDW512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPACKSSDW512load {sym} [off] x ptr mem) -(VPACKSSDWMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKSSDWMasked128load {sym} [off] x ptr mask mem) -(VPACKSSDWMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKSSDWMasked256load {sym} [off] x ptr mask mem) -(VPACKSSDWMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKSSDWMasked512load {sym} [off] x ptr mask mem) (VCVTTPS2DQ512 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTTPS2DQ512load {sym} [off] ptr mem) (VCVTTPS2DQMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCVTTPS2DQMasked128load {sym} [off] ptr mask mem) (VCVTTPS2DQMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCVTTPS2DQMasked256load {sym} [off] ptr mask mem) (VCVTTPS2DQMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCVTTPS2DQMasked512load {sym} [off] ptr mask mem) -(VPACKUSDW512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPACKUSDW512load {sym} [off] x ptr mem) -(VPACKUSDWMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKUSDWMasked128load {sym} [off] x ptr mask mem) -(VPACKUSDWMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKUSDWMasked256load {sym} [off] x ptr mask mem) -(VPACKUSDWMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKUSDWMasked512load {sym} [off] x ptr mask mem) (VCVTPS2UDQ128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTPS2UDQ128load {sym} [off] ptr mem) (VCVTPS2UDQ256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTPS2UDQ256load {sym} [off] ptr mem) (VCVTPS2UDQ512 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTPS2UDQ512load {sym} [off] ptr mem) @@ -2745,6 +2721,14 @@ (VPRORVQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORVQMasked128load {sym} [off] x ptr mask mem) (VPRORVQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORVQMasked256load {sym} [off] x ptr mask mem) (VPRORVQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) 
&& clobber(l) => (VPRORVQMasked512load {sym} [off] x ptr mask mem) +(VPACKSSDW512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPACKSSDW512load {sym} [off] x ptr mem) +(VPACKSSDWMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKSSDWMasked128load {sym} [off] x ptr mask mem) +(VPACKSSDWMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKSSDWMasked256load {sym} [off] x ptr mask mem) +(VPACKSSDWMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKSSDWMasked512load {sym} [off] x ptr mask mem) +(VPACKUSDW512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPACKUSDW512load {sym} [off] x ptr mem) +(VPACKUSDWMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKUSDWMasked128load {sym} [off] x ptr mask mem) +(VPACKUSDWMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKUSDWMasked256load {sym} [off] x ptr mask mem) +(VPACKUSDWMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKUSDWMasked512load {sym} [off] x ptr mask mem) (VSCALEFPS128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSCALEFPS128load {sym} [off] x ptr mem) (VSCALEFPS256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSCALEFPS256load {sym} [off] x ptr mem) (VSCALEFPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSCALEFPS512load {sym} [off] x ptr mem) diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go index cf8351beb0e..53d18b22d6b 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go @@ -694,24 +694,12 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPMOVSXWQMasked128", argLength: 2, reg: wkw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVSXWQMasked256", argLength: 2, reg: wkw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPMOVSXWQMasked512", argLength: 2, reg: wkw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPMOVUSDB128_128", argLength: 1, reg: w11, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPMOVUSDB128_256", argLength: 1, reg: w11, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPMOVUSDB128_512", argLength: 1, reg: w11, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPMOVUSDBMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPMOVUSDBMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPMOVUSDBMasked128_512", argLength: 2, reg: wkw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVUSDW128_128", argLength: 1, reg: w11, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVUSDW128_256", argLength: 1, reg: w11, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVUSDW256", argLength: 1, reg: w11, asm: 
"VPMOVUSDW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPMOVUSDWMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVUSDWMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVUSDWMasked256", argLength: 2, reg: wkw, asm: "VPMOVUSDW", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPMOVUSQB128_128", argLength: 1, reg: w11, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPMOVUSQB128_256", argLength: 1, reg: w11, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPMOVUSQB128_512", argLength: 1, reg: w11, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPMOVUSQBMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPMOVUSQBMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPMOVUSQBMasked128_512", argLength: 2, reg: wkw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVUSQD128_128", argLength: 1, reg: w11, asm: "VPMOVUSQD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVUSQD128_256", argLength: 1, reg: w11, asm: "VPMOVUSQD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVUSQD256", argLength: 1, reg: w11, asm: "VPMOVUSQD", commutative: false, typ: "Vec256", resultInArg0: false}, @@ -724,11 +712,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPMOVUSQWMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVUSQWMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVUSQWMasked128_512", argLength: 2, reg: wkw, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPMOVUSWB128_128", argLength: 1, reg: w11, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPMOVUSWB128_256", argLength: 1, reg: w11, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVUSWB256", argLength: 1, reg: w11, asm: "VPMOVUSWB", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPMOVUSWBMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPMOVUSWBMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVUSWBMasked256", argLength: 2, reg: wkw, asm: "VPMOVUSWB", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPMOVWB128_128", argLength: 1, reg: w11, asm: "VPMOVWB", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVWB128_256", argLength: 1, reg: w11, asm: "VPMOVWB", commutative: false, typ: "Vec128", resultInArg0: false}, @@ -2188,23 +2172,15 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPMOVSXWQMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPMOVSXWQMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPMOVSXWQMasked512Merging", 
argLength: 3, reg: w2kw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec512", resultInArg0: true}, - {name: "VPMOVUSDBMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: true}, - {name: "VPMOVUSDBMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: true}, - {name: "VPMOVUSDBMasked128_512Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPMOVUSDWMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPMOVUSDWMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPMOVUSDWMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDW", commutative: false, typ: "Vec256", resultInArg0: true}, - {name: "VPMOVUSQBMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: true}, - {name: "VPMOVUSQBMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: true}, - {name: "VPMOVUSQBMasked128_512Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPMOVUSQDMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQD", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPMOVUSQDMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQD", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPMOVUSQDMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQD", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPMOVUSQWMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPMOVUSQWMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPMOVUSQWMasked128_512Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: true}, - {name: "VPMOVUSWBMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: true}, - {name: "VPMOVUSWBMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPMOVUSWBMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSWB", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPMOVWBMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVWB", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPMOVWBMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVWB", commutative: false, typ: "Vec128", resultInArg0: true}, diff --git a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go index 5683fcef0df..2dda588df49 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go +++ b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go @@ -237,126 +237,12 @@ func simdGenericOps() []opData { {name: "ConcatPermuteUint64x2", argLength: 3, commutative: false}, {name: "ConcatPermuteUint64x4", argLength: 3, commutative: false}, {name: "ConcatPermuteUint64x8", argLength: 3, commutative: false}, - {name: "ConvertToInt8Int16x8", argLength: 1, commutative: false}, - {name: "ConvertToInt8Int16x16", argLength: 1, commutative: false}, - {name: 
"ConvertToInt8Int16x32", argLength: 1, commutative: false}, - {name: "ConvertToInt8Int32x4", argLength: 1, commutative: false}, - {name: "ConvertToInt8Int32x8", argLength: 1, commutative: false}, - {name: "ConvertToInt8Int32x16", argLength: 1, commutative: false}, - {name: "ConvertToInt8Int64x2", argLength: 1, commutative: false}, - {name: "ConvertToInt8Int64x4", argLength: 1, commutative: false}, - {name: "ConvertToInt8Int64x8", argLength: 1, commutative: false}, - {name: "ConvertToInt8SaturatedInt16x8", argLength: 1, commutative: false}, - {name: "ConvertToInt8SaturatedInt16x16", argLength: 1, commutative: false}, - {name: "ConvertToInt8SaturatedInt16x32", argLength: 1, commutative: false}, - {name: "ConvertToInt8SaturatedInt32x4", argLength: 1, commutative: false}, - {name: "ConvertToInt8SaturatedInt32x8", argLength: 1, commutative: false}, - {name: "ConvertToInt8SaturatedInt32x16", argLength: 1, commutative: false}, - {name: "ConvertToInt8SaturatedInt64x2", argLength: 1, commutative: false}, - {name: "ConvertToInt8SaturatedInt64x4", argLength: 1, commutative: false}, - {name: "ConvertToInt8SaturatedInt64x8", argLength: 1, commutative: false}, - {name: "ConvertToInt16Int8x16", argLength: 1, commutative: false}, - {name: "ConvertToInt16Int8x32", argLength: 1, commutative: false}, - {name: "ConvertToInt16Int32x4", argLength: 1, commutative: false}, - {name: "ConvertToInt16Int32x8", argLength: 1, commutative: false}, - {name: "ConvertToInt16Int32x16", argLength: 1, commutative: false}, - {name: "ConvertToInt16Int64x2", argLength: 1, commutative: false}, - {name: "ConvertToInt16Int64x4", argLength: 1, commutative: false}, - {name: "ConvertToInt16Int64x8", argLength: 1, commutative: false}, - {name: "ConvertToInt16SaturatedInt32x4", argLength: 1, commutative: false}, - {name: "ConvertToInt16SaturatedInt32x8", argLength: 1, commutative: false}, - {name: "ConvertToInt16SaturatedInt32x16", argLength: 1, commutative: false}, - {name: "ConvertToInt16SaturatedInt64x2", argLength: 1, commutative: false}, - {name: "ConvertToInt16SaturatedInt64x4", argLength: 1, commutative: false}, - {name: "ConvertToInt16SaturatedInt64x8", argLength: 1, commutative: false}, - {name: "ConvertToInt16SaturatedPackedInt32x4", argLength: 2, commutative: false}, - {name: "ConvertToInt16SaturatedPackedInt32x8", argLength: 2, commutative: false}, - {name: "ConvertToInt16SaturatedPackedInt32x16", argLength: 2, commutative: false}, - {name: "ConvertToInt16x8Int8x16", argLength: 1, commutative: false}, {name: "ConvertToInt32Float32x4", argLength: 1, commutative: false}, {name: "ConvertToInt32Float32x8", argLength: 1, commutative: false}, {name: "ConvertToInt32Float32x16", argLength: 1, commutative: false}, - {name: "ConvertToInt32Int8x16", argLength: 1, commutative: false}, - {name: "ConvertToInt32Int16x8", argLength: 1, commutative: false}, - {name: "ConvertToInt32Int16x16", argLength: 1, commutative: false}, - {name: "ConvertToInt32Int64x2", argLength: 1, commutative: false}, - {name: "ConvertToInt32Int64x4", argLength: 1, commutative: false}, - {name: "ConvertToInt32Int64x8", argLength: 1, commutative: false}, - {name: "ConvertToInt32SaturatedInt64x2", argLength: 1, commutative: false}, - {name: "ConvertToInt32SaturatedInt64x4", argLength: 1, commutative: false}, - {name: "ConvertToInt32SaturatedInt64x8", argLength: 1, commutative: false}, - {name: "ConvertToInt32x4Int8x16", argLength: 1, commutative: false}, - {name: "ConvertToInt32x4Int16x8", argLength: 1, commutative: false}, - {name: "ConvertToInt32x8Int8x16", 
argLength: 1, commutative: false}, - {name: "ConvertToInt64Int16x8", argLength: 1, commutative: false}, - {name: "ConvertToInt64Int32x4", argLength: 1, commutative: false}, - {name: "ConvertToInt64Int32x8", argLength: 1, commutative: false}, - {name: "ConvertToInt64x2Int8x16", argLength: 1, commutative: false}, - {name: "ConvertToInt64x2Int16x8", argLength: 1, commutative: false}, - {name: "ConvertToInt64x2Int32x4", argLength: 1, commutative: false}, - {name: "ConvertToInt64x4Int8x16", argLength: 1, commutative: false}, - {name: "ConvertToInt64x8Int8x16", argLength: 1, commutative: false}, - {name: "ConvertToUint8SaturatedUint16x8", argLength: 1, commutative: false}, - {name: "ConvertToUint8SaturatedUint16x16", argLength: 1, commutative: false}, - {name: "ConvertToUint8SaturatedUint16x32", argLength: 1, commutative: false}, - {name: "ConvertToUint8SaturatedUint32x4", argLength: 1, commutative: false}, - {name: "ConvertToUint8SaturatedUint32x8", argLength: 1, commutative: false}, - {name: "ConvertToUint8SaturatedUint32x16", argLength: 1, commutative: false}, - {name: "ConvertToUint8SaturatedUint64x2", argLength: 1, commutative: false}, - {name: "ConvertToUint8SaturatedUint64x4", argLength: 1, commutative: false}, - {name: "ConvertToUint8SaturatedUint64x8", argLength: 1, commutative: false}, - {name: "ConvertToUint8Uint16x8", argLength: 1, commutative: false}, - {name: "ConvertToUint8Uint16x16", argLength: 1, commutative: false}, - {name: "ConvertToUint8Uint16x32", argLength: 1, commutative: false}, - {name: "ConvertToUint8Uint32x4", argLength: 1, commutative: false}, - {name: "ConvertToUint8Uint32x8", argLength: 1, commutative: false}, - {name: "ConvertToUint8Uint32x16", argLength: 1, commutative: false}, - {name: "ConvertToUint8Uint64x2", argLength: 1, commutative: false}, - {name: "ConvertToUint8Uint64x4", argLength: 1, commutative: false}, - {name: "ConvertToUint8Uint64x8", argLength: 1, commutative: false}, - {name: "ConvertToUint16SaturatedPackedUint32x4", argLength: 2, commutative: false}, - {name: "ConvertToUint16SaturatedPackedUint32x8", argLength: 2, commutative: false}, - {name: "ConvertToUint16SaturatedPackedUint32x16", argLength: 2, commutative: false}, - {name: "ConvertToUint16SaturatedUint32x4", argLength: 1, commutative: false}, - {name: "ConvertToUint16SaturatedUint32x8", argLength: 1, commutative: false}, - {name: "ConvertToUint16SaturatedUint32x16", argLength: 1, commutative: false}, - {name: "ConvertToUint16SaturatedUint64x2", argLength: 1, commutative: false}, - {name: "ConvertToUint16SaturatedUint64x4", argLength: 1, commutative: false}, - {name: "ConvertToUint16SaturatedUint64x8", argLength: 1, commutative: false}, - {name: "ConvertToUint16Uint8x16", argLength: 1, commutative: false}, - {name: "ConvertToUint16Uint8x32", argLength: 1, commutative: false}, - {name: "ConvertToUint16Uint32x4", argLength: 1, commutative: false}, - {name: "ConvertToUint16Uint32x8", argLength: 1, commutative: false}, - {name: "ConvertToUint16Uint32x16", argLength: 1, commutative: false}, - {name: "ConvertToUint16Uint64x2", argLength: 1, commutative: false}, - {name: "ConvertToUint16Uint64x4", argLength: 1, commutative: false}, - {name: "ConvertToUint16Uint64x8", argLength: 1, commutative: false}, - {name: "ConvertToUint16x8Uint8x16", argLength: 1, commutative: false}, {name: "ConvertToUint32Float32x4", argLength: 1, commutative: false}, {name: "ConvertToUint32Float32x8", argLength: 1, commutative: false}, {name: "ConvertToUint32Float32x16", argLength: 1, commutative: false}, - {name: 
"ConvertToUint32SaturatedUint64x2", argLength: 1, commutative: false}, - {name: "ConvertToUint32SaturatedUint64x4", argLength: 1, commutative: false}, - {name: "ConvertToUint32SaturatedUint64x8", argLength: 1, commutative: false}, - {name: "ConvertToUint32Uint8x16", argLength: 1, commutative: false}, - {name: "ConvertToUint32Uint16x8", argLength: 1, commutative: false}, - {name: "ConvertToUint32Uint16x16", argLength: 1, commutative: false}, - {name: "ConvertToUint32Uint64x2", argLength: 1, commutative: false}, - {name: "ConvertToUint32Uint64x4", argLength: 1, commutative: false}, - {name: "ConvertToUint32Uint64x8", argLength: 1, commutative: false}, - {name: "ConvertToUint32x4Uint8x16", argLength: 1, commutative: false}, - {name: "ConvertToUint32x4Uint16x8", argLength: 1, commutative: false}, - {name: "ConvertToUint32x8Uint8x16", argLength: 1, commutative: false}, - {name: "ConvertToUint64Uint16x8", argLength: 1, commutative: false}, - {name: "ConvertToUint64Uint32x4", argLength: 1, commutative: false}, - {name: "ConvertToUint64Uint32x8", argLength: 1, commutative: false}, - {name: "ConvertToUint64x2Uint8x16", argLength: 1, commutative: false}, - {name: "ConvertToUint64x2Uint16x8", argLength: 1, commutative: false}, - {name: "ConvertToUint64x2Uint32x4", argLength: 1, commutative: false}, - {name: "ConvertToUint64x4Int16x8", argLength: 1, commutative: false}, - {name: "ConvertToUint64x4Uint8x16", argLength: 1, commutative: false}, - {name: "ConvertToUint64x4Uint16x8", argLength: 1, commutative: false}, - {name: "ConvertToUint64x8Uint8x16", argLength: 1, commutative: false}, {name: "CopySignInt8x16", argLength: 2, commutative: false}, {name: "CopySignInt8x32", argLength: 2, commutative: false}, {name: "CopySignInt16x8", argLength: 2, commutative: false}, @@ -441,6 +327,42 @@ func simdGenericOps() []opData { {name: "ExpandUint64x2", argLength: 2, commutative: false}, {name: "ExpandUint64x4", argLength: 2, commutative: false}, {name: "ExpandUint64x8", argLength: 2, commutative: false}, + {name: "ExtendLo2ToInt64x2Int8x16", argLength: 1, commutative: false}, + {name: "ExtendLo2ToInt64x2Int16x8", argLength: 1, commutative: false}, + {name: "ExtendLo2ToInt64x2Int32x4", argLength: 1, commutative: false}, + {name: "ExtendLo2ToUint64x2Uint8x16", argLength: 1, commutative: false}, + {name: "ExtendLo2ToUint64x2Uint16x8", argLength: 1, commutative: false}, + {name: "ExtendLo2ToUint64x2Uint32x4", argLength: 1, commutative: false}, + {name: "ExtendLo4ToInt32x4Int8x16", argLength: 1, commutative: false}, + {name: "ExtendLo4ToInt32x4Int16x8", argLength: 1, commutative: false}, + {name: "ExtendLo4ToInt64x4Int8x16", argLength: 1, commutative: false}, + {name: "ExtendLo4ToInt64x4Int16x8", argLength: 1, commutative: false}, + {name: "ExtendLo4ToUint32x4Uint8x16", argLength: 1, commutative: false}, + {name: "ExtendLo4ToUint32x4Uint16x8", argLength: 1, commutative: false}, + {name: "ExtendLo4ToUint64x4Uint8x16", argLength: 1, commutative: false}, + {name: "ExtendLo4ToUint64x4Uint16x8", argLength: 1, commutative: false}, + {name: "ExtendLo8ToInt16x8Int8x16", argLength: 1, commutative: false}, + {name: "ExtendLo8ToInt32x8Int8x16", argLength: 1, commutative: false}, + {name: "ExtendLo8ToInt64x8Int8x16", argLength: 1, commutative: false}, + {name: "ExtendLo8ToUint16x8Uint8x16", argLength: 1, commutative: false}, + {name: "ExtendLo8ToUint32x8Uint8x16", argLength: 1, commutative: false}, + {name: "ExtendLo8ToUint64x8Uint8x16", argLength: 1, commutative: false}, + {name: "ExtendToInt16Int8x16", argLength: 1, 
commutative: false}, + {name: "ExtendToInt16Int8x32", argLength: 1, commutative: false}, + {name: "ExtendToInt32Int8x16", argLength: 1, commutative: false}, + {name: "ExtendToInt32Int16x8", argLength: 1, commutative: false}, + {name: "ExtendToInt32Int16x16", argLength: 1, commutative: false}, + {name: "ExtendToInt64Int16x8", argLength: 1, commutative: false}, + {name: "ExtendToInt64Int32x4", argLength: 1, commutative: false}, + {name: "ExtendToInt64Int32x8", argLength: 1, commutative: false}, + {name: "ExtendToUint16Uint8x16", argLength: 1, commutative: false}, + {name: "ExtendToUint16Uint8x32", argLength: 1, commutative: false}, + {name: "ExtendToUint32Uint8x16", argLength: 1, commutative: false}, + {name: "ExtendToUint32Uint16x8", argLength: 1, commutative: false}, + {name: "ExtendToUint32Uint16x16", argLength: 1, commutative: false}, + {name: "ExtendToUint64Uint16x8", argLength: 1, commutative: false}, + {name: "ExtendToUint64Uint32x4", argLength: 1, commutative: false}, + {name: "ExtendToUint64Uint32x8", argLength: 1, commutative: false}, {name: "FloorFloat32x4", argLength: 1, commutative: false}, {name: "FloorFloat32x8", argLength: 1, commutative: false}, {name: "FloorFloat64x2", argLength: 1, commutative: false}, @@ -856,6 +778,48 @@ func simdGenericOps() []opData { {name: "SHA256Message1Uint32x4", argLength: 2, commutative: false}, {name: "SHA256Message2Uint32x4", argLength: 2, commutative: false}, {name: "SHA256TwoRoundsUint32x4", argLength: 3, commutative: false}, + {name: "SaturateToInt8Int16x8", argLength: 1, commutative: false}, + {name: "SaturateToInt8Int16x16", argLength: 1, commutative: false}, + {name: "SaturateToInt8Int16x32", argLength: 1, commutative: false}, + {name: "SaturateToInt8Int32x4", argLength: 1, commutative: false}, + {name: "SaturateToInt8Int32x8", argLength: 1, commutative: false}, + {name: "SaturateToInt8Int32x16", argLength: 1, commutative: false}, + {name: "SaturateToInt8Int64x2", argLength: 1, commutative: false}, + {name: "SaturateToInt8Int64x4", argLength: 1, commutative: false}, + {name: "SaturateToInt8Int64x8", argLength: 1, commutative: false}, + {name: "SaturateToInt16ConcatInt32x4", argLength: 2, commutative: false}, + {name: "SaturateToInt16ConcatInt32x8", argLength: 2, commutative: false}, + {name: "SaturateToInt16ConcatInt32x16", argLength: 2, commutative: false}, + {name: "SaturateToInt16Int32x4", argLength: 1, commutative: false}, + {name: "SaturateToInt16Int32x8", argLength: 1, commutative: false}, + {name: "SaturateToInt16Int32x16", argLength: 1, commutative: false}, + {name: "SaturateToInt16Int64x2", argLength: 1, commutative: false}, + {name: "SaturateToInt16Int64x4", argLength: 1, commutative: false}, + {name: "SaturateToInt16Int64x8", argLength: 1, commutative: false}, + {name: "SaturateToInt32Int64x2", argLength: 1, commutative: false}, + {name: "SaturateToInt32Int64x4", argLength: 1, commutative: false}, + {name: "SaturateToInt32Int64x8", argLength: 1, commutative: false}, + {name: "SaturateToUint8Int16x8", argLength: 1, commutative: false}, + {name: "SaturateToUint8Int16x16", argLength: 1, commutative: false}, + {name: "SaturateToUint8Int32x4", argLength: 1, commutative: false}, + {name: "SaturateToUint8Int32x8", argLength: 1, commutative: false}, + {name: "SaturateToUint8Int32x16", argLength: 1, commutative: false}, + {name: "SaturateToUint8Int64x2", argLength: 1, commutative: false}, + {name: "SaturateToUint8Int64x4", argLength: 1, commutative: false}, + {name: "SaturateToUint8Int64x8", argLength: 1, commutative: false}, + {name: 
"SaturateToUint8Uint16x32", argLength: 1, commutative: false}, + {name: "SaturateToUint16ConcatUint32x4", argLength: 2, commutative: false}, + {name: "SaturateToUint16ConcatUint32x8", argLength: 2, commutative: false}, + {name: "SaturateToUint16ConcatUint32x16", argLength: 2, commutative: false}, + {name: "SaturateToUint16Uint32x4", argLength: 1, commutative: false}, + {name: "SaturateToUint16Uint32x8", argLength: 1, commutative: false}, + {name: "SaturateToUint16Uint32x16", argLength: 1, commutative: false}, + {name: "SaturateToUint16Uint64x2", argLength: 1, commutative: false}, + {name: "SaturateToUint16Uint64x4", argLength: 1, commutative: false}, + {name: "SaturateToUint16Uint64x8", argLength: 1, commutative: false}, + {name: "SaturateToUint32Uint64x2", argLength: 1, commutative: false}, + {name: "SaturateToUint32Uint64x4", argLength: 1, commutative: false}, + {name: "SaturateToUint32Uint64x8", argLength: 1, commutative: false}, {name: "ScaleFloat32x4", argLength: 2, commutative: false}, {name: "ScaleFloat32x8", argLength: 2, commutative: false}, {name: "ScaleFloat32x16", argLength: 2, commutative: false}, @@ -1079,6 +1043,42 @@ func simdGenericOps() []opData { {name: "TruncFloat32x8", argLength: 1, commutative: false}, {name: "TruncFloat64x2", argLength: 1, commutative: false}, {name: "TruncFloat64x4", argLength: 1, commutative: false}, + {name: "TruncateToInt8Int16x8", argLength: 1, commutative: false}, + {name: "TruncateToInt8Int16x16", argLength: 1, commutative: false}, + {name: "TruncateToInt8Int16x32", argLength: 1, commutative: false}, + {name: "TruncateToInt8Int32x4", argLength: 1, commutative: false}, + {name: "TruncateToInt8Int32x8", argLength: 1, commutative: false}, + {name: "TruncateToInt8Int32x16", argLength: 1, commutative: false}, + {name: "TruncateToInt8Int64x2", argLength: 1, commutative: false}, + {name: "TruncateToInt8Int64x4", argLength: 1, commutative: false}, + {name: "TruncateToInt8Int64x8", argLength: 1, commutative: false}, + {name: "TruncateToInt16Int32x4", argLength: 1, commutative: false}, + {name: "TruncateToInt16Int32x8", argLength: 1, commutative: false}, + {name: "TruncateToInt16Int32x16", argLength: 1, commutative: false}, + {name: "TruncateToInt16Int64x2", argLength: 1, commutative: false}, + {name: "TruncateToInt16Int64x4", argLength: 1, commutative: false}, + {name: "TruncateToInt16Int64x8", argLength: 1, commutative: false}, + {name: "TruncateToInt32Int64x2", argLength: 1, commutative: false}, + {name: "TruncateToInt32Int64x4", argLength: 1, commutative: false}, + {name: "TruncateToInt32Int64x8", argLength: 1, commutative: false}, + {name: "TruncateToUint8Uint16x8", argLength: 1, commutative: false}, + {name: "TruncateToUint8Uint16x16", argLength: 1, commutative: false}, + {name: "TruncateToUint8Uint16x32", argLength: 1, commutative: false}, + {name: "TruncateToUint8Uint32x4", argLength: 1, commutative: false}, + {name: "TruncateToUint8Uint32x8", argLength: 1, commutative: false}, + {name: "TruncateToUint8Uint32x16", argLength: 1, commutative: false}, + {name: "TruncateToUint8Uint64x2", argLength: 1, commutative: false}, + {name: "TruncateToUint8Uint64x4", argLength: 1, commutative: false}, + {name: "TruncateToUint8Uint64x8", argLength: 1, commutative: false}, + {name: "TruncateToUint16Uint32x4", argLength: 1, commutative: false}, + {name: "TruncateToUint16Uint32x8", argLength: 1, commutative: false}, + {name: "TruncateToUint16Uint32x16", argLength: 1, commutative: false}, + {name: "TruncateToUint16Uint64x2", argLength: 1, commutative: false}, + 
{name: "TruncateToUint16Uint64x4", argLength: 1, commutative: false}, + {name: "TruncateToUint16Uint64x8", argLength: 1, commutative: false}, + {name: "TruncateToUint32Uint64x2", argLength: 1, commutative: false}, + {name: "TruncateToUint32Uint64x4", argLength: 1, commutative: false}, + {name: "TruncateToUint32Uint64x8", argLength: 1, commutative: false}, {name: "XorInt8x16", argLength: 2, commutative: true}, {name: "XorInt8x32", argLength: 2, commutative: true}, {name: "XorInt8x64", argLength: 2, commutative: true}, diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 5b8c35bec6e..2398f7f63fc 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1935,24 +1935,12 @@ const ( OpAMD64VPMOVSXWQMasked128 OpAMD64VPMOVSXWQMasked256 OpAMD64VPMOVSXWQMasked512 - OpAMD64VPMOVUSDB128_128 - OpAMD64VPMOVUSDB128_256 - OpAMD64VPMOVUSDB128_512 - OpAMD64VPMOVUSDBMasked128_128 - OpAMD64VPMOVUSDBMasked128_256 - OpAMD64VPMOVUSDBMasked128_512 OpAMD64VPMOVUSDW128_128 OpAMD64VPMOVUSDW128_256 OpAMD64VPMOVUSDW256 OpAMD64VPMOVUSDWMasked128_128 OpAMD64VPMOVUSDWMasked128_256 OpAMD64VPMOVUSDWMasked256 - OpAMD64VPMOVUSQB128_128 - OpAMD64VPMOVUSQB128_256 - OpAMD64VPMOVUSQB128_512 - OpAMD64VPMOVUSQBMasked128_128 - OpAMD64VPMOVUSQBMasked128_256 - OpAMD64VPMOVUSQBMasked128_512 OpAMD64VPMOVUSQD128_128 OpAMD64VPMOVUSQD128_256 OpAMD64VPMOVUSQD256 @@ -1965,11 +1953,7 @@ const ( OpAMD64VPMOVUSQWMasked128_128 OpAMD64VPMOVUSQWMasked128_256 OpAMD64VPMOVUSQWMasked128_512 - OpAMD64VPMOVUSWB128_128 - OpAMD64VPMOVUSWB128_256 OpAMD64VPMOVUSWB256 - OpAMD64VPMOVUSWBMasked128_128 - OpAMD64VPMOVUSWBMasked128_256 OpAMD64VPMOVUSWBMasked256 OpAMD64VPMOVWB128_128 OpAMD64VPMOVWB128_256 @@ -3429,23 +3413,15 @@ const ( OpAMD64VPMOVSXWQMasked128Merging OpAMD64VPMOVSXWQMasked256Merging OpAMD64VPMOVSXWQMasked512Merging - OpAMD64VPMOVUSDBMasked128_128Merging - OpAMD64VPMOVUSDBMasked128_256Merging - OpAMD64VPMOVUSDBMasked128_512Merging OpAMD64VPMOVUSDWMasked128_128Merging OpAMD64VPMOVUSDWMasked128_256Merging OpAMD64VPMOVUSDWMasked256Merging - OpAMD64VPMOVUSQBMasked128_128Merging - OpAMD64VPMOVUSQBMasked128_256Merging - OpAMD64VPMOVUSQBMasked128_512Merging OpAMD64VPMOVUSQDMasked128_128Merging OpAMD64VPMOVUSQDMasked128_256Merging OpAMD64VPMOVUSQDMasked256Merging OpAMD64VPMOVUSQWMasked128_128Merging OpAMD64VPMOVUSQWMasked128_256Merging OpAMD64VPMOVUSQWMasked128_512Merging - OpAMD64VPMOVUSWBMasked128_128Merging - OpAMD64VPMOVUSWBMasked128_256Merging OpAMD64VPMOVUSWBMasked256Merging OpAMD64VPMOVWBMasked128_128Merging OpAMD64VPMOVWBMasked128_256Merging @@ -6197,126 +6173,12 @@ const ( OpConcatPermuteUint64x2 OpConcatPermuteUint64x4 OpConcatPermuteUint64x8 - OpConvertToInt8Int16x8 - OpConvertToInt8Int16x16 - OpConvertToInt8Int16x32 - OpConvertToInt8Int32x4 - OpConvertToInt8Int32x8 - OpConvertToInt8Int32x16 - OpConvertToInt8Int64x2 - OpConvertToInt8Int64x4 - OpConvertToInt8Int64x8 - OpConvertToInt8SaturatedInt16x8 - OpConvertToInt8SaturatedInt16x16 - OpConvertToInt8SaturatedInt16x32 - OpConvertToInt8SaturatedInt32x4 - OpConvertToInt8SaturatedInt32x8 - OpConvertToInt8SaturatedInt32x16 - OpConvertToInt8SaturatedInt64x2 - OpConvertToInt8SaturatedInt64x4 - OpConvertToInt8SaturatedInt64x8 - OpConvertToInt16Int8x16 - OpConvertToInt16Int8x32 - OpConvertToInt16Int32x4 - OpConvertToInt16Int32x8 - OpConvertToInt16Int32x16 - OpConvertToInt16Int64x2 - OpConvertToInt16Int64x4 - OpConvertToInt16Int64x8 - OpConvertToInt16SaturatedInt32x4 - OpConvertToInt16SaturatedInt32x8 - 
OpConvertToInt16SaturatedInt32x16 - OpConvertToInt16SaturatedInt64x2 - OpConvertToInt16SaturatedInt64x4 - OpConvertToInt16SaturatedInt64x8 - OpConvertToInt16SaturatedPackedInt32x4 - OpConvertToInt16SaturatedPackedInt32x8 - OpConvertToInt16SaturatedPackedInt32x16 - OpConvertToInt16x8Int8x16 OpConvertToInt32Float32x4 OpConvertToInt32Float32x8 OpConvertToInt32Float32x16 - OpConvertToInt32Int8x16 - OpConvertToInt32Int16x8 - OpConvertToInt32Int16x16 - OpConvertToInt32Int64x2 - OpConvertToInt32Int64x4 - OpConvertToInt32Int64x8 - OpConvertToInt32SaturatedInt64x2 - OpConvertToInt32SaturatedInt64x4 - OpConvertToInt32SaturatedInt64x8 - OpConvertToInt32x4Int8x16 - OpConvertToInt32x4Int16x8 - OpConvertToInt32x8Int8x16 - OpConvertToInt64Int16x8 - OpConvertToInt64Int32x4 - OpConvertToInt64Int32x8 - OpConvertToInt64x2Int8x16 - OpConvertToInt64x2Int16x8 - OpConvertToInt64x2Int32x4 - OpConvertToInt64x4Int8x16 - OpConvertToInt64x8Int8x16 - OpConvertToUint8SaturatedUint16x8 - OpConvertToUint8SaturatedUint16x16 - OpConvertToUint8SaturatedUint16x32 - OpConvertToUint8SaturatedUint32x4 - OpConvertToUint8SaturatedUint32x8 - OpConvertToUint8SaturatedUint32x16 - OpConvertToUint8SaturatedUint64x2 - OpConvertToUint8SaturatedUint64x4 - OpConvertToUint8SaturatedUint64x8 - OpConvertToUint8Uint16x8 - OpConvertToUint8Uint16x16 - OpConvertToUint8Uint16x32 - OpConvertToUint8Uint32x4 - OpConvertToUint8Uint32x8 - OpConvertToUint8Uint32x16 - OpConvertToUint8Uint64x2 - OpConvertToUint8Uint64x4 - OpConvertToUint8Uint64x8 - OpConvertToUint16SaturatedPackedUint32x4 - OpConvertToUint16SaturatedPackedUint32x8 - OpConvertToUint16SaturatedPackedUint32x16 - OpConvertToUint16SaturatedUint32x4 - OpConvertToUint16SaturatedUint32x8 - OpConvertToUint16SaturatedUint32x16 - OpConvertToUint16SaturatedUint64x2 - OpConvertToUint16SaturatedUint64x4 - OpConvertToUint16SaturatedUint64x8 - OpConvertToUint16Uint8x16 - OpConvertToUint16Uint8x32 - OpConvertToUint16Uint32x4 - OpConvertToUint16Uint32x8 - OpConvertToUint16Uint32x16 - OpConvertToUint16Uint64x2 - OpConvertToUint16Uint64x4 - OpConvertToUint16Uint64x8 - OpConvertToUint16x8Uint8x16 OpConvertToUint32Float32x4 OpConvertToUint32Float32x8 OpConvertToUint32Float32x16 - OpConvertToUint32SaturatedUint64x2 - OpConvertToUint32SaturatedUint64x4 - OpConvertToUint32SaturatedUint64x8 - OpConvertToUint32Uint8x16 - OpConvertToUint32Uint16x8 - OpConvertToUint32Uint16x16 - OpConvertToUint32Uint64x2 - OpConvertToUint32Uint64x4 - OpConvertToUint32Uint64x8 - OpConvertToUint32x4Uint8x16 - OpConvertToUint32x4Uint16x8 - OpConvertToUint32x8Uint8x16 - OpConvertToUint64Uint16x8 - OpConvertToUint64Uint32x4 - OpConvertToUint64Uint32x8 - OpConvertToUint64x2Uint8x16 - OpConvertToUint64x2Uint16x8 - OpConvertToUint64x2Uint32x4 - OpConvertToUint64x4Int16x8 - OpConvertToUint64x4Uint8x16 - OpConvertToUint64x4Uint16x8 - OpConvertToUint64x8Uint8x16 OpCopySignInt8x16 OpCopySignInt8x32 OpCopySignInt16x8 @@ -6401,6 +6263,42 @@ const ( OpExpandUint64x2 OpExpandUint64x4 OpExpandUint64x8 + OpExtendLo2ToInt64x2Int8x16 + OpExtendLo2ToInt64x2Int16x8 + OpExtendLo2ToInt64x2Int32x4 + OpExtendLo2ToUint64x2Uint8x16 + OpExtendLo2ToUint64x2Uint16x8 + OpExtendLo2ToUint64x2Uint32x4 + OpExtendLo4ToInt32x4Int8x16 + OpExtendLo4ToInt32x4Int16x8 + OpExtendLo4ToInt64x4Int8x16 + OpExtendLo4ToInt64x4Int16x8 + OpExtendLo4ToUint32x4Uint8x16 + OpExtendLo4ToUint32x4Uint16x8 + OpExtendLo4ToUint64x4Uint8x16 + OpExtendLo4ToUint64x4Uint16x8 + OpExtendLo8ToInt16x8Int8x16 + OpExtendLo8ToInt32x8Int8x16 + OpExtendLo8ToInt64x8Int8x16 + OpExtendLo8ToUint16x8Uint8x16 + 
OpExtendLo8ToUint32x8Uint8x16 + OpExtendLo8ToUint64x8Uint8x16 + OpExtendToInt16Int8x16 + OpExtendToInt16Int8x32 + OpExtendToInt32Int8x16 + OpExtendToInt32Int16x8 + OpExtendToInt32Int16x16 + OpExtendToInt64Int16x8 + OpExtendToInt64Int32x4 + OpExtendToInt64Int32x8 + OpExtendToUint16Uint8x16 + OpExtendToUint16Uint8x32 + OpExtendToUint32Uint8x16 + OpExtendToUint32Uint16x8 + OpExtendToUint32Uint16x16 + OpExtendToUint64Uint16x8 + OpExtendToUint64Uint32x4 + OpExtendToUint64Uint32x8 OpFloorFloat32x4 OpFloorFloat32x8 OpFloorFloat64x2 @@ -6816,6 +6714,48 @@ const ( OpSHA256Message1Uint32x4 OpSHA256Message2Uint32x4 OpSHA256TwoRoundsUint32x4 + OpSaturateToInt8Int16x8 + OpSaturateToInt8Int16x16 + OpSaturateToInt8Int16x32 + OpSaturateToInt8Int32x4 + OpSaturateToInt8Int32x8 + OpSaturateToInt8Int32x16 + OpSaturateToInt8Int64x2 + OpSaturateToInt8Int64x4 + OpSaturateToInt8Int64x8 + OpSaturateToInt16ConcatInt32x4 + OpSaturateToInt16ConcatInt32x8 + OpSaturateToInt16ConcatInt32x16 + OpSaturateToInt16Int32x4 + OpSaturateToInt16Int32x8 + OpSaturateToInt16Int32x16 + OpSaturateToInt16Int64x2 + OpSaturateToInt16Int64x4 + OpSaturateToInt16Int64x8 + OpSaturateToInt32Int64x2 + OpSaturateToInt32Int64x4 + OpSaturateToInt32Int64x8 + OpSaturateToUint8Int16x8 + OpSaturateToUint8Int16x16 + OpSaturateToUint8Int32x4 + OpSaturateToUint8Int32x8 + OpSaturateToUint8Int32x16 + OpSaturateToUint8Int64x2 + OpSaturateToUint8Int64x4 + OpSaturateToUint8Int64x8 + OpSaturateToUint8Uint16x32 + OpSaturateToUint16ConcatUint32x4 + OpSaturateToUint16ConcatUint32x8 + OpSaturateToUint16ConcatUint32x16 + OpSaturateToUint16Uint32x4 + OpSaturateToUint16Uint32x8 + OpSaturateToUint16Uint32x16 + OpSaturateToUint16Uint64x2 + OpSaturateToUint16Uint64x4 + OpSaturateToUint16Uint64x8 + OpSaturateToUint32Uint64x2 + OpSaturateToUint32Uint64x4 + OpSaturateToUint32Uint64x8 OpScaleFloat32x4 OpScaleFloat32x8 OpScaleFloat32x16 @@ -7039,6 +6979,42 @@ const ( OpTruncFloat32x8 OpTruncFloat64x2 OpTruncFloat64x4 + OpTruncateToInt8Int16x8 + OpTruncateToInt8Int16x16 + OpTruncateToInt8Int16x32 + OpTruncateToInt8Int32x4 + OpTruncateToInt8Int32x8 + OpTruncateToInt8Int32x16 + OpTruncateToInt8Int64x2 + OpTruncateToInt8Int64x4 + OpTruncateToInt8Int64x8 + OpTruncateToInt16Int32x4 + OpTruncateToInt16Int32x8 + OpTruncateToInt16Int32x16 + OpTruncateToInt16Int64x2 + OpTruncateToInt16Int64x4 + OpTruncateToInt16Int64x8 + OpTruncateToInt32Int64x2 + OpTruncateToInt32Int64x4 + OpTruncateToInt32Int64x8 + OpTruncateToUint8Uint16x8 + OpTruncateToUint8Uint16x16 + OpTruncateToUint8Uint16x32 + OpTruncateToUint8Uint32x4 + OpTruncateToUint8Uint32x8 + OpTruncateToUint8Uint32x16 + OpTruncateToUint8Uint64x2 + OpTruncateToUint8Uint64x4 + OpTruncateToUint8Uint64x8 + OpTruncateToUint16Uint32x4 + OpTruncateToUint16Uint32x8 + OpTruncateToUint16Uint32x16 + OpTruncateToUint16Uint64x2 + OpTruncateToUint16Uint64x4 + OpTruncateToUint16Uint64x8 + OpTruncateToUint32Uint64x2 + OpTruncateToUint32Uint64x4 + OpTruncateToUint32Uint64x8 OpXorInt8x16 OpXorInt8x32 OpXorInt8x64 @@ -30708,87 +30684,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPMOVUSDB128_128", - argLen: 1, - asm: x86.AVPMOVUSDB, - reg: regInfo{ - inputs: []inputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, - { - name: "VPMOVUSDB128_256", - argLen: 1, - 
asm: x86.AVPMOVUSDB, - reg: regInfo{ - inputs: []inputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, - { - name: "VPMOVUSDB128_512", - argLen: 1, - asm: x86.AVPMOVUSDB, - reg: regInfo{ - inputs: []inputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, - { - name: "VPMOVUSDBMasked128_128", - argLen: 2, - asm: x86.AVPMOVUSDB, - reg: regInfo{ - inputs: []inputInfo{ - {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, - { - name: "VPMOVUSDBMasked128_256", - argLen: 2, - asm: x86.AVPMOVUSDB, - reg: regInfo{ - inputs: []inputInfo{ - {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, - { - name: "VPMOVUSDBMasked128_512", - argLen: 2, - asm: x86.AVPMOVUSDB, - reg: regInfo{ - inputs: []inputInfo{ - {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, { name: "VPMOVUSDW128_128", argLen: 1, @@ -30870,87 +30765,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPMOVUSQB128_128", - argLen: 1, - asm: x86.AVPMOVUSQB, - reg: regInfo{ - inputs: []inputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, - { - name: "VPMOVUSQB128_256", - argLen: 1, - asm: x86.AVPMOVUSQB, - reg: regInfo{ - inputs: []inputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, - { - name: "VPMOVUSQB128_512", - argLen: 1, - asm: x86.AVPMOVUSQB, - reg: regInfo{ - inputs: []inputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - 
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, - { - name: "VPMOVUSQBMasked128_128", - argLen: 2, - asm: x86.AVPMOVUSQB, - reg: regInfo{ - inputs: []inputInfo{ - {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, - { - name: "VPMOVUSQBMasked128_256", - argLen: 2, - asm: x86.AVPMOVUSQB, - reg: regInfo{ - inputs: []inputInfo{ - {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, - { - name: "VPMOVUSQBMasked128_512", - argLen: 2, - asm: x86.AVPMOVUSQB, - reg: regInfo{ - inputs: []inputInfo{ - {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, { name: "VPMOVUSQD128_128", argLen: 1, @@ -31113,32 +30927,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPMOVUSWB128_128", - argLen: 1, - asm: x86.AVPMOVUSWB, - reg: regInfo{ - inputs: []inputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, - { - name: "VPMOVUSWB128_256", - argLen: 1, - asm: x86.AVPMOVUSWB, - reg: regInfo{ - inputs: []inputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, { name: "VPMOVUSWB256", argLen: 1, @@ -31152,34 +30940,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPMOVUSWBMasked128_128", - argLen: 2, - asm: x86.AVPMOVUSWB, - reg: regInfo{ - inputs: []inputInfo{ - {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, - { - name: "VPMOVUSWBMasked128_256", - argLen: 2, - asm: x86.AVPMOVUSWB, - reg: regInfo{ - inputs: []inputInfo{ - {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 
281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, { name: "VPMOVUSWBMasked256", argLen: 2, @@ -54179,54 +53939,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPMOVUSDBMasked128_128Merging", - argLen: 3, - resultInArg0: true, - asm: x86.AVPMOVUSDB, - reg: regInfo{ - inputs: []inputInfo{ - {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, - { - name: "VPMOVUSDBMasked128_256Merging", - argLen: 3, - resultInArg0: true, - asm: x86.AVPMOVUSDB, - reg: regInfo{ - inputs: []inputInfo{ - {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, - { - name: "VPMOVUSDBMasked128_512Merging", - argLen: 3, - resultInArg0: true, - asm: x86.AVPMOVUSDB, - reg: regInfo{ - inputs: []inputInfo{ - {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, { name: "VPMOVUSDWMasked128_128Merging", argLen: 3, @@ -54275,54 +53987,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPMOVUSQBMasked128_128Merging", - argLen: 3, - resultInArg0: true, - asm: x86.AVPMOVUSQB, - reg: regInfo{ - inputs: []inputInfo{ - {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, - { - name: "VPMOVUSQBMasked128_256Merging", - argLen: 3, - resultInArg0: true, - asm: x86.AVPMOVUSQB, - reg: regInfo{ - inputs: []inputInfo{ - {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 
281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, - { - name: "VPMOVUSQBMasked128_512Merging", - argLen: 3, - resultInArg0: true, - asm: x86.AVPMOVUSQB, - reg: regInfo{ - inputs: []inputInfo{ - {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, { name: "VPMOVUSQDMasked128_128Merging", argLen: 3, @@ -54419,38 +54083,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPMOVUSWBMasked128_128Merging", - argLen: 3, - resultInArg0: true, - asm: x86.AVPMOVUSWB, - reg: regInfo{ - inputs: []inputInfo{ - {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, - { - name: "VPMOVUSWBMasked128_256Merging", - argLen: 3, - resultInArg0: true, - asm: x86.AVPMOVUSWB, - reg: regInfo{ - inputs: []inputInfo{ - {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, { name: "VPMOVUSWBMasked256Merging", argLen: 3, @@ -87220,186 +86852,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "ConvertToInt8Int16x8", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt8Int16x16", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt8Int16x32", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt8Int32x4", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt8Int32x8", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt8Int32x16", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt8Int64x2", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt8Int64x4", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt8Int64x8", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt8SaturatedInt16x8", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt8SaturatedInt16x16", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt8SaturatedInt16x32", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt8SaturatedInt32x4", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt8SaturatedInt32x8", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt8SaturatedInt32x16", - argLen: 1, - generic: true, - }, - { - name: 
"ConvertToInt8SaturatedInt64x2", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt8SaturatedInt64x4", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt8SaturatedInt64x8", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt16Int8x16", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt16Int8x32", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt16Int32x4", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt16Int32x8", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt16Int32x16", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt16Int64x2", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt16Int64x4", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt16Int64x8", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt16SaturatedInt32x4", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt16SaturatedInt32x8", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt16SaturatedInt32x16", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt16SaturatedInt64x2", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt16SaturatedInt64x4", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt16SaturatedInt64x8", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt16SaturatedPackedInt32x4", - argLen: 2, - generic: true, - }, - { - name: "ConvertToInt16SaturatedPackedInt32x8", - argLen: 2, - generic: true, - }, - { - name: "ConvertToInt16SaturatedPackedInt32x16", - argLen: 2, - generic: true, - }, - { - name: "ConvertToInt16x8Int8x16", - argLen: 1, - generic: true, - }, { name: "ConvertToInt32Float32x4", argLen: 1, @@ -87415,286 +86867,6 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, - { - name: "ConvertToInt32Int8x16", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt32Int16x8", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt32Int16x16", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt32Int64x2", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt32Int64x4", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt32Int64x8", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt32SaturatedInt64x2", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt32SaturatedInt64x4", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt32SaturatedInt64x8", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt32x4Int8x16", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt32x4Int16x8", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt32x8Int8x16", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt64Int16x8", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt64Int32x4", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt64Int32x8", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt64x2Int8x16", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt64x2Int16x8", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt64x2Int32x4", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt64x4Int8x16", - argLen: 1, - generic: true, - }, - { - name: "ConvertToInt64x8Int8x16", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint8SaturatedUint16x8", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint8SaturatedUint16x16", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint8SaturatedUint16x32", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint8SaturatedUint32x4", - argLen: 1, - 
generic: true, - }, - { - name: "ConvertToUint8SaturatedUint32x8", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint8SaturatedUint32x16", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint8SaturatedUint64x2", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint8SaturatedUint64x4", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint8SaturatedUint64x8", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint8Uint16x8", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint8Uint16x16", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint8Uint16x32", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint8Uint32x4", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint8Uint32x8", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint8Uint32x16", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint8Uint64x2", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint8Uint64x4", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint8Uint64x8", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint16SaturatedPackedUint32x4", - argLen: 2, - generic: true, - }, - { - name: "ConvertToUint16SaturatedPackedUint32x8", - argLen: 2, - generic: true, - }, - { - name: "ConvertToUint16SaturatedPackedUint32x16", - argLen: 2, - generic: true, - }, - { - name: "ConvertToUint16SaturatedUint32x4", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint16SaturatedUint32x8", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint16SaturatedUint32x16", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint16SaturatedUint64x2", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint16SaturatedUint64x4", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint16SaturatedUint64x8", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint16Uint8x16", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint16Uint8x32", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint16Uint32x4", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint16Uint32x8", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint16Uint32x16", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint16Uint64x2", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint16Uint64x4", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint16Uint64x8", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint16x8Uint8x16", - argLen: 1, - generic: true, - }, { name: "ConvertToUint32Float32x4", argLen: 1, @@ -87710,116 +86882,6 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, - { - name: "ConvertToUint32SaturatedUint64x2", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint32SaturatedUint64x4", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint32SaturatedUint64x8", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint32Uint8x16", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint32Uint16x8", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint32Uint16x16", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint32Uint64x2", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint32Uint64x4", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint32Uint64x8", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint32x4Uint8x16", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint32x4Uint16x8", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint32x8Uint8x16", - 
argLen: 1, - generic: true, - }, - { - name: "ConvertToUint64Uint16x8", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint64Uint32x4", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint64Uint32x8", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint64x2Uint8x16", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint64x2Uint16x8", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint64x2Uint32x4", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint64x4Int16x8", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint64x4Uint8x16", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint64x4Uint16x8", - argLen: 1, - generic: true, - }, - { - name: "ConvertToUint64x8Uint8x16", - argLen: 1, - generic: true, - }, { name: "CopySignInt8x16", argLen: 2, @@ -88270,6 +87332,186 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "ExtendLo2ToInt64x2Int8x16", + argLen: 1, + generic: true, + }, + { + name: "ExtendLo2ToInt64x2Int16x8", + argLen: 1, + generic: true, + }, + { + name: "ExtendLo2ToInt64x2Int32x4", + argLen: 1, + generic: true, + }, + { + name: "ExtendLo2ToUint64x2Uint8x16", + argLen: 1, + generic: true, + }, + { + name: "ExtendLo2ToUint64x2Uint16x8", + argLen: 1, + generic: true, + }, + { + name: "ExtendLo2ToUint64x2Uint32x4", + argLen: 1, + generic: true, + }, + { + name: "ExtendLo4ToInt32x4Int8x16", + argLen: 1, + generic: true, + }, + { + name: "ExtendLo4ToInt32x4Int16x8", + argLen: 1, + generic: true, + }, + { + name: "ExtendLo4ToInt64x4Int8x16", + argLen: 1, + generic: true, + }, + { + name: "ExtendLo4ToInt64x4Int16x8", + argLen: 1, + generic: true, + }, + { + name: "ExtendLo4ToUint32x4Uint8x16", + argLen: 1, + generic: true, + }, + { + name: "ExtendLo4ToUint32x4Uint16x8", + argLen: 1, + generic: true, + }, + { + name: "ExtendLo4ToUint64x4Uint8x16", + argLen: 1, + generic: true, + }, + { + name: "ExtendLo4ToUint64x4Uint16x8", + argLen: 1, + generic: true, + }, + { + name: "ExtendLo8ToInt16x8Int8x16", + argLen: 1, + generic: true, + }, + { + name: "ExtendLo8ToInt32x8Int8x16", + argLen: 1, + generic: true, + }, + { + name: "ExtendLo8ToInt64x8Int8x16", + argLen: 1, + generic: true, + }, + { + name: "ExtendLo8ToUint16x8Uint8x16", + argLen: 1, + generic: true, + }, + { + name: "ExtendLo8ToUint32x8Uint8x16", + argLen: 1, + generic: true, + }, + { + name: "ExtendLo8ToUint64x8Uint8x16", + argLen: 1, + generic: true, + }, + { + name: "ExtendToInt16Int8x16", + argLen: 1, + generic: true, + }, + { + name: "ExtendToInt16Int8x32", + argLen: 1, + generic: true, + }, + { + name: "ExtendToInt32Int8x16", + argLen: 1, + generic: true, + }, + { + name: "ExtendToInt32Int16x8", + argLen: 1, + generic: true, + }, + { + name: "ExtendToInt32Int16x16", + argLen: 1, + generic: true, + }, + { + name: "ExtendToInt64Int16x8", + argLen: 1, + generic: true, + }, + { + name: "ExtendToInt64Int32x4", + argLen: 1, + generic: true, + }, + { + name: "ExtendToInt64Int32x8", + argLen: 1, + generic: true, + }, + { + name: "ExtendToUint16Uint8x16", + argLen: 1, + generic: true, + }, + { + name: "ExtendToUint16Uint8x32", + argLen: 1, + generic: true, + }, + { + name: "ExtendToUint32Uint8x16", + argLen: 1, + generic: true, + }, + { + name: "ExtendToUint32Uint16x8", + argLen: 1, + generic: true, + }, + { + name: "ExtendToUint32Uint16x16", + argLen: 1, + generic: true, + }, + { + name: "ExtendToUint64Uint16x8", + argLen: 1, + generic: true, + }, + { + name: "ExtendToUint64Uint32x4", + argLen: 1, + generic: true, + }, + { + name: 
"ExtendToUint64Uint32x8", + argLen: 1, + generic: true, + }, { name: "FloorFloat32x4", argLen: 1, @@ -90483,6 +89725,216 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, + { + name: "SaturateToInt8Int16x8", + argLen: 1, + generic: true, + }, + { + name: "SaturateToInt8Int16x16", + argLen: 1, + generic: true, + }, + { + name: "SaturateToInt8Int16x32", + argLen: 1, + generic: true, + }, + { + name: "SaturateToInt8Int32x4", + argLen: 1, + generic: true, + }, + { + name: "SaturateToInt8Int32x8", + argLen: 1, + generic: true, + }, + { + name: "SaturateToInt8Int32x16", + argLen: 1, + generic: true, + }, + { + name: "SaturateToInt8Int64x2", + argLen: 1, + generic: true, + }, + { + name: "SaturateToInt8Int64x4", + argLen: 1, + generic: true, + }, + { + name: "SaturateToInt8Int64x8", + argLen: 1, + generic: true, + }, + { + name: "SaturateToInt16ConcatInt32x4", + argLen: 2, + generic: true, + }, + { + name: "SaturateToInt16ConcatInt32x8", + argLen: 2, + generic: true, + }, + { + name: "SaturateToInt16ConcatInt32x16", + argLen: 2, + generic: true, + }, + { + name: "SaturateToInt16Int32x4", + argLen: 1, + generic: true, + }, + { + name: "SaturateToInt16Int32x8", + argLen: 1, + generic: true, + }, + { + name: "SaturateToInt16Int32x16", + argLen: 1, + generic: true, + }, + { + name: "SaturateToInt16Int64x2", + argLen: 1, + generic: true, + }, + { + name: "SaturateToInt16Int64x4", + argLen: 1, + generic: true, + }, + { + name: "SaturateToInt16Int64x8", + argLen: 1, + generic: true, + }, + { + name: "SaturateToInt32Int64x2", + argLen: 1, + generic: true, + }, + { + name: "SaturateToInt32Int64x4", + argLen: 1, + generic: true, + }, + { + name: "SaturateToInt32Int64x8", + argLen: 1, + generic: true, + }, + { + name: "SaturateToUint8Int16x8", + argLen: 1, + generic: true, + }, + { + name: "SaturateToUint8Int16x16", + argLen: 1, + generic: true, + }, + { + name: "SaturateToUint8Int32x4", + argLen: 1, + generic: true, + }, + { + name: "SaturateToUint8Int32x8", + argLen: 1, + generic: true, + }, + { + name: "SaturateToUint8Int32x16", + argLen: 1, + generic: true, + }, + { + name: "SaturateToUint8Int64x2", + argLen: 1, + generic: true, + }, + { + name: "SaturateToUint8Int64x4", + argLen: 1, + generic: true, + }, + { + name: "SaturateToUint8Int64x8", + argLen: 1, + generic: true, + }, + { + name: "SaturateToUint8Uint16x32", + argLen: 1, + generic: true, + }, + { + name: "SaturateToUint16ConcatUint32x4", + argLen: 2, + generic: true, + }, + { + name: "SaturateToUint16ConcatUint32x8", + argLen: 2, + generic: true, + }, + { + name: "SaturateToUint16ConcatUint32x16", + argLen: 2, + generic: true, + }, + { + name: "SaturateToUint16Uint32x4", + argLen: 1, + generic: true, + }, + { + name: "SaturateToUint16Uint32x8", + argLen: 1, + generic: true, + }, + { + name: "SaturateToUint16Uint32x16", + argLen: 1, + generic: true, + }, + { + name: "SaturateToUint16Uint64x2", + argLen: 1, + generic: true, + }, + { + name: "SaturateToUint16Uint64x4", + argLen: 1, + generic: true, + }, + { + name: "SaturateToUint16Uint64x8", + argLen: 1, + generic: true, + }, + { + name: "SaturateToUint32Uint64x2", + argLen: 1, + generic: true, + }, + { + name: "SaturateToUint32Uint64x4", + argLen: 1, + generic: true, + }, + { + name: "SaturateToUint32Uint64x8", + argLen: 1, + generic: true, + }, { name: "ScaleFloat32x4", argLen: 2, @@ -91598,6 +91050,186 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "TruncateToInt8Int16x8", + argLen: 1, + generic: true, + }, + { + name: "TruncateToInt8Int16x16", + argLen: 
1, + generic: true, + }, + { + name: "TruncateToInt8Int16x32", + argLen: 1, + generic: true, + }, + { + name: "TruncateToInt8Int32x4", + argLen: 1, + generic: true, + }, + { + name: "TruncateToInt8Int32x8", + argLen: 1, + generic: true, + }, + { + name: "TruncateToInt8Int32x16", + argLen: 1, + generic: true, + }, + { + name: "TruncateToInt8Int64x2", + argLen: 1, + generic: true, + }, + { + name: "TruncateToInt8Int64x4", + argLen: 1, + generic: true, + }, + { + name: "TruncateToInt8Int64x8", + argLen: 1, + generic: true, + }, + { + name: "TruncateToInt16Int32x4", + argLen: 1, + generic: true, + }, + { + name: "TruncateToInt16Int32x8", + argLen: 1, + generic: true, + }, + { + name: "TruncateToInt16Int32x16", + argLen: 1, + generic: true, + }, + { + name: "TruncateToInt16Int64x2", + argLen: 1, + generic: true, + }, + { + name: "TruncateToInt16Int64x4", + argLen: 1, + generic: true, + }, + { + name: "TruncateToInt16Int64x8", + argLen: 1, + generic: true, + }, + { + name: "TruncateToInt32Int64x2", + argLen: 1, + generic: true, + }, + { + name: "TruncateToInt32Int64x4", + argLen: 1, + generic: true, + }, + { + name: "TruncateToInt32Int64x8", + argLen: 1, + generic: true, + }, + { + name: "TruncateToUint8Uint16x8", + argLen: 1, + generic: true, + }, + { + name: "TruncateToUint8Uint16x16", + argLen: 1, + generic: true, + }, + { + name: "TruncateToUint8Uint16x32", + argLen: 1, + generic: true, + }, + { + name: "TruncateToUint8Uint32x4", + argLen: 1, + generic: true, + }, + { + name: "TruncateToUint8Uint32x8", + argLen: 1, + generic: true, + }, + { + name: "TruncateToUint8Uint32x16", + argLen: 1, + generic: true, + }, + { + name: "TruncateToUint8Uint64x2", + argLen: 1, + generic: true, + }, + { + name: "TruncateToUint8Uint64x4", + argLen: 1, + generic: true, + }, + { + name: "TruncateToUint8Uint64x8", + argLen: 1, + generic: true, + }, + { + name: "TruncateToUint16Uint32x4", + argLen: 1, + generic: true, + }, + { + name: "TruncateToUint16Uint32x8", + argLen: 1, + generic: true, + }, + { + name: "TruncateToUint16Uint32x16", + argLen: 1, + generic: true, + }, + { + name: "TruncateToUint16Uint64x2", + argLen: 1, + generic: true, + }, + { + name: "TruncateToUint16Uint64x4", + argLen: 1, + generic: true, + }, + { + name: "TruncateToUint16Uint64x8", + argLen: 1, + generic: true, + }, + { + name: "TruncateToUint32Uint64x2", + argLen: 1, + generic: true, + }, + { + name: "TruncateToUint32Uint64x4", + argLen: 1, + generic: true, + }, + { + name: "TruncateToUint32Uint64x8", + argLen: 1, + generic: true, + }, { name: "XorInt8x16", argLen: 2, diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index 34175c11b8e..d2618decf38 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -2667,60 +2667,6 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpConstBool(v) case OpConstNil: return rewriteValueAMD64_OpConstNil(v) - case OpConvertToInt16Int32x16: - v.Op = OpAMD64VPMOVDW256 - return true - case OpConvertToInt16Int32x4: - v.Op = OpAMD64VPMOVDW128_128 - return true - case OpConvertToInt16Int32x8: - v.Op = OpAMD64VPMOVDW128_256 - return true - case OpConvertToInt16Int64x2: - v.Op = OpAMD64VPMOVQW128_128 - return true - case OpConvertToInt16Int64x4: - v.Op = OpAMD64VPMOVQW128_256 - return true - case OpConvertToInt16Int64x8: - v.Op = OpAMD64VPMOVQW128_512 - return true - case OpConvertToInt16Int8x16: - v.Op = OpAMD64VPMOVSXBW256 - return true - case OpConvertToInt16Int8x32: - v.Op = 
OpAMD64VPMOVSXBW512
-		return true
-	case OpConvertToInt16SaturatedInt32x16:
-		v.Op = OpAMD64VPMOVSDW256
-		return true
-	case OpConvertToInt16SaturatedInt32x4:
-		v.Op = OpAMD64VPMOVSDW128_128
-		return true
-	case OpConvertToInt16SaturatedInt32x8:
-		v.Op = OpAMD64VPMOVSDW128_256
-		return true
-	case OpConvertToInt16SaturatedInt64x2:
-		v.Op = OpAMD64VPMOVSQW128_128
-		return true
-	case OpConvertToInt16SaturatedInt64x4:
-		v.Op = OpAMD64VPMOVSQW128_256
-		return true
-	case OpConvertToInt16SaturatedInt64x8:
-		v.Op = OpAMD64VPMOVSQW128_512
-		return true
-	case OpConvertToInt16SaturatedPackedInt32x16:
-		v.Op = OpAMD64VPACKSSDW512
-		return true
-	case OpConvertToInt16SaturatedPackedInt32x4:
-		v.Op = OpAMD64VPACKSSDW128
-		return true
-	case OpConvertToInt16SaturatedPackedInt32x8:
-		v.Op = OpAMD64VPACKSSDW256
-		return true
-	case OpConvertToInt16x8Int8x16:
-		v.Op = OpAMD64VPMOVSXBW128
-		return true
 	case OpConvertToInt32Float32x16:
 		v.Op = OpAMD64VCVTTPS2DQ512
 		return true
@@ -2730,174 +2676,6 @@ func rewriteValueAMD64(v *Value) bool {
 	case OpConvertToInt32Float32x8:
 		v.Op = OpAMD64VCVTTPS2DQ256
 		return true
-	case OpConvertToInt32Int16x16:
-		v.Op = OpAMD64VPMOVSXWD512
-		return true
-	case OpConvertToInt32Int16x8:
-		v.Op = OpAMD64VPMOVSXWD256
-		return true
-	case OpConvertToInt32Int64x2:
-		v.Op = OpAMD64VPMOVQD128_128
-		return true
-	case OpConvertToInt32Int64x4:
-		v.Op = OpAMD64VPMOVQD128_256
-		return true
-	case OpConvertToInt32Int64x8:
-		v.Op = OpAMD64VPMOVQD256
-		return true
-	case OpConvertToInt32Int8x16:
-		v.Op = OpAMD64VPMOVSXBD512
-		return true
-	case OpConvertToInt32SaturatedInt64x2:
-		v.Op = OpAMD64VPMOVSQD128_128
-		return true
-	case OpConvertToInt32SaturatedInt64x4:
-		v.Op = OpAMD64VPMOVSQD128_256
-		return true
-	case OpConvertToInt32SaturatedInt64x8:
-		v.Op = OpAMD64VPMOVSQD256
-		return true
-	case OpConvertToInt32x4Int16x8:
-		v.Op = OpAMD64VPMOVSXWD128
-		return true
-	case OpConvertToInt32x4Int8x16:
-		v.Op = OpAMD64VPMOVSXBD128
-		return true
-	case OpConvertToInt32x8Int8x16:
-		v.Op = OpAMD64VPMOVSXBD256
-		return true
-	case OpConvertToInt64Int16x8:
-		v.Op = OpAMD64VPMOVSXWQ512
-		return true
-	case OpConvertToInt64Int32x4:
-		v.Op = OpAMD64VPMOVSXDQ256
-		return true
-	case OpConvertToInt64Int32x8:
-		v.Op = OpAMD64VPMOVSXDQ512
-		return true
-	case OpConvertToInt64x2Int16x8:
-		v.Op = OpAMD64VPMOVSXWQ128
-		return true
-	case OpConvertToInt64x2Int32x4:
-		v.Op = OpAMD64VPMOVSXDQ128
-		return true
-	case OpConvertToInt64x2Int8x16:
-		v.Op = OpAMD64VPMOVSXBQ128
-		return true
-	case OpConvertToInt64x4Int8x16:
-		v.Op = OpAMD64VPMOVSXBQ256
-		return true
-	case OpConvertToInt64x8Int8x16:
-		v.Op = OpAMD64VPMOVSXBQ512
-		return true
-	case OpConvertToInt8Int16x16:
-		v.Op = OpAMD64VPMOVWB128_256
-		return true
-	case OpConvertToInt8Int16x32:
-		v.Op = OpAMD64VPMOVWB256
-		return true
-	case OpConvertToInt8Int16x8:
-		v.Op = OpAMD64VPMOVWB128_128
-		return true
-	case OpConvertToInt8Int32x16:
-		v.Op = OpAMD64VPMOVDB128_512
-		return true
-	case OpConvertToInt8Int32x4:
-		v.Op = OpAMD64VPMOVDB128_128
-		return true
-	case OpConvertToInt8Int32x8:
-		v.Op = OpAMD64VPMOVDB128_256
-		return true
-	case OpConvertToInt8Int64x2:
-		v.Op = OpAMD64VPMOVQB128_128
-		return true
-	case OpConvertToInt8Int64x4:
-		v.Op = OpAMD64VPMOVQB128_256
-		return true
-	case OpConvertToInt8Int64x8:
-		v.Op = OpAMD64VPMOVQB128_512
-		return true
-	case OpConvertToInt8SaturatedInt16x16:
-		v.Op = OpAMD64VPMOVSWB128_256
-		return true
-	case OpConvertToInt8SaturatedInt16x32:
-		v.Op = OpAMD64VPMOVSWB256
-		return true
-	case OpConvertToInt8SaturatedInt16x8:
-		v.Op = OpAMD64VPMOVSWB128_128
-		return true
-	case OpConvertToInt8SaturatedInt32x16:
-		v.Op = OpAMD64VPMOVSDB128_512
-		return true
-	case OpConvertToInt8SaturatedInt32x4:
-		v.Op = OpAMD64VPMOVSDB128_128
-		return true
-	case OpConvertToInt8SaturatedInt32x8:
-		v.Op = OpAMD64VPMOVSDB128_256
-		return true
-	case OpConvertToInt8SaturatedInt64x2:
-		v.Op = OpAMD64VPMOVSQB128_128
-		return true
-	case OpConvertToInt8SaturatedInt64x4:
-		v.Op = OpAMD64VPMOVSQB128_256
-		return true
-	case OpConvertToInt8SaturatedInt64x8:
-		v.Op = OpAMD64VPMOVSQB128_512
-		return true
-	case OpConvertToUint16SaturatedPackedUint32x16:
-		v.Op = OpAMD64VPACKUSDW512
-		return true
-	case OpConvertToUint16SaturatedPackedUint32x4:
-		v.Op = OpAMD64VPACKUSDW128
-		return true
-	case OpConvertToUint16SaturatedPackedUint32x8:
-		v.Op = OpAMD64VPACKUSDW256
-		return true
-	case OpConvertToUint16SaturatedUint32x16:
-		v.Op = OpAMD64VPMOVUSDW256
-		return true
-	case OpConvertToUint16SaturatedUint32x4:
-		v.Op = OpAMD64VPMOVUSDW128_128
-		return true
-	case OpConvertToUint16SaturatedUint32x8:
-		v.Op = OpAMD64VPMOVUSDW128_256
-		return true
-	case OpConvertToUint16SaturatedUint64x2:
-		v.Op = OpAMD64VPMOVUSQW128_128
-		return true
-	case OpConvertToUint16SaturatedUint64x4:
-		v.Op = OpAMD64VPMOVUSQW128_256
-		return true
-	case OpConvertToUint16SaturatedUint64x8:
-		v.Op = OpAMD64VPMOVUSQW128_512
-		return true
-	case OpConvertToUint16Uint32x16:
-		v.Op = OpAMD64VPMOVDW256
-		return true
-	case OpConvertToUint16Uint32x4:
-		v.Op = OpAMD64VPMOVDW128_128
-		return true
-	case OpConvertToUint16Uint32x8:
-		v.Op = OpAMD64VPMOVDW128_256
-		return true
-	case OpConvertToUint16Uint64x2:
-		v.Op = OpAMD64VPMOVQW128_128
-		return true
-	case OpConvertToUint16Uint64x4:
-		v.Op = OpAMD64VPMOVQW128_256
-		return true
-	case OpConvertToUint16Uint64x8:
-		v.Op = OpAMD64VPMOVQW128_512
-		return true
-	case OpConvertToUint16Uint8x16:
-		v.Op = OpAMD64VPMOVZXBW256
-		return true
-	case OpConvertToUint16Uint8x32:
-		v.Op = OpAMD64VPMOVZXBW512
-		return true
-	case OpConvertToUint16x8Uint8x16:
-		v.Op = OpAMD64VPMOVZXBW128
-		return true
 	case OpConvertToUint32Float32x16:
 		v.Op = OpAMD64VCVTPS2UDQ512
 		return true
@@ -2907,126 +2685,6 @@ func rewriteValueAMD64(v *Value) bool {
 	case OpConvertToUint32Float32x8:
 		v.Op = OpAMD64VCVTPS2UDQ256
 		return true
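Editorial note on the rename visible in these hunks: the old ConvertTo* names covered two different narrowing behaviors that the new names keep apart. TruncateTo* lowers to the plain VPMOV{W,D,Q}{B,W,D} forms, which simply drop the high bits of each lane, while SaturateTo* lowers to the VPMOVS*/VPMOVUS* forms, which clamp to the destination range. A scalar sketch of the distinction (illustrative only; the helper names are ad hoc, not from this CL):

```go
package main

import "fmt"

// truncate16to8 drops the high bits, as VPMOVWB does per lane.
func truncate16to8(x int16) int8 { return int8(x) }

// saturate16to8 clamps to the int8 range, as VPMOVSWB does per lane.
func saturate16to8(x int16) int8 {
	if x > 127 {
		return 127
	}
	if x < -128 {
		return -128
	}
	return int8(x)
}

func main() {
	fmt.Println(truncate16to8(300)) // 44: 300 = 0x12C, low byte is 0x2C
	fmt.Println(saturate16to8(300)) // 127: clamped to MaxInt8
}
```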
OpAMD64VPMOVZXWQ128 - return true - case OpConvertToUint64x2Uint32x4: - v.Op = OpAMD64VPMOVZXDQ128 - return true - case OpConvertToUint64x2Uint8x16: - v.Op = OpAMD64VPMOVZXBQ128 - return true - case OpConvertToUint64x4Int16x8: - v.Op = OpAMD64VPMOVSXWQ256 - return true - case OpConvertToUint64x4Uint16x8: - v.Op = OpAMD64VPMOVZXWQ256 - return true - case OpConvertToUint64x4Uint8x16: - v.Op = OpAMD64VPMOVZXBQ256 - return true - case OpConvertToUint64x8Uint8x16: - v.Op = OpAMD64VPMOVZXBQ512 - return true - case OpConvertToUint8SaturatedUint16x16: - v.Op = OpAMD64VPMOVUSWB128_256 - return true - case OpConvertToUint8SaturatedUint16x32: - v.Op = OpAMD64VPMOVUSWB256 - return true - case OpConvertToUint8SaturatedUint16x8: - v.Op = OpAMD64VPMOVUSWB128_128 - return true - case OpConvertToUint8SaturatedUint32x16: - v.Op = OpAMD64VPMOVUSDB128_512 - return true - case OpConvertToUint8SaturatedUint32x4: - v.Op = OpAMD64VPMOVUSDB128_128 - return true - case OpConvertToUint8SaturatedUint32x8: - v.Op = OpAMD64VPMOVUSDB128_256 - return true - case OpConvertToUint8SaturatedUint64x2: - v.Op = OpAMD64VPMOVUSQB128_128 - return true - case OpConvertToUint8SaturatedUint64x4: - v.Op = OpAMD64VPMOVUSQB128_256 - return true - case OpConvertToUint8SaturatedUint64x8: - v.Op = OpAMD64VPMOVUSQB128_512 - return true - case OpConvertToUint8Uint16x16: - v.Op = OpAMD64VPMOVWB128_256 - return true - case OpConvertToUint8Uint16x32: - v.Op = OpAMD64VPMOVWB256 - return true - case OpConvertToUint8Uint16x8: - v.Op = OpAMD64VPMOVWB128_128 - return true - case OpConvertToUint8Uint32x16: - v.Op = OpAMD64VPMOVDB128_512 - return true - case OpConvertToUint8Uint32x4: - v.Op = OpAMD64VPMOVDB128_128 - return true - case OpConvertToUint8Uint32x8: - v.Op = OpAMD64VPMOVDB128_256 - return true - case OpConvertToUint8Uint64x2: - v.Op = OpAMD64VPMOVQB128_128 - return true - case OpConvertToUint8Uint64x4: - v.Op = OpAMD64VPMOVQB128_256 - return true - case OpConvertToUint8Uint64x8: - v.Op = OpAMD64VPMOVQB128_512 - return true case OpCopySignInt16x16: v.Op = OpAMD64VPSIGNW256 return true @@ -3369,6 +3027,114 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpExpandUint8x32(v) case OpExpandUint8x64: return rewriteValueAMD64_OpExpandUint8x64(v) + case OpExtendLo2ToInt64x2Int16x8: + v.Op = OpAMD64VPMOVSXWQ128 + return true + case OpExtendLo2ToInt64x2Int32x4: + v.Op = OpAMD64VPMOVSXDQ128 + return true + case OpExtendLo2ToInt64x2Int8x16: + v.Op = OpAMD64VPMOVSXBQ128 + return true + case OpExtendLo2ToUint64x2Uint16x8: + v.Op = OpAMD64VPMOVZXWQ128 + return true + case OpExtendLo2ToUint64x2Uint32x4: + v.Op = OpAMD64VPMOVZXDQ128 + return true + case OpExtendLo2ToUint64x2Uint8x16: + v.Op = OpAMD64VPMOVZXBQ128 + return true + case OpExtendLo4ToInt32x4Int16x8: + v.Op = OpAMD64VPMOVSXWD128 + return true + case OpExtendLo4ToInt32x4Int8x16: + v.Op = OpAMD64VPMOVSXBD128 + return true + case OpExtendLo4ToInt64x4Int16x8: + v.Op = OpAMD64VPMOVSXWQ256 + return true + case OpExtendLo4ToInt64x4Int8x16: + v.Op = OpAMD64VPMOVSXBQ256 + return true + case OpExtendLo4ToUint32x4Uint16x8: + v.Op = OpAMD64VPMOVZXWD128 + return true + case OpExtendLo4ToUint32x4Uint8x16: + v.Op = OpAMD64VPMOVZXBD128 + return true + case OpExtendLo4ToUint64x4Uint16x8: + v.Op = OpAMD64VPMOVZXWQ256 + return true + case OpExtendLo4ToUint64x4Uint8x16: + v.Op = OpAMD64VPMOVZXBQ256 + return true + case OpExtendLo8ToInt16x8Int8x16: + v.Op = OpAMD64VPMOVSXBW128 + return true + case OpExtendLo8ToInt32x8Int8x16: + v.Op = OpAMD64VPMOVSXBD256 + return true + case 
+	case OpExtendLo8ToInt64x8Int8x16:
+		v.Op = OpAMD64VPMOVSXBQ512
+		return true
+	case OpExtendLo8ToUint16x8Uint8x16:
+		v.Op = OpAMD64VPMOVZXBW128
+		return true
+	case OpExtendLo8ToUint32x8Uint8x16:
+		v.Op = OpAMD64VPMOVZXBD256
+		return true
+	case OpExtendLo8ToUint64x8Uint8x16:
+		v.Op = OpAMD64VPMOVZXBQ512
+		return true
+	case OpExtendToInt16Int8x16:
+		v.Op = OpAMD64VPMOVSXBW256
+		return true
+	case OpExtendToInt16Int8x32:
+		v.Op = OpAMD64VPMOVSXBW512
+		return true
+	case OpExtendToInt32Int16x16:
+		v.Op = OpAMD64VPMOVSXWD512
+		return true
+	case OpExtendToInt32Int16x8:
+		v.Op = OpAMD64VPMOVSXWD256
+		return true
+	case OpExtendToInt32Int8x16:
+		v.Op = OpAMD64VPMOVSXBD512
+		return true
+	case OpExtendToInt64Int16x8:
+		v.Op = OpAMD64VPMOVSXWQ512
+		return true
+	case OpExtendToInt64Int32x4:
+		v.Op = OpAMD64VPMOVSXDQ256
+		return true
+	case OpExtendToInt64Int32x8:
+		v.Op = OpAMD64VPMOVSXDQ512
+		return true
+	case OpExtendToUint16Uint8x16:
+		v.Op = OpAMD64VPMOVZXBW256
+		return true
+	case OpExtendToUint16Uint8x32:
+		v.Op = OpAMD64VPMOVZXBW512
+		return true
+	case OpExtendToUint32Uint16x16:
+		v.Op = OpAMD64VPMOVZXWD512
+		return true
+	case OpExtendToUint32Uint16x8:
+		v.Op = OpAMD64VPMOVZXWD256
+		return true
+	case OpExtendToUint32Uint8x16:
+		v.Op = OpAMD64VPMOVZXBD512
+		return true
+	case OpExtendToUint64Uint16x8:
+		v.Op = OpAMD64VPMOVZXWQ512
+		return true
+	case OpExtendToUint64Uint32x4:
+		v.Op = OpAMD64VPMOVZXDQ256
+		return true
+	case OpExtendToUint64Uint32x8:
+		v.Op = OpAMD64VPMOVZXDQ512
+		return true
 	case OpFMA:
 		return rewriteValueAMD64_OpFMA(v)
 	case OpFloor:
@@ -4989,6 +4755,132 @@ func rewriteValueAMD64(v *Value) bool {
 	case OpSHA256TwoRoundsUint32x4:
 		v.Op = OpAMD64SHA256RNDS2128
 		return true
+	case OpSaturateToInt16ConcatInt32x16:
+		v.Op = OpAMD64VPACKSSDW512
+		return true
+	case OpSaturateToInt16ConcatInt32x4:
+		v.Op = OpAMD64VPACKSSDW128
+		return true
+	case OpSaturateToInt16ConcatInt32x8:
+		v.Op = OpAMD64VPACKSSDW256
+		return true
+	case OpSaturateToInt16Int32x16:
+		v.Op = OpAMD64VPMOVSDW256
+		return true
+	case OpSaturateToInt16Int32x4:
+		v.Op = OpAMD64VPMOVSDW128_128
+		return true
+	case OpSaturateToInt16Int32x8:
+		v.Op = OpAMD64VPMOVSDW128_256
+		return true
+	case OpSaturateToInt16Int64x2:
+		v.Op = OpAMD64VPMOVSQW128_128
+		return true
+	case OpSaturateToInt16Int64x4:
+		v.Op = OpAMD64VPMOVSQW128_256
+		return true
+	case OpSaturateToInt16Int64x8:
+		v.Op = OpAMD64VPMOVSQW128_512
+		return true
+	case OpSaturateToInt32Int64x2:
+		v.Op = OpAMD64VPMOVSQD128_128
+		return true
+	case OpSaturateToInt32Int64x4:
+		v.Op = OpAMD64VPMOVSQD128_256
+		return true
+	case OpSaturateToInt32Int64x8:
+		v.Op = OpAMD64VPMOVSQD256
+		return true
+	case OpSaturateToInt8Int16x16:
+		v.Op = OpAMD64VPMOVSWB128_256
+		return true
+	case OpSaturateToInt8Int16x32:
+		v.Op = OpAMD64VPMOVSWB256
+		return true
+	case OpSaturateToInt8Int16x8:
+		v.Op = OpAMD64VPMOVSWB128_128
+		return true
+	case OpSaturateToInt8Int32x16:
+		v.Op = OpAMD64VPMOVSDB128_512
+		return true
+	case OpSaturateToInt8Int32x4:
+		v.Op = OpAMD64VPMOVSDB128_128
+		return true
+	case OpSaturateToInt8Int32x8:
+		v.Op = OpAMD64VPMOVSDB128_256
+		return true
+	case OpSaturateToInt8Int64x2:
+		v.Op = OpAMD64VPMOVSQB128_128
+		return true
+	case OpSaturateToInt8Int64x4:
+		v.Op = OpAMD64VPMOVSQB128_256
+		return true
+	case OpSaturateToInt8Int64x8:
+		v.Op = OpAMD64VPMOVSQB128_512
+		return true
+	case OpSaturateToUint16ConcatUint32x16:
+		v.Op = OpAMD64VPACKUSDW512
+		return true
+	case OpSaturateToUint16ConcatUint32x4:
+		v.Op = OpAMD64VPACKUSDW128
+		return true
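The …Concat… variants added here are the outliers in the SaturateTo family: they lower to VPACKSSDW/VPACKUSDW, which consume two source vectors and produce one result with twice the lane count, rather than narrowing a single vector. A scalar sketch for the 128-bit case (hypothetical helper name; the wider widths interleave per 128-bit half, which this sketch ignores):

```go
package main

import "fmt"

// saturateToInt16Concat models SaturateToInt16ConcatInt32x4 (VPACKSSDW128):
// both 4-lane int32 inputs are saturated to int16 and concatenated into a
// single 8-lane result, x's lanes first, then y's.
func saturateToInt16Concat(x, y [4]int32) [8]int16 {
	var out [8]int16
	for i, v := range append(x[:], y[:]...) {
		switch {
		case v > 32767:
			out[i] = 32767
		case v < -32768:
			out[i] = -32768
		default:
			out[i] = int16(v)
		}
	}
	return out
}

func main() {
	x := [4]int32{1, 100000, -100000, 4}
	y := [4]int32{5, 6, 7, 8}
	fmt.Println(saturateToInt16Concat(x, y)) // [1 32767 -32768 4 5 6 7 8]
}
```

The two-input shape is also why these ops carry argLen: 2 in the opcode table earlier in this patch, unlike the single-input narrowing ops.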
+	case OpSaturateToUint16ConcatUint32x8:
+		v.Op = OpAMD64VPACKUSDW256
+		return true
+	case OpSaturateToUint16Uint32x16:
+		v.Op = OpAMD64VPMOVUSDW256
+		return true
+	case OpSaturateToUint16Uint32x4:
+		v.Op = OpAMD64VPMOVUSDW128_128
+		return true
+	case OpSaturateToUint16Uint32x8:
+		v.Op = OpAMD64VPMOVUSDW128_256
+		return true
+	case OpSaturateToUint16Uint64x2:
+		v.Op = OpAMD64VPMOVUSQW128_128
+		return true
+	case OpSaturateToUint16Uint64x4:
+		v.Op = OpAMD64VPMOVUSQW128_256
+		return true
+	case OpSaturateToUint16Uint64x8:
+		v.Op = OpAMD64VPMOVUSQW128_512
+		return true
+	case OpSaturateToUint32Uint64x2:
+		v.Op = OpAMD64VPMOVUSQD128_128
+		return true
+	case OpSaturateToUint32Uint64x4:
+		v.Op = OpAMD64VPMOVUSQD128_256
+		return true
+	case OpSaturateToUint32Uint64x8:
+		v.Op = OpAMD64VPMOVUSQD256
+		return true
+	case OpSaturateToUint8Int16x16:
+		v.Op = OpAMD64VPMOVSWB128_256
+		return true
+	case OpSaturateToUint8Int16x8:
+		v.Op = OpAMD64VPMOVSWB128_128
+		return true
+	case OpSaturateToUint8Int32x16:
+		v.Op = OpAMD64VPMOVSDB128_512
+		return true
+	case OpSaturateToUint8Int32x4:
+		v.Op = OpAMD64VPMOVSDB128_128
+		return true
+	case OpSaturateToUint8Int32x8:
+		v.Op = OpAMD64VPMOVSDB128_256
+		return true
+	case OpSaturateToUint8Int64x2:
+		v.Op = OpAMD64VPMOVSQB128_128
+		return true
+	case OpSaturateToUint8Int64x4:
+		v.Op = OpAMD64VPMOVSQB128_256
+		return true
+	case OpSaturateToUint8Int64x8:
+		v.Op = OpAMD64VPMOVSQB128_512
+		return true
+	case OpSaturateToUint8Uint16x32:
+		v.Op = OpAMD64VPMOVUSWB256
+		return true
 	case OpScaleFloat32x16:
 		v.Op = OpAMD64VSCALEFPS512
 		return true
@@ -5899,6 +5791,114 @@ func rewriteValueAMD64(v *Value) bool {
 		return rewriteValueAMD64_OpTruncScaledResidueFloat64x4(v)
 	case OpTruncScaledResidueFloat64x8:
 		return rewriteValueAMD64_OpTruncScaledResidueFloat64x8(v)
+	case OpTruncateToInt16Int32x16:
+		v.Op = OpAMD64VPMOVDW256
+		return true
+	case OpTruncateToInt16Int32x4:
+		v.Op = OpAMD64VPMOVDW128_128
+		return true
+	case OpTruncateToInt16Int32x8:
+		v.Op = OpAMD64VPMOVDW128_256
+		return true
+	case OpTruncateToInt16Int64x2:
+		v.Op = OpAMD64VPMOVQW128_128
+		return true
+	case OpTruncateToInt16Int64x4:
+		v.Op = OpAMD64VPMOVQW128_256
+		return true
+	case OpTruncateToInt16Int64x8:
+		v.Op = OpAMD64VPMOVQW128_512
+		return true
+	case OpTruncateToInt32Int64x2:
+		v.Op = OpAMD64VPMOVQD128_128
+		return true
+	case OpTruncateToInt32Int64x4:
+		v.Op = OpAMD64VPMOVQD128_256
+		return true
+	case OpTruncateToInt32Int64x8:
+		v.Op = OpAMD64VPMOVQD256
+		return true
+	case OpTruncateToInt8Int16x16:
+		v.Op = OpAMD64VPMOVWB128_256
+		return true
+	case OpTruncateToInt8Int16x32:
+		v.Op = OpAMD64VPMOVWB256
+		return true
+	case OpTruncateToInt8Int16x8:
+		v.Op = OpAMD64VPMOVWB128_128
+		return true
+	case OpTruncateToInt8Int32x16:
+		v.Op = OpAMD64VPMOVDB128_512
+		return true
+	case OpTruncateToInt8Int32x4:
+		v.Op = OpAMD64VPMOVDB128_128
+		return true
+	case OpTruncateToInt8Int32x8:
+		v.Op = OpAMD64VPMOVDB128_256
+		return true
+	case OpTruncateToInt8Int64x2:
+		v.Op = OpAMD64VPMOVQB128_128
+		return true
+	case OpTruncateToInt8Int64x4:
+		v.Op = OpAMD64VPMOVQB128_256
+		return true
+	case OpTruncateToInt8Int64x8:
+		v.Op = OpAMD64VPMOVQB128_512
+		return true
+	case OpTruncateToUint16Uint32x16:
+		v.Op = OpAMD64VPMOVDW256
+		return true
+	case OpTruncateToUint16Uint32x4:
+		v.Op = OpAMD64VPMOVDW128_128
+		return true
+	case OpTruncateToUint16Uint32x8:
+		v.Op = OpAMD64VPMOVDW128_256
+		return true
+	case OpTruncateToUint16Uint64x2:
+		v.Op = OpAMD64VPMOVQW128_128
+		return true
+	case OpTruncateToUint16Uint64x4:
+		v.Op = OpAMD64VPMOVQW128_256
+		return true
+	case OpTruncateToUint16Uint64x8:
+		v.Op = OpAMD64VPMOVQW128_512
+		return true
+	case OpTruncateToUint32Uint64x2:
+		v.Op = OpAMD64VPMOVQD128_128
+		return true
+	case OpTruncateToUint32Uint64x4:
+		v.Op = OpAMD64VPMOVQD128_256
+		return true
+	case OpTruncateToUint32Uint64x8:
+		v.Op = OpAMD64VPMOVQD256
+		return true
+	case OpTruncateToUint8Uint16x16:
+		v.Op = OpAMD64VPMOVWB128_256
+		return true
+	case OpTruncateToUint8Uint16x32:
+		v.Op = OpAMD64VPMOVWB256
+		return true
+	case OpTruncateToUint8Uint16x8:
+		v.Op = OpAMD64VPMOVWB128_128
+		return true
+	case OpTruncateToUint8Uint32x16:
+		v.Op = OpAMD64VPMOVDB128_512
+		return true
+	case OpTruncateToUint8Uint32x4:
+		v.Op = OpAMD64VPMOVDB128_128
+		return true
+	case OpTruncateToUint8Uint32x8:
+		v.Op = OpAMD64VPMOVDB128_256
+		return true
+	case OpTruncateToUint8Uint64x2:
+		v.Op = OpAMD64VPMOVQB128_128
+		return true
+	case OpTruncateToUint8Uint64x4:
+		v.Op = OpAMD64VPMOVQB128_256
+		return true
+	case OpTruncateToUint8Uint64x8:
+		v.Op = OpAMD64VPMOVQB128_512
+		return true
 	case OpWB:
 		v.Op = OpAMD64LoweredWB
 		return true
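The hunks that follow reorder the mask-fold rules for VMOVDQU16Masked*/VMOVDQU32Masked* to track the renamed ops. The pattern is the same throughout: a masked merge applied to the result of an unmasked op is collapsed into the op's masked form, eliminating the separate blend. A toy model of the shape these rules share (the types and table here are illustrative, not the compiler's real Value/Op machinery):

```go
package main

import "fmt"

// Op and Value are stand-ins for the SSA types; maskedForm mirrors pairs
// such as VPMOVSXWQ128 -> VPMOVSXWQMasked128 from the rules below.
type Op string

type Value struct {
	Op   Op
	Args []*Value
}

var maskedForm = map[Op]Op{
	"VPMOVSXWQ128": "VPMOVSXWQMasked128",
	"VPMOVZXWQ128": "VPMOVZXWQMasked128",
}

// foldMask rewrites (VMOVDQU16Masked128 (op x) mask) into (opMasked x mask),
// the shape shared by every "// match:" rule in the hunks that follow.
func foldMask(v *Value) bool {
	if v.Op != "VMOVDQU16Masked128" || len(v.Args) != 2 {
		return false
	}
	src, mask := v.Args[0], v.Args[1]
	masked, ok := maskedForm[src.Op]
	if !ok {
		return false
	}
	v.Op = masked
	v.Args = []*Value{src.Args[0], mask}
	return true
}

func main() {
	x := &Value{Op: "Arg"}
	mask := &Value{Op: "Mask"}
	v := &Value{Op: "VMOVDQU16Masked128", Args: []*Value{{Op: "VPMOVSXWQ128", Args: []*Value{x}}, mask}}
	fmt.Println(foldMask(v), v.Op) // true VPMOVSXWQMasked128
}
```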
v.reset(OpAMD64VPMOVSXWQMasked128) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU16Masked128 (VPMOVZXWQ128 x) mask) + // result: (VPMOVZXWQMasked128 x mask) + for { + if v_0.Op != OpAMD64VPMOVZXWQ128 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVZXWQMasked128) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU16Masked128 (VPMOVSXWD128 x) mask) + // result: (VPMOVSXWDMasked128 x mask) + for { + if v_0.Op != OpAMD64VPMOVSXWD128 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSXWDMasked128) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU16Masked128 (VPMOVZXWD128 x) mask) + // result: (VPMOVZXWDMasked128 x mask) + for { + if v_0.Op != OpAMD64VPMOVZXWD128 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVZXWDMasked128) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU16Masked128 (VPMAXSW128 x y) mask) // result: (VPMAXSWMasked128 x y mask) for { @@ -31493,6 +31457,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU16Masked128(v *Value) bool { v.AddArg3(x, y, mask) return true } + // match: (VMOVDQU16Masked128 (VPMOVSWB128_128 x) mask) + // result: (VPMOVSWBMasked128_128 x mask) + for { + if v_0.Op != OpAMD64VPMOVSWB128_128 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSWBMasked128_128) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU16Masked128 (VPSHLDW128 [a] x y) mask) // result: (VPSHLDWMasked128 [a] x y mask) for { @@ -31668,6 +31644,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU16Masked128(v *Value) bool { v.AddArg3(x, y, mask) return true } + // match: (VMOVDQU16Masked128 (VPMOVWB128_128 x) mask) + // result: (VPMOVWBMasked128_128 x mask) + for { + if v_0.Op != OpAMD64VPMOVWB128_128 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVWBMasked128_128) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU16Masked128 (VPSHUFHW128 [a] x) mask) // result: (VPSHUFHWMasked128 [a] x mask) for { @@ -31819,100 +31807,30 @@ func rewriteValueAMD64_OpAMD64VMOVDQU16Masked256(v *Value) bool { v.AddArg4(x, y, z, mask) return true } - // match: (VMOVDQU16Masked256 (VPMOVWB128_256 x) mask) - // result: (VPMOVWBMasked128_256 x mask) + // match: (VMOVDQU16Masked256 (VPMADDWD256 x y) mask) + // result: (VPMADDWDMasked256 x y mask) for { - if v_0.Op != OpAMD64VPMOVWB128_256 { + if v_0.Op != OpAMD64VPMADDWD256 { break } + y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVWBMasked128_256) - v.AddArg2(x, mask) + v.reset(OpAMD64VPMADDWDMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU16Masked256 (VPMOVWB256 x) mask) - // result: (VPMOVWBMasked256 x mask) + // match: (VMOVDQU16Masked256 (VPMADDUBSW256 x y) mask) + // result: (VPMADDUBSWMasked256 x y mask) for { - if v_0.Op != OpAMD64VPMOVWB256 { + if v_0.Op != OpAMD64VPMADDUBSW256 { break } + y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVWBMasked256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU16Masked256 (VPMOVSWB128_256 x) mask) - // result: (VPMOVSWBMasked128_256 x mask) - for { - if v_0.Op != OpAMD64VPMOVSWB128_256 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSWBMasked128_256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU16Masked256 (VPMOVSWB256 x) mask) - // result: (VPMOVSWBMasked256 x mask) - for { - if v_0.Op != OpAMD64VPMOVSWB256 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSWBMasked256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU16Masked256 (VPMOVSXWD256 x) 
mask) - // result: (VPMOVSXWDMasked256 x mask) - for { - if v_0.Op != OpAMD64VPMOVSXWD256 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSXWDMasked256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU16Masked256 (VPMOVUSWB128_256 x) mask) - // result: (VPMOVUSWBMasked128_256 x mask) - for { - if v_0.Op != OpAMD64VPMOVUSWB128_256 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVUSWBMasked128_256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU16Masked256 (VPMOVUSWB256 x) mask) - // result: (VPMOVUSWBMasked256 x mask) - for { - if v_0.Op != OpAMD64VPMOVUSWB256 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVUSWBMasked256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU16Masked256 (VPMOVZXWD256 x) mask) - // result: (VPMOVZXWDMasked256 x mask) - for { - if v_0.Op != OpAMD64VPMOVZXWD256 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVZXWDMasked256) - v.AddArg2(x, mask) + v.reset(OpAMD64VPMADDUBSWMasked256) + v.AddArg3(x, y, mask) return true } // match: (VMOVDQU16Masked256 (VPMOVSXWQ256 x) mask) @@ -31939,30 +31857,28 @@ func rewriteValueAMD64_OpAMD64VMOVDQU16Masked256(v *Value) bool { v.AddArg2(x, mask) return true } - // match: (VMOVDQU16Masked256 (VPMADDWD256 x y) mask) - // result: (VPMADDWDMasked256 x y mask) + // match: (VMOVDQU16Masked256 (VPMOVSXWD256 x) mask) + // result: (VPMOVSXWDMasked256 x mask) for { - if v_0.Op != OpAMD64VPMADDWD256 { + if v_0.Op != OpAMD64VPMOVSXWD256 { break } - y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMADDWDMasked256) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VPMOVSXWDMasked256) + v.AddArg2(x, mask) return true } - // match: (VMOVDQU16Masked256 (VPMADDUBSW256 x y) mask) - // result: (VPMADDUBSWMasked256 x y mask) + // match: (VMOVDQU16Masked256 (VPMOVZXWD256 x) mask) + // result: (VPMOVZXWDMasked256 x mask) for { - if v_0.Op != OpAMD64VPMADDUBSW256 { + if v_0.Op != OpAMD64VPMOVZXWD256 { break } - y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMADDUBSWMasked256) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VPMOVZXWDMasked256) + v.AddArg2(x, mask) return true } // match: (VMOVDQU16Masked256 (VPMAXSW256 x y) mask) @@ -32081,6 +31997,42 @@ func rewriteValueAMD64_OpAMD64VMOVDQU16Masked256(v *Value) bool { v.AddArg3(x, y, mask) return true } + // match: (VMOVDQU16Masked256 (VPMOVSWB128_256 x) mask) + // result: (VPMOVSWBMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVSWB128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSWBMasked128_256) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU16Masked256 (VPMOVSWB256 x) mask) + // result: (VPMOVSWBMasked256 x mask) + for { + if v_0.Op != OpAMD64VPMOVSWB256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSWBMasked256) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU16Masked256 (VPMOVUSWB256 x) mask) + // result: (VPMOVUSWBMasked256 x mask) + for { + if v_0.Op != OpAMD64VPMOVUSWB256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVUSWBMasked256) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU16Masked256 (VPSHLDW256 [a] x y) mask) // result: (VPSHLDWMasked256 [a] x y mask) for { @@ -32256,6 +32208,30 @@ func rewriteValueAMD64_OpAMD64VMOVDQU16Masked256(v *Value) bool { v.AddArg3(x, y, mask) return true } + // match: (VMOVDQU16Masked256 (VPMOVWB128_256 x) mask) + // result: (VPMOVWBMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVWB128_256 { + break + } + x := 
v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVWBMasked128_256) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU16Masked256 (VPMOVWB256 x) mask) + // result: (VPMOVWBMasked256 x mask) + for { + if v_0.Op != OpAMD64VPMOVWB256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVWBMasked256) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU16Masked256 (VPSHUFHW256 [a] x) mask) // result: (VPSHUFHWMasked256 [a] x mask) for { @@ -32407,6 +32383,32 @@ func rewriteValueAMD64_OpAMD64VMOVDQU16Masked512(v *Value) bool { v.AddArg4(x, y, z, mask) return true } + // match: (VMOVDQU16Masked512 (VPMADDWD512 x y) mask) + // result: (VPMADDWDMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPMADDWD512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMADDWDMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU16Masked512 (VPMADDUBSW512 x y) mask) + // result: (VPMADDUBSWMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPMADDUBSW512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMADDUBSWMasked512) + v.AddArg3(x, y, mask) + return true + } // match: (VMOVDQU16Masked512 (VPMOVSXWD512 x) mask) // result: (VPMOVSXWDMasked512 x mask) for { @@ -32455,32 +32457,6 @@ func rewriteValueAMD64_OpAMD64VMOVDQU16Masked512(v *Value) bool { v.AddArg2(x, mask) return true } - // match: (VMOVDQU16Masked512 (VPMADDWD512 x y) mask) - // result: (VPMADDWDMasked512 x y mask) - for { - if v_0.Op != OpAMD64VPMADDWD512 { - break - } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMADDWDMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU16Masked512 (VPMADDUBSW512 x y) mask) - // result: (VPMADDUBSWMasked512 x y mask) - for { - if v_0.Op != OpAMD64VPMADDUBSW512 { - break - } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMADDUBSWMasked512) - v.AddArg3(x, y, mask) - return true - } // match: (VMOVDQU16Masked512 (VPMAXSW512 x y) mask) // result: (VPMAXSWMasked512 x y mask) for { @@ -32951,67 +32927,6 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked128(v *Value) bool { v.AddArg4(x, y, z, mask) return true } - // match: (VMOVDQU32Masked128 (VPMOVDB128_128 x) mask) - // result: (VPMOVDBMasked128_128 x mask) - for { - if v_0.Op != OpAMD64VPMOVDB128_128 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVDBMasked128_128) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked128 (VPMOVSDB128_128 x) mask) - // result: (VPMOVSDBMasked128_128 x mask) - for { - if v_0.Op != OpAMD64VPMOVSDB128_128 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSDBMasked128_128) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked128 (VPMOVDW128_128 x) mask) - // result: (VPMOVDWMasked128_128 x mask) - for { - if v_0.Op != OpAMD64VPMOVDW128_128 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVDWMasked128_128) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked128 (VPMOVSDW128_128 x) mask) - // result: (VPMOVSDWMasked128_128 x mask) - for { - if v_0.Op != OpAMD64VPMOVSDW128_128 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSDWMasked128_128) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked128 (VPACKSSDW128 x y) mask) - // result: (VPACKSSDWMasked128 x y mask) - for { - if v_0.Op != OpAMD64VPACKSSDW128 { - break - } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPACKSSDWMasked128) - v.AddArg3(x, y, mask) - 
return true - } // match: (VMOVDQU32Masked128 (VCVTTPS2DQ128 x) mask) // result: (VCVTTPS2DQMasked128 x mask) for { @@ -33024,55 +32939,6 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked128(v *Value) bool { v.AddArg2(x, mask) return true } - // match: (VMOVDQU32Masked128 (VPMOVSXDQ128 x) mask) - // result: (VPMOVSXDQMasked128 x mask) - for { - if v_0.Op != OpAMD64VPMOVSXDQ128 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSXDQMasked128) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked128 (VPMOVUSDB128_128 x) mask) - // result: (VPMOVUSDBMasked128_128 x mask) - for { - if v_0.Op != OpAMD64VPMOVUSDB128_128 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVUSDBMasked128_128) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked128 (VPMOVUSDW128_128 x) mask) - // result: (VPMOVUSDWMasked128_128 x mask) - for { - if v_0.Op != OpAMD64VPMOVUSDW128_128 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVUSDWMasked128_128) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked128 (VPACKUSDW128 x y) mask) - // result: (VPACKUSDWMasked128 x y mask) - for { - if v_0.Op != OpAMD64VPACKUSDW128 { - break - } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPACKUSDWMasked128) - v.AddArg3(x, y, mask) - return true - } // match: (VMOVDQU32Masked128 (VCVTPS2UDQ128 x) mask) // result: (VCVTPS2UDQMasked128 x mask) for { @@ -33085,18 +32951,6 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked128(v *Value) bool { v.AddArg2(x, mask) return true } - // match: (VMOVDQU32Masked128 (VPMOVZXDQ128 x) mask) - // result: (VPMOVZXDQMasked128 x mask) - for { - if v_0.Op != OpAMD64VPMOVZXDQ128 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVZXDQMasked128) - v.AddArg2(x, mask) - return true - } // match: (VMOVDQU32Masked128 (VDIVPS128 x y) mask) // result: (VDIVPSMasked128 x y mask) for { @@ -33138,6 +32992,30 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked128(v *Value) bool { v.AddArg4(x, y, z, mask) return true } + // match: (VMOVDQU32Masked128 (VPMOVSXDQ128 x) mask) + // result: (VPMOVSXDQMasked128 x mask) + for { + if v_0.Op != OpAMD64VPMOVSXDQ128 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSXDQMasked128) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU32Masked128 (VPMOVZXDQ128 x) mask) + // result: (VPMOVZXDQMasked128 x mask) + for { + if v_0.Op != OpAMD64VPMOVZXDQ128 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVZXDQMasked128) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU32Masked128 (VPLZCNTD128 x) mask) // result: (VPLZCNTDMasked128 x mask) for { @@ -33362,6 +33240,68 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked128(v *Value) bool { v.AddArg3(x, y, mask) return true } + // match: (VMOVDQU32Masked128 (VPMOVSDB128_128 x) mask) + // result: (VPMOVSDBMasked128_128 x mask) + for { + if v_0.Op != OpAMD64VPMOVSDB128_128 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSDBMasked128_128) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU32Masked128 (VPACKSSDW128 x y) mask) + // result: (VPACKSSDWMasked128 x y mask) + for { + if v_0.Op != OpAMD64VPACKSSDW128 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPACKSSDWMasked128) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU32Masked128 (VPMOVSDW128_128 x) mask) + // result: (VPMOVSDWMasked128_128 x mask) + for { + if v_0.Op != OpAMD64VPMOVSDW128_128 { + break + } + x := v_0.Args[0] + mask 
:= v_1 + v.reset(OpAMD64VPMOVSDWMasked128_128) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU32Masked128 (VPACKUSDW128 x y) mask) + // result: (VPACKUSDWMasked128 x y mask) + for { + if v_0.Op != OpAMD64VPACKUSDW128 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPACKUSDWMasked128) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU32Masked128 (VPMOVUSDW128_128 x) mask) + // result: (VPMOVUSDWMasked128_128 x mask) + for { + if v_0.Op != OpAMD64VPMOVUSDW128_128 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVUSDWMasked128_128) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU32Masked128 (VSCALEFPS128 x y) mask) // result: (VSCALEFPSMasked128 x y mask) for { @@ -33549,6 +33489,30 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked128(v *Value) bool { v.AddArg3(x, y, mask) return true } + // match: (VMOVDQU32Masked128 (VPMOVDB128_128 x) mask) + // result: (VPMOVDBMasked128_128 x mask) + for { + if v_0.Op != OpAMD64VPMOVDB128_128 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVDBMasked128_128) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU32Masked128 (VPMOVDW128_128 x) mask) + // result: (VPMOVDWMasked128_128 x mask) + for { + if v_0.Op != OpAMD64VPMOVDW128_128 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVDWMasked128_128) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU32Masked128 (VPSHUFD128 [a] x) mask) // result: (VPSHUFDMasked128 [a] x mask) for { @@ -33714,91 +33678,6 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked256(v *Value) bool { v.AddArg4(x, y, z, mask) return true } - // match: (VMOVDQU32Masked256 (VPMOVDB128_256 x) mask) - // result: (VPMOVDBMasked128_256 x mask) - for { - if v_0.Op != OpAMD64VPMOVDB128_256 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVDBMasked128_256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked256 (VPMOVSDB128_256 x) mask) - // result: (VPMOVSDBMasked128_256 x mask) - for { - if v_0.Op != OpAMD64VPMOVSDB128_256 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSDBMasked128_256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked256 (VPMOVDW128_256 x) mask) - // result: (VPMOVDWMasked128_256 x mask) - for { - if v_0.Op != OpAMD64VPMOVDW128_256 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVDWMasked128_256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked256 (VPMOVDW256 x) mask) - // result: (VPMOVDWMasked256 x mask) - for { - if v_0.Op != OpAMD64VPMOVDW256 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVDWMasked256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked256 (VPMOVSDW128_256 x) mask) - // result: (VPMOVSDWMasked128_256 x mask) - for { - if v_0.Op != OpAMD64VPMOVSDW128_256 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSDWMasked128_256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked256 (VPMOVSDW256 x) mask) - // result: (VPMOVSDWMasked256 x mask) - for { - if v_0.Op != OpAMD64VPMOVSDW256 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSDWMasked256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked256 (VPACKSSDW256 x y) mask) - // result: (VPACKSSDWMasked256 x y mask) - for { - if v_0.Op != OpAMD64VPACKSSDW256 { - break - } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPACKSSDWMasked256) - v.AddArg3(x, y, mask) - return true - } // match: 
(VMOVDQU32Masked256 (VCVTTPS2DQ256 x) mask) // result: (VCVTTPS2DQMasked256 x mask) for { @@ -33811,67 +33690,6 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked256(v *Value) bool { v.AddArg2(x, mask) return true } - // match: (VMOVDQU32Masked256 (VPMOVSXDQ256 x) mask) - // result: (VPMOVSXDQMasked256 x mask) - for { - if v_0.Op != OpAMD64VPMOVSXDQ256 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSXDQMasked256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked256 (VPMOVUSDB128_256 x) mask) - // result: (VPMOVUSDBMasked128_256 x mask) - for { - if v_0.Op != OpAMD64VPMOVUSDB128_256 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVUSDBMasked128_256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked256 (VPMOVUSDW128_256 x) mask) - // result: (VPMOVUSDWMasked128_256 x mask) - for { - if v_0.Op != OpAMD64VPMOVUSDW128_256 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVUSDWMasked128_256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked256 (VPMOVUSDW256 x) mask) - // result: (VPMOVUSDWMasked256 x mask) - for { - if v_0.Op != OpAMD64VPMOVUSDW256 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVUSDWMasked256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked256 (VPACKUSDW256 x y) mask) - // result: (VPACKUSDWMasked256 x y mask) - for { - if v_0.Op != OpAMD64VPACKUSDW256 { - break - } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPACKUSDWMasked256) - v.AddArg3(x, y, mask) - return true - } // match: (VMOVDQU32Masked256 (VCVTPS2UDQ256 x) mask) // result: (VCVTPS2UDQMasked256 x mask) for { @@ -33884,18 +33702,6 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked256(v *Value) bool { v.AddArg2(x, mask) return true } - // match: (VMOVDQU32Masked256 (VPMOVZXDQ256 x) mask) - // result: (VPMOVZXDQMasked256 x mask) - for { - if v_0.Op != OpAMD64VPMOVZXDQ256 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVZXDQMasked256) - v.AddArg2(x, mask) - return true - } // match: (VMOVDQU32Masked256 (VDIVPS256 x y) mask) // result: (VDIVPSMasked256 x y mask) for { @@ -33937,6 +33743,30 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked256(v *Value) bool { v.AddArg4(x, y, z, mask) return true } + // match: (VMOVDQU32Masked256 (VPMOVSXDQ256 x) mask) + // result: (VPMOVSXDQMasked256 x mask) + for { + if v_0.Op != OpAMD64VPMOVSXDQ256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSXDQMasked256) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU32Masked256 (VPMOVZXDQ256 x) mask) + // result: (VPMOVZXDQMasked256 x mask) + for { + if v_0.Op != OpAMD64VPMOVZXDQ256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVZXDQMasked256) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU32Masked256 (VPLZCNTD256 x) mask) // result: (VPLZCNTDMasked256 x mask) for { @@ -34187,6 +34017,92 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked256(v *Value) bool { v.AddArg3(x, y, mask) return true } + // match: (VMOVDQU32Masked256 (VPMOVSDB128_256 x) mask) + // result: (VPMOVSDBMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVSDB128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSDBMasked128_256) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU32Masked256 (VPACKSSDW256 x y) mask) + // result: (VPACKSSDWMasked256 x y mask) + for { + if v_0.Op != OpAMD64VPACKSSDW256 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + 
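The generated matchers above all share one shape: inspect the op feeding the masked move and, on a hit, rewrite the move into that op's masked form. A minimal runnable sketch of that shape, using simplified stand-ins for the ssa package's Value and Op types (not the real compiler API):

    package main

    import "fmt"

    // Op and Value are toy stand-ins for cmd/compile/internal/ssa's types.
    type Op string

    type Value struct {
        Op   Op
        Args []*Value
    }

    func (v *Value) reset(op Op)         { v.Op, v.Args = op, nil }
    func (v *Value) AddArg2(a, b *Value) { v.Args = append(v.Args, a, b) }

    // rewriteMaskedMove mirrors one match/result pair, e.g.
    // (VMOVDQU32Masked256 (VCVTTPS2DQ256 x) mask) => (VCVTTPS2DQMasked256 x mask).
    func rewriteMaskedMove(v *Value) bool {
        v0, mask := v.Args[0], v.Args[1]
        if v0.Op != "VCVTTPS2DQ256" {
            return false // no fold; control falls through to the next rule
        }
        x := v0.Args[0]
        v.reset("VCVTTPS2DQMasked256")
        v.AddArg2(x, mask)
        return true
    }

    func main() {
        x, m := &Value{Op: "arg"}, &Value{Op: "mask"}
        v := &Value{Op: "VMOVDQU32Masked256",
            Args: []*Value{{Op: "VCVTTPS2DQ256", Args: []*Value{x}}, m}}
        fmt.Println(rewriteMaskedMove(v), v.Op) // true VCVTTPS2DQMasked256
    }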
v.reset(OpAMD64VPACKSSDWMasked256) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU32Masked256 (VPMOVSDW128_256 x) mask) + // result: (VPMOVSDWMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVSDW128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSDWMasked128_256) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU32Masked256 (VPMOVSDW256 x) mask) + // result: (VPMOVSDWMasked256 x mask) + for { + if v_0.Op != OpAMD64VPMOVSDW256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSDWMasked256) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU32Masked256 (VPACKUSDW256 x y) mask) + // result: (VPACKUSDWMasked256 x y mask) + for { + if v_0.Op != OpAMD64VPACKUSDW256 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPACKUSDWMasked256) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU32Masked256 (VPMOVUSDW128_256 x) mask) + // result: (VPMOVUSDWMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVUSDW128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVUSDWMasked128_256) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU32Masked256 (VPMOVUSDW256 x) mask) + // result: (VPMOVUSDWMasked256 x mask) + for { + if v_0.Op != OpAMD64VPMOVUSDW256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVUSDWMasked256) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU32Masked256 (VSCALEFPS256 x y) mask) // result: (VSCALEFPSMasked256 x y mask) for { @@ -34374,6 +34290,42 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked256(v *Value) bool { v.AddArg3(x, y, mask) return true } + // match: (VMOVDQU32Masked256 (VPMOVDB128_256 x) mask) + // result: (VPMOVDBMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVDB128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVDBMasked128_256) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU32Masked256 (VPMOVDW128_256 x) mask) + // result: (VPMOVDWMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVDW128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVDWMasked128_256) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU32Masked256 (VPMOVDW256 x) mask) + // result: (VPMOVDWMasked256 x mask) + for { + if v_0.Op != OpAMD64VPMOVDW256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVDWMasked256) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU32Masked256 (VPSHUFD256 [a] x) mask) // result: (VPSHUFDMasked256 [a] x mask) for { @@ -34565,43 +34517,6 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked512(v *Value) bool { v.AddArg4(x, y, z, mask) return true } - // match: (VMOVDQU32Masked512 (VPMOVDB128_512 x) mask) - // result: (VPMOVDBMasked128_512 x mask) - for { - if v_0.Op != OpAMD64VPMOVDB128_512 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVDBMasked128_512) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked512 (VPMOVSDB128_512 x) mask) - // result: (VPMOVSDBMasked128_512 x mask) - for { - if v_0.Op != OpAMD64VPMOVSDB128_512 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSDBMasked128_512) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked512 (VPACKSSDW512 x y) mask) - // result: (VPACKSSDWMasked512 x y mask) - for { - if v_0.Op != OpAMD64VPACKSSDW512 { - break - } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPACKSSDWMasked512) - v.AddArg3(x, y, mask) - return true - } // match: (VMOVDQU32Masked512 
(VCVTTPS2DQ512 x) mask) // result: (VCVTTPS2DQMasked512 x mask) for { @@ -34614,43 +34529,6 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked512(v *Value) bool { v.AddArg2(x, mask) return true } - // match: (VMOVDQU32Masked512 (VPMOVSXDQ512 x) mask) - // result: (VPMOVSXDQMasked512 x mask) - for { - if v_0.Op != OpAMD64VPMOVSXDQ512 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSXDQMasked512) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked512 (VPMOVUSDB128_512 x) mask) - // result: (VPMOVUSDBMasked128_512 x mask) - for { - if v_0.Op != OpAMD64VPMOVUSDB128_512 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVUSDBMasked128_512) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked512 (VPACKUSDW512 x y) mask) - // result: (VPACKUSDWMasked512 x y mask) - for { - if v_0.Op != OpAMD64VPACKUSDW512 { - break - } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPACKUSDWMasked512) - v.AddArg3(x, y, mask) - return true - } // match: (VMOVDQU32Masked512 (VCVTPS2UDQ512 x) mask) // result: (VCVTPS2UDQMasked512 x mask) for { @@ -34663,18 +34541,6 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked512(v *Value) bool { v.AddArg2(x, mask) return true } - // match: (VMOVDQU32Masked512 (VPMOVZXDQ512 x) mask) - // result: (VPMOVZXDQMasked512 x mask) - for { - if v_0.Op != OpAMD64VPMOVZXDQ512 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVZXDQMasked512) - v.AddArg2(x, mask) - return true - } // match: (VMOVDQU32Masked512 (VDIVPS512 x y) mask) // result: (VDIVPSMasked512 x y mask) for { @@ -34716,6 +34582,30 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked512(v *Value) bool { v.AddArg4(x, y, z, mask) return true } + // match: (VMOVDQU32Masked512 (VPMOVSXDQ512 x) mask) + // result: (VPMOVSXDQMasked512 x mask) + for { + if v_0.Op != OpAMD64VPMOVSXDQ512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSXDQMasked512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU32Masked512 (VPMOVZXDQ512 x) mask) + // result: (VPMOVZXDQMasked512 x mask) + for { + if v_0.Op != OpAMD64VPMOVZXDQ512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVZXDQMasked512) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU32Masked512 (VPLZCNTD512 x) mask) // result: (VPLZCNTDMasked512 x mask) for { @@ -35003,6 +34893,44 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked512(v *Value) bool { v.AddArg3(x, y, mask) return true } + // match: (VMOVDQU32Masked512 (VPMOVSDB128_512 x) mask) + // result: (VPMOVSDBMasked128_512 x mask) + for { + if v_0.Op != OpAMD64VPMOVSDB128_512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSDBMasked128_512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU32Masked512 (VPACKSSDW512 x y) mask) + // result: (VPACKSSDWMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPACKSSDW512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPACKSSDWMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU32Masked512 (VPACKUSDW512 x y) mask) + // result: (VPACKUSDWMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPACKUSDW512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPACKUSDWMasked512) + v.AddArg3(x, y, mask) + return true + } // match: (VMOVDQU32Masked512 (VSCALEFPS512 x y) mask) // result: (VSCALEFPSMasked512 x y mask) for { @@ -35190,6 +35118,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked512(v *Value) bool { v.AddArg3(x, y, mask) return 
true } + // match: (VMOVDQU32Masked512 (VPMOVDB128_512 x) mask) + // result: (VPMOVDBMasked128_512 x mask) + for { + if v_0.Op != OpAMD64VPMOVDB128_512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVDBMasked128_512) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU32Masked512 (VPXORD512 x y) mask) // result: (VPXORDMasked512 x y mask) for { @@ -35356,114 +35296,6 @@ func rewriteValueAMD64_OpAMD64VMOVDQU64Masked128(v *Value) bool { v.AddArg4(x, y, z, mask) return true } - // match: (VMOVDQU64Masked128 (VPMOVQB128_128 x) mask) - // result: (VPMOVQBMasked128_128 x mask) - for { - if v_0.Op != OpAMD64VPMOVQB128_128 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVQBMasked128_128) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked128 (VPMOVSQB128_128 x) mask) - // result: (VPMOVSQBMasked128_128 x mask) - for { - if v_0.Op != OpAMD64VPMOVSQB128_128 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSQBMasked128_128) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked128 (VPMOVQW128_128 x) mask) - // result: (VPMOVQWMasked128_128 x mask) - for { - if v_0.Op != OpAMD64VPMOVQW128_128 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVQWMasked128_128) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked128 (VPMOVSQW128_128 x) mask) - // result: (VPMOVSQWMasked128_128 x mask) - for { - if v_0.Op != OpAMD64VPMOVSQW128_128 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSQWMasked128_128) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked128 (VPMOVQD128_128 x) mask) - // result: (VPMOVQDMasked128_128 x mask) - for { - if v_0.Op != OpAMD64VPMOVQD128_128 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVQDMasked128_128) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked128 (VPMOVSQD128_128 x) mask) - // result: (VPMOVSQDMasked128_128 x mask) - for { - if v_0.Op != OpAMD64VPMOVSQD128_128 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSQDMasked128_128) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked128 (VPMOVUSQB128_128 x) mask) - // result: (VPMOVUSQBMasked128_128 x mask) - for { - if v_0.Op != OpAMD64VPMOVUSQB128_128 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVUSQBMasked128_128) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked128 (VPMOVUSQW128_128 x) mask) - // result: (VPMOVUSQWMasked128_128 x mask) - for { - if v_0.Op != OpAMD64VPMOVUSQW128_128 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVUSQWMasked128_128) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked128 (VPMOVUSQD128_128 x) mask) - // result: (VPMOVUSQDMasked128_128 x mask) - for { - if v_0.Op != OpAMD64VPMOVUSQD128_128 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVUSQDMasked128_128) - v.AddArg2(x, mask) - return true - } // match: (VMOVDQU64Masked128 (VDIVPD128 x y) mask) // result: (VDIVPDMasked128 x y mask) for { @@ -35725,6 +35557,66 @@ func rewriteValueAMD64_OpAMD64VMOVDQU64Masked128(v *Value) bool { v.AddArg3(x, y, mask) return true } + // match: (VMOVDQU64Masked128 (VPMOVSQB128_128 x) mask) + // result: (VPMOVSQBMasked128_128 x mask) + for { + if v_0.Op != OpAMD64VPMOVSQB128_128 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSQBMasked128_128) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked128 (VPMOVSQW128_128 x) mask) + // result: 
(VPMOVSQWMasked128_128 x mask) + for { + if v_0.Op != OpAMD64VPMOVSQW128_128 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSQWMasked128_128) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked128 (VPMOVSQD128_128 x) mask) + // result: (VPMOVSQDMasked128_128 x mask) + for { + if v_0.Op != OpAMD64VPMOVSQD128_128 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSQDMasked128_128) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked128 (VPMOVUSQW128_128 x) mask) + // result: (VPMOVUSQWMasked128_128 x mask) + for { + if v_0.Op != OpAMD64VPMOVUSQW128_128 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVUSQWMasked128_128) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked128 (VPMOVUSQD128_128 x) mask) + // result: (VPMOVUSQDMasked128_128 x mask) + for { + if v_0.Op != OpAMD64VPMOVUSQD128_128 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVUSQDMasked128_128) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU64Masked128 (VSCALEFPD128 x y) mask) // result: (VSCALEFPDMasked128 x y mask) for { @@ -35912,6 +35804,42 @@ func rewriteValueAMD64_OpAMD64VMOVDQU64Masked128(v *Value) bool { v.AddArg3(x, y, mask) return true } + // match: (VMOVDQU64Masked128 (VPMOVQB128_128 x) mask) + // result: (VPMOVQBMasked128_128 x mask) + for { + if v_0.Op != OpAMD64VPMOVQB128_128 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVQBMasked128_128) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked128 (VPMOVQW128_128 x) mask) + // result: (VPMOVQWMasked128_128 x mask) + for { + if v_0.Op != OpAMD64VPMOVQW128_128 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVQWMasked128_128) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked128 (VPMOVQD128_128 x) mask) + // result: (VPMOVQDMasked128_128 x mask) + for { + if v_0.Op != OpAMD64VPMOVQD128_128 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVQDMasked128_128) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU64Masked128 (VPSLLQ128const [a] x) mask) // result: (VPSLLQMasked128const [a] x mask) for { @@ -36063,150 +35991,6 @@ func rewriteValueAMD64_OpAMD64VMOVDQU64Masked256(v *Value) bool { v.AddArg4(x, y, z, mask) return true } - // match: (VMOVDQU64Masked256 (VPMOVQB128_256 x) mask) - // result: (VPMOVQBMasked128_256 x mask) - for { - if v_0.Op != OpAMD64VPMOVQB128_256 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVQBMasked128_256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked256 (VPMOVSQB128_256 x) mask) - // result: (VPMOVSQBMasked128_256 x mask) - for { - if v_0.Op != OpAMD64VPMOVSQB128_256 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSQBMasked128_256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked256 (VPMOVQW128_256 x) mask) - // result: (VPMOVQWMasked128_256 x mask) - for { - if v_0.Op != OpAMD64VPMOVQW128_256 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVQWMasked128_256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked256 (VPMOVSQW128_256 x) mask) - // result: (VPMOVSQWMasked128_256 x mask) - for { - if v_0.Op != OpAMD64VPMOVSQW128_256 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSQWMasked128_256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked256 (VPMOVQD128_256 x) mask) - // result: (VPMOVQDMasked128_256 x mask) - for { - if v_0.Op != OpAMD64VPMOVQD128_256 { - 
break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVQDMasked128_256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked256 (VPMOVQD256 x) mask) - // result: (VPMOVQDMasked256 x mask) - for { - if v_0.Op != OpAMD64VPMOVQD256 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVQDMasked256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked256 (VPMOVSQD128_256 x) mask) - // result: (VPMOVSQDMasked128_256 x mask) - for { - if v_0.Op != OpAMD64VPMOVSQD128_256 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSQDMasked128_256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked256 (VPMOVSQD256 x) mask) - // result: (VPMOVSQDMasked256 x mask) - for { - if v_0.Op != OpAMD64VPMOVSQD256 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSQDMasked256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked256 (VPMOVUSQB128_256 x) mask) - // result: (VPMOVUSQBMasked128_256 x mask) - for { - if v_0.Op != OpAMD64VPMOVUSQB128_256 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVUSQBMasked128_256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked256 (VPMOVUSQW128_256 x) mask) - // result: (VPMOVUSQWMasked128_256 x mask) - for { - if v_0.Op != OpAMD64VPMOVUSQW128_256 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVUSQWMasked128_256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked256 (VPMOVUSQD128_256 x) mask) - // result: (VPMOVUSQDMasked128_256 x mask) - for { - if v_0.Op != OpAMD64VPMOVUSQD128_256 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVUSQDMasked128_256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked256 (VPMOVUSQD256 x) mask) - // result: (VPMOVUSQDMasked256 x mask) - for { - if v_0.Op != OpAMD64VPMOVUSQD256 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVUSQDMasked256) - v.AddArg2(x, mask) - return true - } // match: (VMOVDQU64Masked256 (VDIVPD256 x y) mask) // result: (VDIVPDMasked256 x y mask) for { @@ -36494,6 +36278,90 @@ func rewriteValueAMD64_OpAMD64VMOVDQU64Masked256(v *Value) bool { v.AddArg3(x, y, mask) return true } + // match: (VMOVDQU64Masked256 (VPMOVSQB128_256 x) mask) + // result: (VPMOVSQBMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVSQB128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSQBMasked128_256) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked256 (VPMOVSQW128_256 x) mask) + // result: (VPMOVSQWMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVSQW128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSQWMasked128_256) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked256 (VPMOVSQD128_256 x) mask) + // result: (VPMOVSQDMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVSQD128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSQDMasked128_256) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked256 (VPMOVSQD256 x) mask) + // result: (VPMOVSQDMasked256 x mask) + for { + if v_0.Op != OpAMD64VPMOVSQD256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSQDMasked256) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked256 (VPMOVUSQW128_256 x) mask) + // result: (VPMOVUSQWMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVUSQW128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVUSQWMasked128_256) + 
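For reference, each match/result comment pair in this file corresponds to a rewrite rule of roughly this shape in the generated AMD64 SIMD rules source; the exact rules-file spelling is an assumption here, only the match/result comments in the hunks are verbatim:

    (VMOVDQU64Masked256 (VPMOVSQD256      x) mask) => (VPMOVSQDMasked256      x mask)
    (VMOVDQU64Masked256 (VPMOVUSQD128_256 x) mask) => (VPMOVUSQDMasked128_256 x mask)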
v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked256 (VPMOVUSQD128_256 x) mask) + // result: (VPMOVUSQDMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVUSQD128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVUSQDMasked128_256) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked256 (VPMOVUSQD256 x) mask) + // result: (VPMOVUSQDMasked256 x mask) + for { + if v_0.Op != OpAMD64VPMOVUSQD256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVUSQDMasked256) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU64Masked256 (VSCALEFPD256 x y) mask) // result: (VSCALEFPDMasked256 x y mask) for { @@ -36681,6 +36549,54 @@ func rewriteValueAMD64_OpAMD64VMOVDQU64Masked256(v *Value) bool { v.AddArg3(x, y, mask) return true } + // match: (VMOVDQU64Masked256 (VPMOVQB128_256 x) mask) + // result: (VPMOVQBMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVQB128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVQBMasked128_256) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked256 (VPMOVQW128_256 x) mask) + // result: (VPMOVQWMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVQW128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVQWMasked128_256) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked256 (VPMOVQD128_256 x) mask) + // result: (VPMOVQDMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVQD128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVQDMasked128_256) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked256 (VPMOVQD256 x) mask) + // result: (VPMOVQDMasked256 x mask) + for { + if v_0.Op != OpAMD64VPMOVQD256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVQDMasked256) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU64Masked256 (VPSLLQ256const [a] x) mask) // result: (VPSLLQMasked256const [a] x mask) for { @@ -36858,78 +36774,6 @@ func rewriteValueAMD64_OpAMD64VMOVDQU64Masked512(v *Value) bool { v.AddArg4(x, y, z, mask) return true } - // match: (VMOVDQU64Masked512 (VPMOVQB128_512 x) mask) - // result: (VPMOVQBMasked128_512 x mask) - for { - if v_0.Op != OpAMD64VPMOVQB128_512 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVQBMasked128_512) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked512 (VPMOVSQB128_512 x) mask) - // result: (VPMOVSQBMasked128_512 x mask) - for { - if v_0.Op != OpAMD64VPMOVSQB128_512 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSQBMasked128_512) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked512 (VPMOVQW128_512 x) mask) - // result: (VPMOVQWMasked128_512 x mask) - for { - if v_0.Op != OpAMD64VPMOVQW128_512 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVQWMasked128_512) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked512 (VPMOVSQW128_512 x) mask) - // result: (VPMOVSQWMasked128_512 x mask) - for { - if v_0.Op != OpAMD64VPMOVSQW128_512 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSQWMasked128_512) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked512 (VPMOVUSQB128_512 x) mask) - // result: (VPMOVUSQBMasked128_512 x mask) - for { - if v_0.Op != OpAMD64VPMOVUSQB128_512 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVUSQBMasked128_512) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked512 (VPMOVUSQW128_512 x) mask) - // result: 
(VPMOVUSQWMasked128_512 x mask) - for { - if v_0.Op != OpAMD64VPMOVUSQW128_512 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVUSQWMasked128_512) - v.AddArg2(x, mask) - return true - } // match: (VMOVDQU64Masked512 (VDIVPD512 x y) mask) // result: (VDIVPDMasked512 x y mask) for { @@ -37230,6 +37074,42 @@ func rewriteValueAMD64_OpAMD64VMOVDQU64Masked512(v *Value) bool { v.AddArg3(x, y, mask) return true } + // match: (VMOVDQU64Masked512 (VPMOVSQB128_512 x) mask) + // result: (VPMOVSQBMasked128_512 x mask) + for { + if v_0.Op != OpAMD64VPMOVSQB128_512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSQBMasked128_512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPMOVSQW128_512 x) mask) + // result: (VPMOVSQWMasked128_512 x mask) + for { + if v_0.Op != OpAMD64VPMOVSQW128_512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSQWMasked128_512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPMOVUSQW128_512 x) mask) + // result: (VPMOVUSQWMasked128_512 x mask) + for { + if v_0.Op != OpAMD64VPMOVUSQW128_512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVUSQWMasked128_512) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU64Masked512 (VSCALEFPD512 x y) mask) // result: (VSCALEFPDMasked512 x y mask) for { @@ -37417,6 +37297,30 @@ func rewriteValueAMD64_OpAMD64VMOVDQU64Masked512(v *Value) bool { v.AddArg3(x, y, mask) return true } + // match: (VMOVDQU64Masked512 (VPMOVQB128_512 x) mask) + // result: (VPMOVQBMasked128_512 x mask) + for { + if v_0.Op != OpAMD64VPMOVQB128_512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVQBMasked128_512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPMOVQW128_512 x) mask) + // result: (VPMOVQWMasked128_512 x mask) + for { + if v_0.Op != OpAMD64VPMOVQW128_512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVQWMasked128_512) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU64Masked512 (VPXORQ512 x y) mask) // result: (VPXORQMasked512 x y mask) for { @@ -37568,15 +37472,27 @@ func rewriteValueAMD64_OpAMD64VMOVDQU8Masked128(v *Value) bool { v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked128 (VPMOVSXBW128 x) mask) - // result: (VPMOVSXBWMasked128 x mask) + // match: (VMOVDQU8Masked128 (VPMOVSXBQ128 x) mask) + // result: (VPMOVSXBQMasked128 x mask) for { - if v_0.Op != OpAMD64VPMOVSXBW128 { + if v_0.Op != OpAMD64VPMOVSXBQ128 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVSXBWMasked128) + v.reset(OpAMD64VPMOVSXBQMasked128) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU8Masked128 (VPMOVZXBQ128 x) mask) + // result: (VPMOVZXBQMasked128 x mask) + for { + if v_0.Op != OpAMD64VPMOVZXBQ128 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVZXBQMasked128) v.AddArg2(x, mask) return true } @@ -37592,30 +37508,6 @@ func rewriteValueAMD64_OpAMD64VMOVDQU8Masked128(v *Value) bool { v.AddArg2(x, mask) return true } - // match: (VMOVDQU8Masked128 (VPMOVSXBQ128 x) mask) - // result: (VPMOVSXBQMasked128 x mask) - for { - if v_0.Op != OpAMD64VPMOVSXBQ128 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSXBQMasked128) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU8Masked128 (VPMOVZXBW128 x) mask) - // result: (VPMOVZXBWMasked128 x mask) - for { - if v_0.Op != OpAMD64VPMOVZXBW128 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVZXBWMasked128) - v.AddArg2(x, mask) - 
return true - } // match: (VMOVDQU8Masked128 (VPMOVZXBD128 x) mask) // result: (VPMOVZXBDMasked128 x mask) for { @@ -37628,15 +37520,27 @@ func rewriteValueAMD64_OpAMD64VMOVDQU8Masked128(v *Value) bool { v.AddArg2(x, mask) return true } - // match: (VMOVDQU8Masked128 (VPMOVZXBQ128 x) mask) - // result: (VPMOVZXBQMasked128 x mask) + // match: (VMOVDQU8Masked128 (VPMOVSXBW128 x) mask) + // result: (VPMOVSXBWMasked128 x mask) for { - if v_0.Op != OpAMD64VPMOVZXBQ128 { + if v_0.Op != OpAMD64VPMOVSXBW128 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVZXBQMasked128) + v.reset(OpAMD64VPMOVSXBWMasked128) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU8Masked128 (VPMOVZXBW128 x) mask) + // result: (VPMOVZXBWMasked128 x mask) + for { + if v_0.Op != OpAMD64VPMOVZXBW128 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVZXBWMasked128) v.AddArg2(x, mask) return true } @@ -37922,15 +37826,27 @@ func rewriteValueAMD64_OpAMD64VMOVDQU8Masked256(v *Value) bool { v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked256 (VPMOVSXBW256 x) mask) - // result: (VPMOVSXBWMasked256 x mask) + // match: (VMOVDQU8Masked256 (VPMOVSXBQ256 x) mask) + // result: (VPMOVSXBQMasked256 x mask) for { - if v_0.Op != OpAMD64VPMOVSXBW256 { + if v_0.Op != OpAMD64VPMOVSXBQ256 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVSXBWMasked256) + v.reset(OpAMD64VPMOVSXBQMasked256) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU8Masked256 (VPMOVZXBQ256 x) mask) + // result: (VPMOVZXBQMasked256 x mask) + for { + if v_0.Op != OpAMD64VPMOVZXBQ256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVZXBQMasked256) v.AddArg2(x, mask) return true } @@ -37946,30 +37862,6 @@ func rewriteValueAMD64_OpAMD64VMOVDQU8Masked256(v *Value) bool { v.AddArg2(x, mask) return true } - // match: (VMOVDQU8Masked256 (VPMOVSXBQ256 x) mask) - // result: (VPMOVSXBQMasked256 x mask) - for { - if v_0.Op != OpAMD64VPMOVSXBQ256 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSXBQMasked256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU8Masked256 (VPMOVZXBW256 x) mask) - // result: (VPMOVZXBWMasked256 x mask) - for { - if v_0.Op != OpAMD64VPMOVZXBW256 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVZXBWMasked256) - v.AddArg2(x, mask) - return true - } // match: (VMOVDQU8Masked256 (VPMOVZXBD256 x) mask) // result: (VPMOVZXBDMasked256 x mask) for { @@ -37982,15 +37874,27 @@ func rewriteValueAMD64_OpAMD64VMOVDQU8Masked256(v *Value) bool { v.AddArg2(x, mask) return true } - // match: (VMOVDQU8Masked256 (VPMOVZXBQ256 x) mask) - // result: (VPMOVZXBQMasked256 x mask) + // match: (VMOVDQU8Masked256 (VPMOVSXBW256 x) mask) + // result: (VPMOVSXBWMasked256 x mask) for { - if v_0.Op != OpAMD64VPMOVZXBQ256 { + if v_0.Op != OpAMD64VPMOVSXBW256 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVZXBQMasked256) + v.reset(OpAMD64VPMOVSXBWMasked256) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU8Masked256 (VPMOVZXBW256 x) mask) + // result: (VPMOVZXBWMasked256 x mask) + for { + if v_0.Op != OpAMD64VPMOVZXBW256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVZXBWMasked256) v.AddArg2(x, mask) return true } @@ -38276,6 +38180,30 @@ func rewriteValueAMD64_OpAMD64VMOVDQU8Masked512(v *Value) bool { v.AddArg3(x, y, mask) return true } + // match: (VMOVDQU8Masked512 (VPMOVSXBQ512 x) mask) + // result: (VPMOVSXBQMasked512 x mask) + for { + if v_0.Op != OpAMD64VPMOVSXBQ512 { + break + } + x := v_0.Args[0] + 
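The VPMOVSX*/VPMOVZX* folds above are the sign- and zero-extending widenings that this CL surfaces as the ExtendLo*/ExtendTo* methods in simdintrinsics.go further down. A scalar model of the per-lane semantics (semantics only, not the simd package API):

    package main

    import "fmt"

    func main() {
        b := int8(-3)    // one lane of an Int8x16
        u := uint8(0xfd) // one lane of a Uint8x16
        fmt.Println(int64(b))  // VPMOVSXBQ per lane: sign-extend, prints -3
        fmt.Println(uint64(u)) // VPMOVZXBQ per lane: zero-extend, prints 253
    }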
mask := v_1 + v.reset(OpAMD64VPMOVSXBQMasked512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU8Masked512 (VPMOVZXBQ512 x) mask) + // result: (VPMOVZXBQMasked512 x mask) + for { + if v_0.Op != OpAMD64VPMOVZXBQ512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVZXBQMasked512) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU8Masked512 (VPMOVSXBW512 x) mask) // result: (VPMOVSXBWMasked512 x mask) for { @@ -38300,18 +38228,6 @@ func rewriteValueAMD64_OpAMD64VMOVDQU8Masked512(v *Value) bool { v.AddArg2(x, mask) return true } - // match: (VMOVDQU8Masked512 (VPMOVSXBQ512 x) mask) - // result: (VPMOVSXBQMasked512 x mask) - for { - if v_0.Op != OpAMD64VPMOVSXBQ512 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSXBQMasked512) - v.AddArg2(x, mask) - return true - } // match: (VMOVDQU8Masked512 (VPMOVZXBW512 x) mask) // result: (VPMOVZXBWMasked512 x mask) for { @@ -38336,18 +38252,6 @@ func rewriteValueAMD64_OpAMD64VMOVDQU8Masked512(v *Value) bool { v.AddArg2(x, mask) return true } - // match: (VMOVDQU8Masked512 (VPMOVZXBQ512 x) mask) - // result: (VPMOVZXBQMasked512 x mask) - for { - if v_0.Op != OpAMD64VPMOVZXBQ512 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVZXBQMasked512) - v.AddArg2(x, mask) - return true - } // match: (VMOVDQU8Masked512 (VGF2P8AFFINEINVQB512 [a] x y) mask) // result: (VGF2P8AFFINEINVQBMasked512 [a] x y mask) for { @@ -41357,19 +41261,6 @@ func rewriteValueAMD64_OpAMD64VPBLENDMDMasked512(v *Value) bool { v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPMOVUSDB128_512 x) mask) - // result: (VPMOVUSDBMasked128_512Merging dst x mask) - for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVUSDB128_512 { - break - } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPMOVUSDBMasked128_512Merging) - v.AddArg3(dst, x, mask) - return true - } // match: (VPBLENDMDMasked512 dst (VPMOVUSDW256 x) mask) // result: (VPMOVUSDWMasked256Merging dst x mask) for { @@ -42013,19 +41904,6 @@ func rewriteValueAMD64_OpAMD64VPBLENDMQMasked512(v *Value) bool { v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPMOVUSQB128_512 x) mask) - // result: (VPMOVUSQBMasked128_512Merging dst x mask) - for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVUSQB128_512 { - break - } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPMOVUSQBMasked128_512Merging) - v.AddArg3(dst, x, mask) - return true - } // match: (VPBLENDMQMasked512 dst (VPMOVUSQD256 x) mask) // result: (VPMOVUSQDMasked256Merging dst x mask) for { @@ -44650,25 +44528,6 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPMOVUSDB128_128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVUSDBMasked128_128Merging dst x (VPMOVVec32x4ToM mask)) - for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVUSDB128_128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPMOVUSDBMasked128_128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) - return true - } // match: (VPBLENDVB128 dst (VPMOVUSDW128_128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) // result: (VPMOVUSDWMasked128_128Merging dst x (VPMOVVec32x4ToM mask)) @@ -44688,25 +44547,6 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPMOVUSQB128_128 
x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVUSQBMasked128_128Merging dst x (VPMOVVec64x2ToM mask)) - for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVUSQB128_128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPMOVUSQBMasked128_128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) - return true - } // match: (VPBLENDVB128 dst (VPMOVUSQD128_128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) // result: (VPMOVUSQDMasked128_128Merging dst x (VPMOVVec64x2ToM mask)) @@ -44745,25 +44585,6 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPMOVUSWB128_128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVUSWBMasked128_128Merging dst x (VPMOVVec16x8ToM mask)) - for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVUSWB128_128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPMOVUSWBMasked128_128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) - return true - } // match: (VPBLENDVB128 dst (VPMOVWB128_128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) // result: (VPMOVWBMasked128_128Merging dst x (VPMOVVec16x8ToM mask)) @@ -47596,25 +47417,6 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPMOVUSDB128_256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVUSDBMasked128_256Merging dst x (VPMOVVec32x8ToM mask)) - for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVUSDB128_256 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPMOVUSDBMasked128_256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) - return true - } // match: (VPBLENDVB256 dst (VPMOVUSDW128_256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) // result: (VPMOVUSDWMasked128_256Merging dst x (VPMOVVec32x8ToM mask)) @@ -47634,25 +47436,6 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPMOVUSQB128_256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVUSQBMasked128_256Merging dst x (VPMOVVec64x4ToM mask)) - for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVUSQB128_256 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPMOVUSQBMasked128_256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) - return true - } // match: (VPBLENDVB256 dst (VPMOVUSQD128_256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) // result: (VPMOVUSQDMasked128_256Merging dst x (VPMOVVec64x4ToM mask)) @@ -47691,25 +47474,6 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPMOVUSWB128_256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVUSWBMasked128_256Merging dst x (VPMOVVec16x16ToM mask)) - for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVUSWB128_256 { - break - } - x := v_1.Args[0] - mask := 
v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPMOVUSWBMasked128_256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) - return true - } // match: (VPBLENDVB256 dst (VPMOVWB128_256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) // result: (VPMOVWBMasked128_256Merging dst x (VPMOVVec16x16ToM mask)) diff --git a/src/cmd/compile/internal/ssagen/simdintrinsics.go b/src/cmd/compile/internal/ssagen/simdintrinsics.go index 413cf92c88c..987be732109 100644 --- a/src/cmd/compile/internal/ssagen/simdintrinsics.go +++ b/src/cmd/compile/internal/ssagen/simdintrinsics.go @@ -261,126 +261,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Uint8x16.ConcatShiftBytesRight", opLen2Imm8(ssa.OpConcatShiftBytesRightUint8x16, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Uint8x32.ConcatShiftBytesRightGrouped", opLen2Imm8(ssa.OpConcatShiftBytesRightGroupedUint8x32, types.TypeVec256, 0), sys.AMD64) addF(simdPackage, "Uint8x64.ConcatShiftBytesRightGrouped", opLen2Imm8(ssa.OpConcatShiftBytesRightGroupedUint8x64, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Int16x8.ConvertToInt8", opLen1(ssa.OpConvertToInt8Int16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.ConvertToInt8", opLen1(ssa.OpConvertToInt8Int16x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x32.ConvertToInt8", opLen1(ssa.OpConvertToInt8Int16x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x4.ConvertToInt8", opLen1(ssa.OpConvertToInt8Int32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.ConvertToInt8", opLen1(ssa.OpConvertToInt8Int32x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x16.ConvertToInt8", opLen1(ssa.OpConvertToInt8Int32x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x2.ConvertToInt8", opLen1(ssa.OpConvertToInt8Int64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.ConvertToInt8", opLen1(ssa.OpConvertToInt8Int64x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x8.ConvertToInt8", opLen1(ssa.OpConvertToInt8Int64x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x8.ConvertToInt8Saturated", opLen1(ssa.OpConvertToInt8SaturatedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.ConvertToInt8Saturated", opLen1(ssa.OpConvertToInt8SaturatedInt16x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x32.ConvertToInt8Saturated", opLen1(ssa.OpConvertToInt8SaturatedInt16x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x4.ConvertToInt8Saturated", opLen1(ssa.OpConvertToInt8SaturatedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.ConvertToInt8Saturated", opLen1(ssa.OpConvertToInt8SaturatedInt32x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x16.ConvertToInt8Saturated", opLen1(ssa.OpConvertToInt8SaturatedInt32x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x2.ConvertToInt8Saturated", opLen1(ssa.OpConvertToInt8SaturatedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.ConvertToInt8Saturated", opLen1(ssa.OpConvertToInt8SaturatedInt64x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x8.ConvertToInt8Saturated", opLen1(ssa.OpConvertToInt8SaturatedInt64x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x16.ConvertToInt16", opLen1(ssa.OpConvertToInt16Int8x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x32.ConvertToInt16", 
opLen1(ssa.OpConvertToInt16Int8x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.ConvertToInt16", opLen1(ssa.OpConvertToInt16Int32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.ConvertToInt16", opLen1(ssa.OpConvertToInt16Int32x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x16.ConvertToInt16", opLen1(ssa.OpConvertToInt16Int32x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x2.ConvertToInt16", opLen1(ssa.OpConvertToInt16Int64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.ConvertToInt16", opLen1(ssa.OpConvertToInt16Int64x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x8.ConvertToInt16", opLen1(ssa.OpConvertToInt16Int64x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x4.ConvertToInt16Saturated", opLen1(ssa.OpConvertToInt16SaturatedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.ConvertToInt16Saturated", opLen1(ssa.OpConvertToInt16SaturatedInt32x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x16.ConvertToInt16Saturated", opLen1(ssa.OpConvertToInt16SaturatedInt32x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x2.ConvertToInt16Saturated", opLen1(ssa.OpConvertToInt16SaturatedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.ConvertToInt16Saturated", opLen1(ssa.OpConvertToInt16SaturatedInt64x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x8.ConvertToInt16Saturated", opLen1(ssa.OpConvertToInt16SaturatedInt64x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x4.ConvertToInt16SaturatedPacked", opLen2(ssa.OpConvertToInt16SaturatedPackedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.ConvertToInt16SaturatedPacked", opLen2(ssa.OpConvertToInt16SaturatedPackedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.ConvertToInt16SaturatedPacked", opLen2(ssa.OpConvertToInt16SaturatedPackedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.ConvertToInt16x8", opLen1(ssa.OpConvertToInt16x8Int8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x4.ConvertToInt32", opLen1(ssa.OpConvertToInt32Float32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.ConvertToInt32", opLen1(ssa.OpConvertToInt32Float32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.ConvertToInt32", opLen1(ssa.OpConvertToInt32Float32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.ConvertToInt32", opLen1(ssa.OpConvertToInt32Int8x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.ConvertToInt32", opLen1(ssa.OpConvertToInt32Int16x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x16.ConvertToInt32", opLen1(ssa.OpConvertToInt32Int16x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.ConvertToInt32", opLen1(ssa.OpConvertToInt32Int64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.ConvertToInt32", opLen1(ssa.OpConvertToInt32Int64x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x8.ConvertToInt32", opLen1(ssa.OpConvertToInt32Int64x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x2.ConvertToInt32Saturated", opLen1(ssa.OpConvertToInt32SaturatedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.ConvertToInt32Saturated", opLen1(ssa.OpConvertToInt32SaturatedInt64x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x8.ConvertToInt32Saturated", opLen1(ssa.OpConvertToInt32SaturatedInt64x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x16.ConvertToInt32x4", 
opLen1(ssa.OpConvertToInt32x4Int8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x8.ConvertToInt32x4", opLen1(ssa.OpConvertToInt32x4Int16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x16.ConvertToInt32x8", opLen1(ssa.OpConvertToInt32x8Int8x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x8.ConvertToInt64", opLen1(ssa.OpConvertToInt64Int16x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.ConvertToInt64", opLen1(ssa.OpConvertToInt64Int32x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x8.ConvertToInt64", opLen1(ssa.OpConvertToInt64Int32x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.ConvertToInt64x2", opLen1(ssa.OpConvertToInt64x2Int8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x8.ConvertToInt64x2", opLen1(ssa.OpConvertToInt64x2Int16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x4.ConvertToInt64x2", opLen1(ssa.OpConvertToInt64x2Int32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x16.ConvertToInt64x4", opLen1(ssa.OpConvertToInt64x4Int8x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x16.ConvertToInt64x8", opLen1(ssa.OpConvertToInt64x8Int8x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.ConvertToUint8", opLen1(ssa.OpConvertToUint8Uint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.ConvertToUint8", opLen1(ssa.OpConvertToUint8Uint16x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x32.ConvertToUint8", opLen1(ssa.OpConvertToUint8Uint16x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x4.ConvertToUint8", opLen1(ssa.OpConvertToUint8Uint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.ConvertToUint8", opLen1(ssa.OpConvertToUint8Uint32x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x16.ConvertToUint8", opLen1(ssa.OpConvertToUint8Uint32x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x2.ConvertToUint8", opLen1(ssa.OpConvertToUint8Uint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.ConvertToUint8", opLen1(ssa.OpConvertToUint8Uint64x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x8.ConvertToUint8", opLen1(ssa.OpConvertToUint8Uint64x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x8.ConvertToUint8Saturated", opLen1(ssa.OpConvertToUint8SaturatedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.ConvertToUint8Saturated", opLen1(ssa.OpConvertToUint8SaturatedUint16x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x32.ConvertToUint8Saturated", opLen1(ssa.OpConvertToUint8SaturatedUint16x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x4.ConvertToUint8Saturated", opLen1(ssa.OpConvertToUint8SaturatedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.ConvertToUint8Saturated", opLen1(ssa.OpConvertToUint8SaturatedUint32x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x16.ConvertToUint8Saturated", opLen1(ssa.OpConvertToUint8SaturatedUint32x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x2.ConvertToUint8Saturated", opLen1(ssa.OpConvertToUint8SaturatedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.ConvertToUint8Saturated", opLen1(ssa.OpConvertToUint8SaturatedUint64x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x8.ConvertToUint8Saturated", opLen1(ssa.OpConvertToUint8SaturatedUint64x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x16.ConvertToUint16", opLen1(ssa.OpConvertToUint16Uint8x16, types.TypeVec256), 
sys.AMD64) - addF(simdPackage, "Uint8x32.ConvertToUint16", opLen1(ssa.OpConvertToUint16Uint8x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.ConvertToUint16", opLen1(ssa.OpConvertToUint16Uint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.ConvertToUint16", opLen1(ssa.OpConvertToUint16Uint32x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x16.ConvertToUint16", opLen1(ssa.OpConvertToUint16Uint32x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x2.ConvertToUint16", opLen1(ssa.OpConvertToUint16Uint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.ConvertToUint16", opLen1(ssa.OpConvertToUint16Uint64x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x8.ConvertToUint16", opLen1(ssa.OpConvertToUint16Uint64x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x4.ConvertToUint16Saturated", opLen1(ssa.OpConvertToUint16SaturatedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.ConvertToUint16Saturated", opLen1(ssa.OpConvertToUint16SaturatedUint32x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x16.ConvertToUint16Saturated", opLen1(ssa.OpConvertToUint16SaturatedUint32x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x2.ConvertToUint16Saturated", opLen1(ssa.OpConvertToUint16SaturatedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.ConvertToUint16Saturated", opLen1(ssa.OpConvertToUint16SaturatedUint64x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x8.ConvertToUint16Saturated", opLen1(ssa.OpConvertToUint16SaturatedUint64x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x4.ConvertToUint16SaturatedPacked", opLen2(ssa.OpConvertToUint16SaturatedPackedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.ConvertToUint16SaturatedPacked", opLen2(ssa.OpConvertToUint16SaturatedPackedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.ConvertToUint16SaturatedPacked", opLen2(ssa.OpConvertToUint16SaturatedPackedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.ConvertToUint16x8", opLen1(ssa.OpConvertToUint16x8Uint8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x4.ConvertToUint32", opLen1(ssa.OpConvertToUint32Float32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.ConvertToUint32", opLen1(ssa.OpConvertToUint32Float32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.ConvertToUint32", opLen1(ssa.OpConvertToUint32Float32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.ConvertToUint32", opLen1(ssa.OpConvertToUint32Uint8x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.ConvertToUint32", opLen1(ssa.OpConvertToUint32Uint16x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x16.ConvertToUint32", opLen1(ssa.OpConvertToUint32Uint16x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.ConvertToUint32", opLen1(ssa.OpConvertToUint32Uint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.ConvertToUint32", opLen1(ssa.OpConvertToUint32Uint64x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x8.ConvertToUint32", opLen1(ssa.OpConvertToUint32Uint64x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x2.ConvertToUint32Saturated", opLen1(ssa.OpConvertToUint32SaturatedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.ConvertToUint32Saturated", opLen1(ssa.OpConvertToUint32SaturatedUint64x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, 
"Uint64x8.ConvertToUint32Saturated", opLen1(ssa.OpConvertToUint32SaturatedUint64x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x16.ConvertToUint32x4", opLen1(ssa.OpConvertToUint32x4Uint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x8.ConvertToUint32x4", opLen1(ssa.OpConvertToUint32x4Uint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x16.ConvertToUint32x8", opLen1(ssa.OpConvertToUint32x8Uint8x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x8.ConvertToUint64", opLen1(ssa.OpConvertToUint64Uint16x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.ConvertToUint64", opLen1(ssa.OpConvertToUint64Uint32x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x8.ConvertToUint64", opLen1(ssa.OpConvertToUint64Uint32x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.ConvertToUint64x2", opLen1(ssa.OpConvertToUint64x2Uint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x8.ConvertToUint64x2", opLen1(ssa.OpConvertToUint64x2Uint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x4.ConvertToUint64x2", opLen1(ssa.OpConvertToUint64x2Uint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x8.ConvertToUint64x4", opLen1(ssa.OpConvertToUint64x4Int16x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x16.ConvertToUint64x4", opLen1(ssa.OpConvertToUint64x4Uint8x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x8.ConvertToUint64x4", opLen1(ssa.OpConvertToUint64x4Uint16x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x16.ConvertToUint64x8", opLen1(ssa.OpConvertToUint64x8Uint8x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x16.CopySign", opLen2(ssa.OpCopySignInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x32.CopySign", opLen2(ssa.OpCopySignInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x8.CopySign", opLen2(ssa.OpCopySignInt16x8, types.TypeVec128), sys.AMD64) @@ -465,6 +351,42 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint64x2.Expand", opLen2(ssa.OpExpandUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.Expand", opLen2(ssa.OpExpandUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.Expand", opLen2(ssa.OpExpandUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int8x16.ExtendLo2ToInt64x2", opLen1(ssa.OpExtendLo2ToInt64x2Int8x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x8.ExtendLo2ToInt64x2", opLen1(ssa.OpExtendLo2ToInt64x2Int16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x4.ExtendLo2ToInt64x2", opLen1(ssa.OpExtendLo2ToInt64x2Int32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint8x16.ExtendLo2ToUint64x2", opLen1(ssa.OpExtendLo2ToUint64x2Uint8x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x8.ExtendLo2ToUint64x2", opLen1(ssa.OpExtendLo2ToUint64x2Uint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x4.ExtendLo2ToUint64x2", opLen1(ssa.OpExtendLo2ToUint64x2Uint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int8x16.ExtendLo4ToInt32x4", opLen1(ssa.OpExtendLo4ToInt32x4Int8x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x8.ExtendLo4ToInt32x4", opLen1(ssa.OpExtendLo4ToInt32x4Int16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int8x16.ExtendLo4ToInt64x4", opLen1(ssa.OpExtendLo4ToInt64x4Int8x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x8.ExtendLo4ToInt64x4", opLen1(ssa.OpExtendLo4ToInt64x4Int16x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint8x16.ExtendLo4ToUint32x4", opLen1(ssa.OpExtendLo4ToUint32x4Uint8x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x8.ExtendLo4ToUint32x4", opLen1(ssa.OpExtendLo4ToUint32x4Uint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint8x16.ExtendLo4ToUint64x4", opLen1(ssa.OpExtendLo4ToUint64x4Uint8x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x8.ExtendLo4ToUint64x4", opLen1(ssa.OpExtendLo4ToUint64x4Uint16x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int8x16.ExtendLo8ToInt16x8", opLen1(ssa.OpExtendLo8ToInt16x8Int8x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int8x16.ExtendLo8ToInt32x8", opLen1(ssa.OpExtendLo8ToInt32x8Int8x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int8x16.ExtendLo8ToInt64x8", opLen1(ssa.OpExtendLo8ToInt64x8Int8x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint8x16.ExtendLo8ToUint16x8", opLen1(ssa.OpExtendLo8ToUint16x8Uint8x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint8x16.ExtendLo8ToUint32x8", opLen1(ssa.OpExtendLo8ToUint32x8Uint8x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint8x16.ExtendLo8ToUint64x8", opLen1(ssa.OpExtendLo8ToUint64x8Uint8x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int8x16.ExtendToInt16", opLen1(ssa.OpExtendToInt16Int8x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int8x32.ExtendToInt16", opLen1(ssa.OpExtendToInt16Int8x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int8x16.ExtendToInt32", opLen1(ssa.OpExtendToInt32Int8x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.ExtendToInt32", opLen1(ssa.OpExtendToInt32Int16x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x16.ExtendToInt32", opLen1(ssa.OpExtendToInt32Int16x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.ExtendToInt64", opLen1(ssa.OpExtendToInt64Int16x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.ExtendToInt64", opLen1(ssa.OpExtendToInt64Int32x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x8.ExtendToInt64", 
opLen1(ssa.OpExtendToInt64Int32x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint8x16.ExtendToUint16", opLen1(ssa.OpExtendToUint16Uint8x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint8x32.ExtendToUint16", opLen1(ssa.OpExtendToUint16Uint8x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint8x16.ExtendToUint32", opLen1(ssa.OpExtendToUint32Uint8x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.ExtendToUint32", opLen1(ssa.OpExtendToUint32Uint16x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x16.ExtendToUint32", opLen1(ssa.OpExtendToUint32Uint16x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.ExtendToUint64", opLen1(ssa.OpExtendToUint64Uint16x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.ExtendToUint64", opLen1(ssa.OpExtendToUint64Uint32x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x8.ExtendToUint64", opLen1(ssa.OpExtendToUint64Uint32x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.Floor", opLen1(ssa.OpFloorFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.Floor", opLen1(ssa.OpFloorFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x2.Floor", opLen1(ssa.OpFloorFloat64x2, types.TypeVec128), sys.AMD64) @@ -945,6 +867,48 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Uint32x4.SHA256Message1", opLen2(ssa.OpSHA256Message1Uint32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint32x4.SHA256Message2", opLen2(ssa.OpSHA256Message2Uint32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint32x4.SHA256TwoRounds", opLen3(ssa.OpSHA256TwoRoundsUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x8.SaturateToInt8", opLen1(ssa.OpSaturateToInt8Int16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.SaturateToInt8", opLen1(ssa.OpSaturateToInt8Int16x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x32.SaturateToInt8", opLen1(ssa.OpSaturateToInt8Int16x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x4.SaturateToInt8", opLen1(ssa.OpSaturateToInt8Int32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.SaturateToInt8", opLen1(ssa.OpSaturateToInt8Int32x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x16.SaturateToInt8", opLen1(ssa.OpSaturateToInt8Int32x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x2.SaturateToInt8", opLen1(ssa.OpSaturateToInt8Int64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.SaturateToInt8", opLen1(ssa.OpSaturateToInt8Int64x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x8.SaturateToInt8", opLen1(ssa.OpSaturateToInt8Int64x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x4.SaturateToInt16", opLen1(ssa.OpSaturateToInt16Int32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.SaturateToInt16", opLen1(ssa.OpSaturateToInt16Int32x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x16.SaturateToInt16", opLen1(ssa.OpSaturateToInt16Int32x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x2.SaturateToInt16", opLen1(ssa.OpSaturateToInt16Int64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.SaturateToInt16", opLen1(ssa.OpSaturateToInt16Int64x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x8.SaturateToInt16", opLen1(ssa.OpSaturateToInt16Int64x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x4.SaturateToInt16Concat", opLen2(ssa.OpSaturateToInt16ConcatInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, 
"Int32x8.SaturateToInt16Concat", opLen2(ssa.OpSaturateToInt16ConcatInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.SaturateToInt16Concat", opLen2(ssa.OpSaturateToInt16ConcatInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.SaturateToInt32", opLen1(ssa.OpSaturateToInt32Int64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.SaturateToInt32", opLen1(ssa.OpSaturateToInt32Int64x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x8.SaturateToInt32", opLen1(ssa.OpSaturateToInt32Int64x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x8.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Int16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Int16x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x4.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Int32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Int32x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x16.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Int32x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x2.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Int64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Int64x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x8.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Int64x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x32.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Uint16x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x4.SaturateToUint16", opLen1(ssa.OpSaturateToUint16Uint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.SaturateToUint16", opLen1(ssa.OpSaturateToUint16Uint32x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x16.SaturateToUint16", opLen1(ssa.OpSaturateToUint16Uint32x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x2.SaturateToUint16", opLen1(ssa.OpSaturateToUint16Uint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.SaturateToUint16", opLen1(ssa.OpSaturateToUint16Uint64x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x8.SaturateToUint16", opLen1(ssa.OpSaturateToUint16Uint64x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x4.SaturateToUint16Concat", opLen2(ssa.OpSaturateToUint16ConcatUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.SaturateToUint16Concat", opLen2(ssa.OpSaturateToUint16ConcatUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.SaturateToUint16Concat", opLen2(ssa.OpSaturateToUint16ConcatUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.SaturateToUint32", opLen1(ssa.OpSaturateToUint32Uint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.SaturateToUint32", opLen1(ssa.OpSaturateToUint32Uint64x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x8.SaturateToUint32", opLen1(ssa.OpSaturateToUint32Uint64x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x4.Scale", opLen2(ssa.OpScaleFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.Scale", opLen2(ssa.OpScaleFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.Scale", opLen2(ssa.OpScaleFloat32x16, types.TypeVec512), sys.AMD64) @@ -1236,6 +1200,42 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Float64x2.TruncScaledResidue", opLen1Imm8(ssa.OpTruncScaledResidueFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.TruncScaledResidue", opLen1Imm8(ssa.OpTruncScaledResidueFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.TruncScaledResidue", opLen1Imm8(ssa.OpTruncScaledResidueFloat64x8, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Int16x8.TruncateToInt8", opLen1(ssa.OpTruncateToInt8Int16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.TruncateToInt8", opLen1(ssa.OpTruncateToInt8Int16x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x32.TruncateToInt8", opLen1(ssa.OpTruncateToInt8Int16x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x4.TruncateToInt8", opLen1(ssa.OpTruncateToInt8Int32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.TruncateToInt8", opLen1(ssa.OpTruncateToInt8Int32x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x16.TruncateToInt8", opLen1(ssa.OpTruncateToInt8Int32x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x2.TruncateToInt8", opLen1(ssa.OpTruncateToInt8Int64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.TruncateToInt8", opLen1(ssa.OpTruncateToInt8Int64x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x8.TruncateToInt8", opLen1(ssa.OpTruncateToInt8Int64x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x4.TruncateToInt16", opLen1(ssa.OpTruncateToInt16Int32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.TruncateToInt16", opLen1(ssa.OpTruncateToInt16Int32x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x16.TruncateToInt16", opLen1(ssa.OpTruncateToInt16Int32x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x2.TruncateToInt16", opLen1(ssa.OpTruncateToInt16Int64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.TruncateToInt16", opLen1(ssa.OpTruncateToInt16Int64x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x8.TruncateToInt16", opLen1(ssa.OpTruncateToInt16Int64x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x2.TruncateToInt32", opLen1(ssa.OpTruncateToInt32Int64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.TruncateToInt32", opLen1(ssa.OpTruncateToInt32Int64x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x8.TruncateToInt32", opLen1(ssa.OpTruncateToInt32Int64x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x8.TruncateToUint8", opLen1(ssa.OpTruncateToUint8Uint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.TruncateToUint8", opLen1(ssa.OpTruncateToUint8Uint16x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x32.TruncateToUint8", opLen1(ssa.OpTruncateToUint8Uint16x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x4.TruncateToUint8", opLen1(ssa.OpTruncateToUint8Uint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.TruncateToUint8", opLen1(ssa.OpTruncateToUint8Uint32x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x16.TruncateToUint8", opLen1(ssa.OpTruncateToUint8Uint32x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x2.TruncateToUint8", opLen1(ssa.OpTruncateToUint8Uint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.TruncateToUint8", opLen1(ssa.OpTruncateToUint8Uint64x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x8.TruncateToUint8", opLen1(ssa.OpTruncateToUint8Uint64x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x4.TruncateToUint16", 
opLen1(ssa.OpTruncateToUint16Uint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.TruncateToUint16", opLen1(ssa.OpTruncateToUint16Uint32x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x16.TruncateToUint16", opLen1(ssa.OpTruncateToUint16Uint32x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x2.TruncateToUint16", opLen1(ssa.OpTruncateToUint16Uint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.TruncateToUint16", opLen1(ssa.OpTruncateToUint16Uint64x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x8.TruncateToUint16", opLen1(ssa.OpTruncateToUint16Uint64x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x2.TruncateToUint32", opLen1(ssa.OpTruncateToUint32Uint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.TruncateToUint32", opLen1(ssa.OpTruncateToUint32Uint64x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x8.TruncateToUint32", opLen1(ssa.OpTruncateToUint32Uint64x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x16.Xor", opLen2(ssa.OpXorInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x32.Xor", opLen2(ssa.OpXorInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x64.Xor", opLen2(ssa.OpXorInt8x64, types.TypeVec512), sys.AMD64) diff --git a/src/simd/_gen/simdgen/ops/Compares/go.yaml b/src/simd/_gen/simdgen/ops/Compares/go.yaml index 0f9162839c9..3f6c8a45b68 100644 --- a/src/simd/_gen/simdgen/ops/Compares/go.yaml +++ b/src/simd/_gen/simdgen/ops/Compares/go.yaml @@ -57,6 +57,7 @@ # MASKED signed comparisons for X/Y registers # unmasked would clash with emulations on AVX2 - go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) + regexpTag: "compares" asm: "VPCMP[BWDQ]" in: - &int @@ -74,6 +75,7 @@ # MASKED unsigned comparisons for X/Y registers # unmasked would clash with emulations on AVX2 - go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) + regexpTag: "compares" asm: "VPCMPU[BWDQ]" in: - &uint @@ -90,6 +92,7 @@ # masked/unmasked signed comparisons for Z registers - go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) + regexpTag: "compares" asm: "VPCMP[BWDQ]" in: - &int @@ -104,6 +107,7 @@ # masked/unmasked unsigned comparisons for Z registers - go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) + regexpTag: "compares" asm: "VPCMPU[BWDQ]" in: - &uint @@ -118,6 +122,7 @@ # Floats - go: Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan + regexpTag: "compares" asm: "VCMPP[SD]" in: - &float @@ -131,6 +136,7 @@ overwriteBase: int overwriteClass: mask - go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan) + regexpTag: "compares" asm: "VCMPP[SD]" in: - *float diff --git a/src/simd/_gen/simdgen/ops/Converts/categories.yaml b/src/simd/_gen/simdgen/ops/Converts/categories.yaml index 9f02960862a..9abdf454d64 100644 --- a/src/simd/_gen/simdgen/ops/Converts/categories.yaml +++ b/src/simd/_gen/simdgen/ops/Converts/categories.yaml @@ -1,118 +1,126 @@ !sum # Non-truncating conversions # int<->int or uint<->uint widening, float<->int|uint conversions or trucating conversions. -- go: ConvertToInt8 +- go: "(Extend|Saturate|Truncate)?ToInt8" commutative: false + regexpTag: "convert" documentation: !string |- // NAME converts element values to int8. -- go: ConvertToInt16 +- go: "(Extend|Saturate|Truncate)?ToInt16(Concat)?" commutative: false + regexpTag: "convert" documentation: !string |- // NAME converts element values to int16. 
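(Editorial illustration of the pattern change above.) The categories.yaml entries now use regular-expression patterns selected via regexpTag instead of one literal name per operation, so a single entry such as "(Extend|Saturate|Truncate)?ToInt16(Concat)?" covers every method family produced by the rename. A minimal sketch of what that pattern accepts, assuming simdgen matches these patterns fully anchored (the diff itself does not show the matcher):

package main

import (
	"fmt"
	"regexp"
)

func main() {
	// Anchoring with ^...$ is an assumption about simdgen's matcher.
	pat := regexp.MustCompile(`^(Extend|Saturate|Truncate)?ToInt16(Concat)?$`)
	for _, name := range []string{
		"ExtendToInt16",         // widening (VPMOVSX*)
		"SaturateToInt16",       // narrowing with saturation (VPMOVS*)
		"SaturateToInt16Concat", // pairwise pack (VPACKSSDW)
		"TruncateToInt16",       // narrowing with truncation (VPMOV*)
		"ConvertToInt16",        // old spelling, no longer matched
	} {
		fmt.Printf("%-22s %v\n", name, pat.MatchString(name))
	}
}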
-- go: ConvertToInt32 +- go: "(Extend|Saturate|Truncate)?(Convert)?ToInt32" commutative: false + regexpTag: "convert" documentation: !string |- // NAME converts element values to int32. -- go: ConvertToInt64 +- go: "(Extend|Saturate|Truncate)?ToInt64" commutative: false + regexpTag: "convert" documentation: !string |- // NAME converts element values to int64. -- go: ConvertToUint8 +- go: "(Extend|Saturate|Truncate)?ToUint8" commutative: false + regexpTag: "convert" documentation: !string |- // NAME converts element values to uint8. -- go: ConvertToUint16 +- go: "(Extend|Saturate|Truncate)?ToUint16(Concat)?" commutative: false + regexpTag: "convert" documentation: !string |- // NAME converts element values to uint16. -- go: ConvertToUint32 +- go: "(Extend|Saturate|Truncate)?(Convert)?ToUint32" + regexpTag: "convert" commutative: false documentation: !string |- // NAME converts element values to uint32. -- go: ConvertToUint64 +- go: "(Extend|Saturate|Truncate)?ToUint64" + regexpTag: "convert" commutative: false documentation: !string |- // NAME converts element values to uint64. -- go: ConvertToInt8Saturated - commutative: false - documentation: !string |- - // NAME converts element values to int8 with saturation. -- go: ConvertToInt16Saturated - commutative: false - documentation: !string |- - // NAME converts element values to int16 with saturation. -- go: ConvertToInt32Saturated - commutative: false - documentation: !string |- - // NAME converts element values to int32 with saturation. -- go: ConvertToUint8Saturated - commutative: false - documentation: !string |- - // NAME converts element values to uint8 with saturation. -- go: ConvertToUint16Saturated - commutative: false - documentation: !string |- - // NAME converts element values to uint16 with saturation. -- go: ConvertToUint32Saturated - commutative: false - documentation: !string |- - // NAME converts element values to uint32 with saturation. -- go: ConvertToInt16SaturatedPacked - commutative: false - documentation: !string |- - // NAME converts element values to int16 with saturation. -- go: ConvertToUint16SaturatedPacked - commutative: false - documentation: !string |- - // NAME converts element values to uint16 with saturation. # low-part only conversions # int<->int or uint<->uint widening conversions. -- go: ConvertToInt16x8 - commutative: false - documentation: !string |- - // NAME converts 8 lowest vector element values to int16. -- go: ConvertToInt32x4 - commutative: false - documentation: !string |- - // NAME converts 4 lowest vector element values to int32. -- go: ConvertToInt32x8 - commutative: false - documentation: !string |- - // NAME converts 8 lowest vector element values to int32. -- go: ConvertToInt64x2 - commutative: false - documentation: !string |- - // NAME converts 2 lowest vector element values to int64. -- go: ConvertToInt64x4 - commutative: false - documentation: !string |- - // NAME converts 4 lowest vector element values to int64. -- go: ConvertToInt64x8 - commutative: false - documentation: !string |- - // NAME converts 8 lowest vector element values to int64. -- go: ConvertToUint16x8 +- go: ExtendLo8ToUint16x8 commutative: false documentation: !string |- // NAME converts 8 lowest vector element values to uint16. -- go: ConvertToUint32x4 +- go: ExtendLo8ToInt16x8 + commutative: false + documentation: !string |- + // NAME converts 8 lowest vector element values to int16. +- go: ExtendLo4ToUint32x4 commutative: false documentation: !string |- // NAME converts 4 lowest vector element values to uint32. 
-- go: ConvertToUint32x8 +- go: ExtendLo4ToInt32x4 commutative: false documentation: !string |- - // NAME converts 8 lowest vector element values to uint32. -- go: ConvertToUint64x2 + // NAME converts 4 lowest vector element values to int32. +- go: ExtendLo2ToUint64x2 commutative: false documentation: !string |- // NAME converts 2 lowest vector element values to uint64. -- go: ConvertToUint64x4 +- go: ExtendLo2ToInt64x2 + commutative: false + documentation: !string |- + // NAME converts 2 lowest vector element values to int64. +- go: ExtendLo2ToUint64x2 + commutative: false + documentation: !string |- + // NAME converts 2 lowest vector element values to uint64. +- go: ExtendLo4ToUint64x4 commutative: false documentation: !string |- // NAME converts 4 lowest vector element values to uint64. -- go: ConvertToUint64x8 +- go: ExtendLo2ToInt64x2 commutative: false documentation: !string |- - // NAME converts 8 lowest vector element values to uint64. \ No newline at end of file + // NAME converts 2 lowest vector element values to int64. +- go: ExtendLo4ToInt64x4 + commutative: false + documentation: !string |- + // NAME converts 4 lowest vector element values to int64. +- go: ExtendLo4ToUint32x4 + commutative: false + documentation: !string |- + // NAME converts 4 lowest vector element values to uint32. +- go: ExtendLo8ToUint32x8 + commutative: false + documentation: !string |- + // NAME converts 8 lowest vector element values to uint32. +- go: ExtendLo4ToInt32x4 + commutative: false + documentation: !string |- + // NAME converts 4 lowest vector element values to int32. +- go: ExtendLo8ToInt32x8 + commutative: false + documentation: !string |- + // NAME converts 8 lowest vector element values to int32. +- go: ExtendLo2ToUint64x2 + commutative: false + documentation: !string |- + // NAME converts 2 lowest vector element values to uint64. +- go: ExtendLo4ToUint64x4 + commutative: false + documentation: !string |- + // NAME converts 4 lowest vector element values to uint64. +- go: ExtendLo8ToUint64x8 + commutative: false + documentation: !string |- + // NAME converts 8 lowest vector element values to uint64. +- go: ExtendLo2ToInt64x2 + commutative: false + documentation: !string |- + // NAME converts 2 lowest vector element values to int64. +- go: ExtendLo4ToInt64x4 + commutative: false + documentation: !string |- + // NAME converts 4 lowest vector element values to int64. +- go: ExtendLo8ToInt64x8 + commutative: false + documentation: !string |- + // NAME converts 8 lowest vector element values to int64. \ No newline at end of file diff --git a/src/simd/_gen/simdgen/ops/Converts/go.yaml b/src/simd/_gen/simdgen/ops/Converts/go.yaml index a82ae377dd6..88e43b8dbfe 100644 --- a/src/simd/_gen/simdgen/ops/Converts/go.yaml +++ b/src/simd/_gen/simdgen/ops/Converts/go.yaml @@ -2,6 +2,7 @@ # Float <-> Int conversions # float32 -> int32 - go: ConvertToInt32 + regexpTag: "convert" asm: "VCVTTPS2DQ" in: - &fp @@ -14,6 +15,7 @@ elemBits: 32 # float32 -> uint32 - go: ConvertToUint32 + regexpTag: "convert" asm: "VCVTPS2UDQ" in: - *fp @@ -24,7 +26,11 @@ elemBits: 32 # Widening integer conversions. # uint8 -> uint16 -- go: ConvertToUint16 +- go: ExtendToUint16 + addDoc: &zeroExtendDoc + !string |- + // The result vector's elements are zero-extended. 
+ regexpTag: "convert" asm: "VPMOVZXBW" in: - &u8x16 @@ -36,8 +42,10 @@ base: uint elemBits: 16 bits: 256 -- go: ConvertToUint16 +- go: ExtendToUint16 + regexpTag: "convert" asm: "VPMOVZXBW" + addDoc: *zeroExtendDoc in: - &u8x32 base: uint @@ -49,8 +57,12 @@ elemBits: 16 bits: 512 # int8 -> int16 -- go: ConvertToInt16 +- go: ExtendToInt16 + regexpTag: "convert" asm: "VPMOVSXBW" + addDoc: &signExtendDoc + !string |- + // The result vector's elements are sign-extended. in: - &i8x16 base: int @@ -61,8 +73,10 @@ base: int elemBits: 16 bits: 256 -- go: ConvertToInt16 +- go: ExtendToInt16 + regexpTag: "convert" asm: "VPMOVSXBW" + addDoc: *signExtendDoc in: - &i8x32 base: int @@ -74,8 +88,10 @@ elemBits: 16 bits: 512 # uint16->uint32 -- go: ConvertToUint32 +- go: ExtendToUint32 + regexpTag: "convert" asm: "VPMOVZXWD" + addDoc: *zeroExtendDoc in: - &u16x8 base: uint @@ -86,8 +102,10 @@ base: uint elemBits: 32 bits: 256 -- go: ConvertToUint32 +- go: ExtendToUint32 + regexpTag: "convert" asm: "VPMOVZXWD" + addDoc: *zeroExtendDoc in: - *u16x16 out: @@ -96,8 +114,10 @@ elemBits: 32 bits: 512 # int16->int32 -- go: ConvertToInt32 +- go: ExtendToInt32 + regexpTag: "convert" asm: "VPMOVSXWD" + addDoc: *signExtendDoc in: - &i16x8 base: int @@ -108,8 +128,10 @@ base: int elemBits: 32 bits: 256 -- go: ConvertToInt32 +- go: ExtendToInt32 + regexpTag: "convert" asm: "VPMOVSXWD" + addDoc: *signExtendDoc in: - *i16x16 out: @@ -118,8 +140,10 @@ elemBits: 32 bits: 512 # uint32 -> uint64 -- go: ConvertToUint64 +- go: ExtendToUint64 + regexpTag: "convert" asm: "VPMOVZXDQ" + addDoc: *zeroExtendDoc in: - &u32x4 base: uint @@ -130,8 +154,10 @@ base: uint elemBits: 64 bits: 256 -- go: ConvertToUint64 +- go: ExtendToUint64 + regexpTag: "convert" asm: "VPMOVZXDQ" + addDoc: *zeroExtendDoc in: - *u32x8 out: @@ -140,8 +166,10 @@ elemBits: 64 bits: 512 # int32 -> int64 -- go: ConvertToInt64 +- go: ExtendToInt64 + regexpTag: "convert" asm: "VPMOVSXDQ" + addDoc: *signExtendDoc in: - &i32x4 base: int @@ -152,8 +180,10 @@ base: int elemBits: 64 bits: 256 -- go: ConvertToInt64 +- go: ExtendToInt64 + regexpTag: "convert" asm: "VPMOVSXDQ" + addDoc: *signExtendDoc in: - *i32x8 out: @@ -162,37 +192,46 @@ elemBits: 64 bits: 512 # uint16 -> uint64 -- go: ConvertToUint64 +- go: ExtendToUint64 + regexpTag: "convert" asm: "VPMOVZXWQ" + addDoc: *zeroExtendDoc in: - *u16x8 out: - *u64x8 # int16 -> int64 -- go: ConvertToInt64 +- go: ExtendToInt64 + regexpTag: "convert" asm: "VPMOVSXWQ" + addDoc: *signExtendDoc in: - *i16x8 out: - *i64x8 # uint8 -> uint32 -- go: ConvertToUint32 +- go: ExtendToUint32 + regexpTag: "convert" asm: "VPMOVZXBD" + addDoc: *zeroExtendDoc in: - *u8x16 out: - *u32x16 # int8 -> int32 -- go: ConvertToInt32 +- go: ExtendToInt32 + regexpTag: "convert" asm: "VPMOVSXBD" + addDoc: *signExtendDoc in: - *i8x16 out: - *i32x16 # Truncating conversions -- go: ConvertToInt8 +- go: TruncateToInt8 + regexpTag: "convert" asm: "VPMOV[WDQ]B" - addDoc: &truncDoc + addDoc: &truncDocZeroUpper !string |- // Conversion is done with truncation on the vector elements. // Results are packed to low elements in the returned vector, its upper elements are zero-cleared. 
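(Editorial illustration of the narrowing doc strings above and in the next hunk.) The addDoc anchors distinguish three narrowing behaviors: plain truncation, saturation, and the Concat (pack) forms, with the 128-bit variants additionally zero-clearing the upper result lanes. A scalar model of two of them, Int32x4.TruncateToInt16 and Int32x4.SaturateToInt16Concat, that mirrors the documentation rather than the generated implementation:

package main

import "fmt"

// truncateToInt16 models Int32x4.TruncateToInt16 (VPMOVDW, 128-bit form):
// each lane is truncated to its low 16 bits, results are packed into the
// low lanes, and the upper lanes are zero-cleared.
func truncateToInt16(x [4]int32) [8]int16 {
	var r [8]int16
	for i, v := range x {
		r[i] = int16(v) // Go's conversion keeps the low 16 bits
	}
	return r // r[4..7] stay zero
}

// sat16 clamps an int32 into the int16 range.
func sat16(v int32) int16 {
	switch {
	case v > 32767:
		return 32767
	case v < -32768:
		return -32768
	}
	return int16(v)
}

// saturateToInt16Concat models Int32x4.SaturateToInt16Concat (VPACKSSDW).
// With 128-bit inputs there is a single group: x's converted lanes fill
// the lower half of the result and y's fill the upper half; wider vectors
// repeat this per 128-bit group, per the doc string.
func saturateToInt16Concat(x, y [4]int32) [8]int16 {
	var r [8]int16
	for i := 0; i < 4; i++ {
		r[i] = sat16(x[i])
		r[i+4] = sat16(y[i])
	}
	return r
}

func main() {
	x := [4]int32{1, -2, 70000, -70000}
	y := [4]int32{5, 6, 7, 8}
	fmt.Println(truncateToInt16(x))          // [1 -2 4464 -4464 0 0 0 0]
	fmt.Println(saturateToInt16Concat(x, y)) // [1 -2 32767 -32768 5 6 7 8]
}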
@@ -200,35 +239,62 @@ - base: int out: - base: int -- go: ConvertToUint8 + bits: 128 +- go: TruncateToUint8 + regexpTag: "convert" + asm: "VPMOV[WDQ]B" + addDoc: *truncDocZeroUpper + in: + - base: uint + out: + - base: uint + bits: 128 +- go: TruncateToInt8 + regexpTag: "convert" + asm: "VPMOV[WDQ]B" + addDoc: &truncDoc + !string |- + // Conversion is done with truncation on the vector elements. + in: + - base: int + out: + - base: int + bits: 256|512 +- go: TruncateToUint8 + regexpTag: "convert" asm: "VPMOV[WDQ]B" addDoc: *truncDoc in: - base: uint out: - base: uint -- go: ConvertToInt16 + bits: 256|512 +- go: TruncateToInt16 + regexpTag: "convert" asm: "VPMOV[DQ]W" addDoc: *truncDoc in: - base: int out: - base: int -- go: ConvertToUint16 +- go: TruncateToUint16 + regexpTag: "convert" asm: "VPMOV[DQ]W" addDoc: *truncDoc in: - base: uint out: - base: uint -- go: ConvertToInt32 +- go: TruncateToInt32 + regexpTag: "convert" asm: "VPMOVQD" addDoc: *truncDoc in: - base: int out: - base: int -- go: ConvertToUint32 +- go: TruncateToUint32 + regexpTag: "convert" asm: "VPMOVQD" addDoc: *truncDoc in: @@ -236,44 +302,73 @@ out: - base: uint # Saturated conversions. -- go: ConvertToInt8Saturated +- go: SaturateToInt8 + regexpTag: "convert" asm: "VPMOVS[WDQ]B" - addDoc: &satDoc + addDoc: &satDocZeroUpper !string |- + // Conversion is done with saturation on the vector elements. // Results are packed to low elements in the returned vector, its upper elements are zero-cleared. in: - base: int out: - base: int -- go: ConvertToUint8Saturated + bits: 128 +- go: SaturateToUint8 + regexpTag: "convert" + asm: "VPMOVS[WDQ]B" + addDoc: *satDocZeroUpper + in: + - base: int + out: + - base: int + bits: 128 +- go: SaturateToInt8 + regexpTag: "convert" + asm: "VPMOVS[WDQ]B" + addDoc: &satDoc + !string |- + // Conversion is done with saturation on the vector elements. + in: + - base: int + out: + - base: int + bits: 256|512 +- go: SaturateToUint8 + regexpTag: "convert" asm: "VPMOVUS[WDQ]B" addDoc: *satDoc in: - base: uint out: - base: uint -- go: ConvertToInt16Saturated + bits: 256|512 +- go: SaturateToInt16 + regexpTag: "convert" asm: "VPMOVS[DQ]W" addDoc: *satDoc in: - base: int out: - base: int -- go: ConvertToUint16Saturated +- go: SaturateToUint16 + regexpTag: "convert" asm: "VPMOVUS[DQ]W" addDoc: *satDoc in: - base: uint out: - base: uint -- go: ConvertToInt32Saturated +- go: SaturateToInt32 + regexpTag: "convert" asm: "VPMOVSQD" addDoc: *satDoc in: - base: int out: - base: int -- go: ConvertToUint32Saturated +- go: SaturateToUint32 + regexpTag: "convert" asm: "VPMOVUSQD" addDoc: *satDoc in: @@ -281,21 +376,24 @@ out: - base: uint # Truncating saturated packed -- go: ConvertToInt16SaturatedPacked +- go: SaturateToInt16Concat + regexpTag: "convert" asm: "VPACKSSDW" - addDoc: &satDocPacked + addDoc: &satDocConcat !string |- // With each 128-bit as a group: // The converted group from the first input vector will be packed to the lower part of the result vector, - // the converted group from the second second input vector will be packed to the upper part of the result vector. + // the converted group from the second input vector will be packed to the upper part of the result vector. + // Conversion is done with saturation on the vector elements. 
in: - base: int - base: int out: - base: int -- go: ConvertToUint16SaturatedPacked +- go: SaturateToUint16Concat + regexpTag: "convert" asm: "VPACKUSDW" - addDoc: *satDocPacked + addDoc: *satDocConcat in: - base: uint - base: uint @@ -304,36 +402,46 @@ # low-part only conversions. # uint8->uint16 -- go: ConvertToUint16x8 +- go: ExtendLo8ToUint16x8 + regexpTag: "convert" asm: "VPMOVZXBW" + addDoc: *zeroExtendDoc in: - *u8x16 out: - *u16x8 # int8->int16 -- go: ConvertToInt16x8 +- go: ExtendLo8ToInt16x8 + regexpTag: "convert" asm: "VPMOVSXBW" + addDoc: *signExtendDoc in: - *i8x16 out: - *i16x8 # uint16->uint32 -- go: ConvertToUint32x4 +- go: ExtendLo4ToUint32x4 + regexpTag: "convert" asm: "VPMOVZXWD" + addDoc: *zeroExtendDoc in: - *u16x8 out: - *u32x4 # int16->int32 -- go: ConvertToInt32x4 +- go: ExtendLo4ToInt32x4 + regexpTag: "convert" asm: "VPMOVSXWD" + addDoc: *signExtendDoc in: - *i16x8 out: - *i32x4 # uint32 -> uint64 -- go: ConvertToUint64x2 +- go: ExtendLo2ToUint64x2 + regexpTag: "convert" asm: "VPMOVZXDQ" + addDoc: *zeroExtendDoc in: - *u32x4 out: @@ -342,8 +450,10 @@ elemBits: 64 bits: 128 # int32 -> int64 -- go: ConvertToInt64x2 +- go: ExtendLo2ToInt64x2 + regexpTag: "convert" asm: "VPMOVSXDQ" + addDoc: *signExtendDoc in: - *i32x4 out: @@ -352,91 +462,119 @@ elemBits: 64 bits: 128 # uint16 -> uint64 -- go: ConvertToUint64x2 +- go: ExtendLo2ToUint64x2 + regexpTag: "convert" asm: "VPMOVZXWQ" + addDoc: *zeroExtendDoc in: - *u16x8 out: - *u64x2 -- go: ConvertToUint64x4 +- go: ExtendLo4ToUint64x4 + regexpTag: "convert" asm: "VPMOVZXWQ" + addDoc: *zeroExtendDoc in: - *u16x8 out: - *u64x4 # int16 -> int64 -- go: ConvertToInt64x2 +- go: ExtendLo2ToInt64x2 + regexpTag: "convert" asm: "VPMOVSXWQ" + addDoc: *signExtendDoc in: - *i16x8 out: - *i64x2 -- go: ConvertToUint64x4 +- go: ExtendLo4ToInt64x4 + regexpTag: "convert" asm: "VPMOVSXWQ" + addDoc: *signExtendDoc in: - *i16x8 out: - *i64x4 # uint8 -> uint32 -- go: ConvertToUint32x4 +- go: ExtendLo4ToUint32x4 + regexpTag: "convert" asm: "VPMOVZXBD" + addDoc: *zeroExtendDoc in: - *u8x16 out: - *u32x4 -- go: ConvertToUint32x8 +- go: ExtendLo8ToUint32x8 + regexpTag: "convert" asm: "VPMOVZXBD" + addDoc: *zeroExtendDoc in: - *u8x16 out: - *u32x8 # int8 -> int32 -- go: ConvertToInt32x4 +- go: ExtendLo4ToInt32x4 + regexpTag: "convert" asm: "VPMOVSXBD" + addDoc: *signExtendDoc in: - *i8x16 out: - *i32x4 -- go: ConvertToInt32x8 +- go: ExtendLo8ToInt32x8 + regexpTag: "convert" asm: "VPMOVSXBD" + addDoc: *signExtendDoc in: - *i8x16 out: - *i32x8 # uint8 -> uint64 -- go: ConvertToUint64x2 +- go: ExtendLo2ToUint64x2 + regexpTag: "convert" asm: "VPMOVZXBQ" + addDoc: *zeroExtendDoc in: - *u8x16 out: - *u64x2 -- go: ConvertToUint64x4 +- go: ExtendLo4ToUint64x4 + regexpTag: "convert" asm: "VPMOVZXBQ" + addDoc: *zeroExtendDoc in: - *u8x16 out: - *u64x4 -- go: ConvertToUint64x8 +- go: ExtendLo8ToUint64x8 + regexpTag: "convert" asm: "VPMOVZXBQ" + addDoc: *zeroExtendDoc in: - *u8x16 out: - *u64x8 # int8 -> int64 -- go: ConvertToInt64x2 +- go: ExtendLo2ToInt64x2 + regexpTag: "convert" asm: "VPMOVSXBQ" + addDoc: *signExtendDoc in: - *i8x16 out: - *i64x2 -- go: ConvertToInt64x4 +- go: ExtendLo4ToInt64x4 + regexpTag: "convert" asm: "VPMOVSXBQ" + addDoc: *signExtendDoc in: - *i8x16 out: - *i64x4 -- go: ConvertToInt64x8 +- go: ExtendLo8ToInt64x8 + regexpTag: "convert" asm: "VPMOVSXBQ" + addDoc: *signExtendDoc in: - *i8x16 out: diff --git a/src/simd/_gen/simdgen/ops/FPonlyArith/go.yaml b/src/simd/_gen/simdgen/ops/FPonlyArith/go.yaml index e164f7b70a4..303647b2b8a 100644 
--- a/src/simd/_gen/simdgen/ops/FPonlyArith/go.yaml +++ b/src/simd/_gen/simdgen/ops/FPonlyArith/go.yaml @@ -27,6 +27,7 @@ out: *1fp - go: "RoundToEven|Ceil|Floor|Trunc" + regexpTag: "fp" asm: "VROUNDP[SD]" in: - *fp @@ -35,6 +36,7 @@ out: *1fp - go: "(RoundToEven|Ceil|Floor|Trunc)Scaled" + regexpTag: "fp" asm: "VRNDSCALEP[SD]" in: - *fp @@ -44,6 +46,7 @@ name: prec out: *1fp - go: "(RoundToEven|Ceil|Floor|Trunc)ScaledResidue" + regexpTag: "fp" asm: "VREDUCEP[SD]" in: - *fp diff --git a/src/simd/_gen/simdgen/ops/Moves/go.yaml b/src/simd/_gen/simdgen/ops/Moves/go.yaml index bbea29bcb0a..726a983ac42 100644 --- a/src/simd/_gen/simdgen/ops/Moves/go.yaml +++ b/src/simd/_gen/simdgen/ops/Moves/go.yaml @@ -60,6 +60,7 @@ OverwriteBase: float - go: "SetHi|SetLo" + regexpTag: "move" asm: "VINSERTI128|VINSERTI64X4" inVariant: [] in: @@ -80,6 +81,7 @@ - go: "GetHi|GetLo" asm: "VEXTRACTI128|VEXTRACTI64X4" + regexpTag: "move" inVariant: [] in: - *i8x2N @@ -89,6 +91,7 @@ - go: "SetHi|SetLo" asm: "VINSERTI128|VINSERTI64X4" + regexpTag: "move" inVariant: [] in: - &i16x2N @@ -104,6 +107,7 @@ - *i16x2N - go: "GetHi|GetLo" + regexpTag: "move" asm: "VEXTRACTI128|VEXTRACTI64X4" inVariant: [] in: @@ -113,6 +117,7 @@ - *i16xN - go: "SetHi|SetLo" + regexpTag: "move" asm: "VINSERTI128|VINSERTI64X4" inVariant: [] in: @@ -129,6 +134,7 @@ - *i32x2N - go: "GetHi|GetLo" + regexpTag: "move" asm: "VEXTRACTI128|VEXTRACTI64X4" inVariant: [] in: @@ -138,6 +144,7 @@ - *i32xN - go: "SetHi|SetLo" + regexpTag: "move" asm: "VINSERTI128|VINSERTI64X4" inVariant: [] in: @@ -154,6 +161,7 @@ - *i64x2N - go: "GetHi|GetLo" + regexpTag: "move" asm: "VEXTRACTI128|VEXTRACTI64X4" inVariant: [] in: @@ -163,6 +171,7 @@ - *i64xN - go: "SetHi|SetLo" + regexpTag: "move" asm: "VINSERTF128|VINSERTF64X4" inVariant: [] in: @@ -179,6 +188,7 @@ - *f32x2N - go: "GetHi|GetLo" + regexpTag: "move" asm: "VEXTRACTF128|VEXTRACTF64X4" inVariant: [] in: @@ -188,6 +198,7 @@ - *f32xN - go: "SetHi|SetLo" + regexpTag: "move" asm: "VINSERTF128|VINSERTF64X4" inVariant: [] in: @@ -204,6 +215,7 @@ - *f64x2N - go: "GetHi|GetLo" + regexpTag: "move" asm: "VEXTRACTF128|VEXTRACTF64X4" inVariant: [] in: diff --git a/src/simd/internal/simd_test/unary_test.go b/src/simd/internal/simd_test/unary_test.go index 4fb197700b3..ea4c1149922 100644 --- a/src/simd/internal/simd_test/unary_test.go +++ b/src/simd/internal/simd_test/unary_test.go @@ -125,13 +125,13 @@ func TestToInt32(t *testing.T) { } func TestConverts(t *testing.T) { - testUint8x16ConvertToUint16(t, simd.Uint8x16.ConvertToUint16, map1[uint8](toUint16)) - testUint16x8ConvertToUint32(t, simd.Uint16x8.ConvertToUint32, map1[uint16](toUint32)) + testUint8x16ConvertToUint16(t, simd.Uint8x16.ExtendToUint16, map1[uint8](toUint16)) + testUint16x8ConvertToUint32(t, simd.Uint16x8.ExtendToUint32, map1[uint16](toUint32)) } func TestConvertsAVX512(t *testing.T) { if !simd.X86.AVX512() { t.Skip("Needs AVX512") } - testUint8x32ConvertToUint16(t, simd.Uint8x32.ConvertToUint16, map1[uint8](toUint16)) + testUint8x32ConvertToUint16(t, simd.Uint8x32.ExtendToUint16, map1[uint8](toUint16)) } diff --git a/src/simd/ops_amd64.go b/src/simd/ops_amd64.go index 2be59cf4858..38d984622d6 100644 --- a/src/simd/ops_amd64.go +++ b/src/simd/ops_amd64.go @@ -1544,252 +1544,6 @@ func (x Uint8x32) ConcatShiftBytesRightGrouped(constant uint8, y Uint8x32) Uint8 // Asm: VPALIGNR, CPU Feature: AVX512 func (x Uint8x64) ConcatShiftBytesRightGrouped(constant uint8, y Uint8x64) Uint8x64 -/* ConvertToInt8 */ - -// ConvertToInt8 converts element values to int8. 
-// Conversion is done with truncation on the vector elements. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. -// -// Asm: VPMOVWB, CPU Feature: AVX512 -func (x Int16x8) ConvertToInt8() Int8x16 - -// ConvertToInt8 converts element values to int8. -// Conversion is done with truncation on the vector elements. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. -// -// Asm: VPMOVWB, CPU Feature: AVX512 -func (x Int16x16) ConvertToInt8() Int8x16 - -// ConvertToInt8 converts element values to int8. -// Conversion is done with truncation on the vector elements. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. -// -// Asm: VPMOVWB, CPU Feature: AVX512 -func (x Int16x32) ConvertToInt8() Int8x32 - -// ConvertToInt8 converts element values to int8. -// Conversion is done with truncation on the vector elements. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. -// -// Asm: VPMOVDB, CPU Feature: AVX512 -func (x Int32x4) ConvertToInt8() Int8x16 - -// ConvertToInt8 converts element values to int8. -// Conversion is done with truncation on the vector elements. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. -// -// Asm: VPMOVDB, CPU Feature: AVX512 -func (x Int32x8) ConvertToInt8() Int8x16 - -// ConvertToInt8 converts element values to int8. -// Conversion is done with truncation on the vector elements. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. -// -// Asm: VPMOVDB, CPU Feature: AVX512 -func (x Int32x16) ConvertToInt8() Int8x16 - -// ConvertToInt8 converts element values to int8. -// Conversion is done with truncation on the vector elements. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. -// -// Asm: VPMOVQB, CPU Feature: AVX512 -func (x Int64x2) ConvertToInt8() Int8x16 - -// ConvertToInt8 converts element values to int8. -// Conversion is done with truncation on the vector elements. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. -// -// Asm: VPMOVQB, CPU Feature: AVX512 -func (x Int64x4) ConvertToInt8() Int8x16 - -// ConvertToInt8 converts element values to int8. -// Conversion is done with truncation on the vector elements. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. -// -// Asm: VPMOVQB, CPU Feature: AVX512 -func (x Int64x8) ConvertToInt8() Int8x16 - -/* ConvertToInt8Saturated */ - -// ConvertToInt8Saturated converts element values to int8 with saturation. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. -// -// Asm: VPMOVSWB, CPU Feature: AVX512 -func (x Int16x8) ConvertToInt8Saturated() Int8x16 - -// ConvertToInt8Saturated converts element values to int8 with saturation. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. -// -// Asm: VPMOVSWB, CPU Feature: AVX512 -func (x Int16x16) ConvertToInt8Saturated() Int8x16 - -// ConvertToInt8Saturated converts element values to int8 with saturation. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. 
-// -// Asm: VPMOVSWB, CPU Feature: AVX512 -func (x Int16x32) ConvertToInt8Saturated() Int8x32 - -// ConvertToInt8Saturated converts element values to int8 with saturation. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. -// -// Asm: VPMOVSDB, CPU Feature: AVX512 -func (x Int32x4) ConvertToInt8Saturated() Int8x16 - -// ConvertToInt8Saturated converts element values to int8 with saturation. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. -// -// Asm: VPMOVSDB, CPU Feature: AVX512 -func (x Int32x8) ConvertToInt8Saturated() Int8x16 - -// ConvertToInt8Saturated converts element values to int8 with saturation. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. -// -// Asm: VPMOVSDB, CPU Feature: AVX512 -func (x Int32x16) ConvertToInt8Saturated() Int8x16 - -// ConvertToInt8Saturated converts element values to int8 with saturation. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. -// -// Asm: VPMOVSQB, CPU Feature: AVX512 -func (x Int64x2) ConvertToInt8Saturated() Int8x16 - -// ConvertToInt8Saturated converts element values to int8 with saturation. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. -// -// Asm: VPMOVSQB, CPU Feature: AVX512 -func (x Int64x4) ConvertToInt8Saturated() Int8x16 - -// ConvertToInt8Saturated converts element values to int8 with saturation. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. -// -// Asm: VPMOVSQB, CPU Feature: AVX512 -func (x Int64x8) ConvertToInt8Saturated() Int8x16 - -/* ConvertToInt16 */ - -// ConvertToInt16 converts element values to int16. -// -// Asm: VPMOVSXBW, CPU Feature: AVX2 -func (x Int8x16) ConvertToInt16() Int16x16 - -// ConvertToInt16 converts element values to int16. -// -// Asm: VPMOVSXBW, CPU Feature: AVX512 -func (x Int8x32) ConvertToInt16() Int16x32 - -// ConvertToInt16 converts element values to int16. -// Conversion is done with truncation on the vector elements. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. -// -// Asm: VPMOVDW, CPU Feature: AVX512 -func (x Int32x4) ConvertToInt16() Int16x8 - -// ConvertToInt16 converts element values to int16. -// Conversion is done with truncation on the vector elements. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. -// -// Asm: VPMOVDW, CPU Feature: AVX512 -func (x Int32x8) ConvertToInt16() Int16x8 - -// ConvertToInt16 converts element values to int16. -// Conversion is done with truncation on the vector elements. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. -// -// Asm: VPMOVDW, CPU Feature: AVX512 -func (x Int32x16) ConvertToInt16() Int16x16 - -// ConvertToInt16 converts element values to int16. -// Conversion is done with truncation on the vector elements. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. -// -// Asm: VPMOVQW, CPU Feature: AVX512 -func (x Int64x2) ConvertToInt16() Int16x8 - -// ConvertToInt16 converts element values to int16. -// Conversion is done with truncation on the vector elements. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. 
-// -// Asm: VPMOVQW, CPU Feature: AVX512 -func (x Int64x4) ConvertToInt16() Int16x8 - -// ConvertToInt16 converts element values to int16. -// Conversion is done with truncation on the vector elements. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. -// -// Asm: VPMOVQW, CPU Feature: AVX512 -func (x Int64x8) ConvertToInt16() Int16x8 - -/* ConvertToInt16Saturated */ - -// ConvertToInt16Saturated converts element values to int16 with saturation. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. -// -// Asm: VPMOVSDW, CPU Feature: AVX512 -func (x Int32x4) ConvertToInt16Saturated() Int16x8 - -// ConvertToInt16Saturated converts element values to int16 with saturation. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. -// -// Asm: VPMOVSDW, CPU Feature: AVX512 -func (x Int32x8) ConvertToInt16Saturated() Int16x8 - -// ConvertToInt16Saturated converts element values to int16 with saturation. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. -// -// Asm: VPMOVSDW, CPU Feature: AVX512 -func (x Int32x16) ConvertToInt16Saturated() Int16x16 - -// ConvertToInt16Saturated converts element values to int16 with saturation. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. -// -// Asm: VPMOVSQW, CPU Feature: AVX512 -func (x Int64x2) ConvertToInt16Saturated() Int16x8 - -// ConvertToInt16Saturated converts element values to int16 with saturation. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. -// -// Asm: VPMOVSQW, CPU Feature: AVX512 -func (x Int64x4) ConvertToInt16Saturated() Int16x8 - -// ConvertToInt16Saturated converts element values to int16 with saturation. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. -// -// Asm: VPMOVSQW, CPU Feature: AVX512 -func (x Int64x8) ConvertToInt16Saturated() Int16x8 - -/* ConvertToInt16SaturatedPacked */ - -// ConvertToInt16SaturatedPacked converts element values to int16 with saturation. -// With each 128-bit as a group: -// The converted group from the first input vector will be packed to the lower part of the result vector, -// the converted group from the second second input vector will be packed to the upper part of the result vector. -// -// Asm: VPACKSSDW, CPU Feature: AVX -func (x Int32x4) ConvertToInt16SaturatedPacked(y Int32x4) Int16x8 - -// ConvertToInt16SaturatedPacked converts element values to int16 with saturation. -// With each 128-bit as a group: -// The converted group from the first input vector will be packed to the lower part of the result vector, -// the converted group from the second second input vector will be packed to the upper part of the result vector. -// -// Asm: VPACKSSDW, CPU Feature: AVX2 -func (x Int32x8) ConvertToInt16SaturatedPacked(y Int32x8) Int16x16 - -// ConvertToInt16SaturatedPacked converts element values to int16 with saturation. -// With each 128-bit as a group: -// The converted group from the first input vector will be packed to the lower part of the result vector, -// the converted group from the second second input vector will be packed to the upper part of the result vector. 
-// -// Asm: VPACKSSDW, CPU Feature: AVX512 -func (x Int32x16) ConvertToInt16SaturatedPacked(y Int32x16) Int16x32 - -/* ConvertToInt16x8 */ - -// ConvertToInt16x8 converts 8 lowest vector element values to int16. -// -// Asm: VPMOVSXBW, CPU Feature: AVX -func (x Int8x16) ConvertToInt16x8() Int16x8 - /* ConvertToInt32 */ // ConvertToInt32 converts element values to int32. @@ -1807,375 +1561,6 @@ func (x Float32x8) ConvertToInt32() Int32x8 // Asm: VCVTTPS2DQ, CPU Feature: AVX512 func (x Float32x16) ConvertToInt32() Int32x16 -// ConvertToInt32 converts element values to int32. -// -// Asm: VPMOVSXBD, CPU Feature: AVX512 -func (x Int8x16) ConvertToInt32() Int32x16 - -// ConvertToInt32 converts element values to int32. -// -// Asm: VPMOVSXWD, CPU Feature: AVX2 -func (x Int16x8) ConvertToInt32() Int32x8 - -// ConvertToInt32 converts element values to int32. -// -// Asm: VPMOVSXWD, CPU Feature: AVX512 -func (x Int16x16) ConvertToInt32() Int32x16 - -// ConvertToInt32 converts element values to int32. -// Conversion is done with truncation on the vector elements. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. -// -// Asm: VPMOVQD, CPU Feature: AVX512 -func (x Int64x2) ConvertToInt32() Int32x4 - -// ConvertToInt32 converts element values to int32. -// Conversion is done with truncation on the vector elements. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. -// -// Asm: VPMOVQD, CPU Feature: AVX512 -func (x Int64x4) ConvertToInt32() Int32x4 - -// ConvertToInt32 converts element values to int32. -// Conversion is done with truncation on the vector elements. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. -// -// Asm: VPMOVQD, CPU Feature: AVX512 -func (x Int64x8) ConvertToInt32() Int32x8 - -/* ConvertToInt32Saturated */ - -// ConvertToInt32Saturated converts element values to int32 with saturation. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. -// -// Asm: VPMOVSQD, CPU Feature: AVX512 -func (x Int64x2) ConvertToInt32Saturated() Int32x4 - -// ConvertToInt32Saturated converts element values to int32 with saturation. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. -// -// Asm: VPMOVSQD, CPU Feature: AVX512 -func (x Int64x4) ConvertToInt32Saturated() Int32x4 - -// ConvertToInt32Saturated converts element values to int32 with saturation. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. -// -// Asm: VPMOVSQD, CPU Feature: AVX512 -func (x Int64x8) ConvertToInt32Saturated() Int32x8 - -/* ConvertToInt32x4 */ - -// ConvertToInt32x4 converts 4 lowest vector element values to int32. -// -// Asm: VPMOVSXBD, CPU Feature: AVX -func (x Int8x16) ConvertToInt32x4() Int32x4 - -// ConvertToInt32x4 converts 4 lowest vector element values to int32. -// -// Asm: VPMOVSXWD, CPU Feature: AVX -func (x Int16x8) ConvertToInt32x4() Int32x4 - -/* ConvertToInt32x8 */ - -// ConvertToInt32x8 converts 8 lowest vector element values to int32. -// -// Asm: VPMOVSXBD, CPU Feature: AVX2 -func (x Int8x16) ConvertToInt32x8() Int32x8 - -/* ConvertToInt64 */ - -// ConvertToInt64 converts element values to int64. -// -// Asm: VPMOVSXWQ, CPU Feature: AVX512 -func (x Int16x8) ConvertToInt64() Int64x8 - -// ConvertToInt64 converts element values to int64. 
-//
-// Asm: VPMOVSXDQ, CPU Feature: AVX2
-func (x Int32x4) ConvertToInt64() Int64x4
-
-// ConvertToInt64 converts element values to int64.
-//
-// Asm: VPMOVSXDQ, CPU Feature: AVX512
-func (x Int32x8) ConvertToInt64() Int64x8
-
-/* ConvertToInt64x2 */
-
-// ConvertToInt64x2 converts 2 lowest vector element values to int64.
-//
-// Asm: VPMOVSXBQ, CPU Feature: AVX
-func (x Int8x16) ConvertToInt64x2() Int64x2
-
-// ConvertToInt64x2 converts 2 lowest vector element values to int64.
-//
-// Asm: VPMOVSXWQ, CPU Feature: AVX
-func (x Int16x8) ConvertToInt64x2() Int64x2
-
-// ConvertToInt64x2 converts 2 lowest vector element values to int64.
-//
-// Asm: VPMOVSXDQ, CPU Feature: AVX
-func (x Int32x4) ConvertToInt64x2() Int64x2
-
-/* ConvertToInt64x4 */
-
-// ConvertToInt64x4 converts 4 lowest vector element values to int64.
-//
-// Asm: VPMOVSXBQ, CPU Feature: AVX2
-func (x Int8x16) ConvertToInt64x4() Int64x4
-
-/* ConvertToInt64x8 */
-
-// ConvertToInt64x8 converts 8 lowest vector element values to int64.
-//
-// Asm: VPMOVSXBQ, CPU Feature: AVX512
-func (x Int8x16) ConvertToInt64x8() Int64x8
-
-/* ConvertToUint8 */
-
-// ConvertToUint8 converts element values to uint8.
-// Conversion is done with truncation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
-//
-// Asm: VPMOVWB, CPU Feature: AVX512
-func (x Uint16x8) ConvertToUint8() Uint8x16
-
-// ConvertToUint8 converts element values to uint8.
-// Conversion is done with truncation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
-//
-// Asm: VPMOVWB, CPU Feature: AVX512
-func (x Uint16x16) ConvertToUint8() Uint8x16
-
-// ConvertToUint8 converts element values to uint8.
-// Conversion is done with truncation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
-//
-// Asm: VPMOVWB, CPU Feature: AVX512
-func (x Uint16x32) ConvertToUint8() Uint8x32
-
-// ConvertToUint8 converts element values to uint8.
-// Conversion is done with truncation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
-//
-// Asm: VPMOVDB, CPU Feature: AVX512
-func (x Uint32x4) ConvertToUint8() Uint8x16
-
-// ConvertToUint8 converts element values to uint8.
-// Conversion is done with truncation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
-//
-// Asm: VPMOVDB, CPU Feature: AVX512
-func (x Uint32x8) ConvertToUint8() Uint8x16
-
-// ConvertToUint8 converts element values to uint8.
-// Conversion is done with truncation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
-//
-// Asm: VPMOVDB, CPU Feature: AVX512
-func (x Uint32x16) ConvertToUint8() Uint8x16
-
-// ConvertToUint8 converts element values to uint8.
-// Conversion is done with truncation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
-//
-// Asm: VPMOVQB, CPU Feature: AVX512
-func (x Uint64x2) ConvertToUint8() Uint8x16
-
-// ConvertToUint8 converts element values to uint8.
-// Conversion is done with truncation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
-//
-// Asm: VPMOVQB, CPU Feature: AVX512
-func (x Uint64x4) ConvertToUint8() Uint8x16
-
-// ConvertToUint8 converts element values to uint8.
-// Conversion is done with truncation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
-//
-// Asm: VPMOVQB, CPU Feature: AVX512
-func (x Uint64x8) ConvertToUint8() Uint8x16
-
-/* ConvertToUint8Saturated */
-
-// ConvertToUint8Saturated converts element values to uint8 with saturation.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
-//
-// Asm: VPMOVUSWB, CPU Feature: AVX512
-func (x Uint16x8) ConvertToUint8Saturated() Uint8x16
-
-// ConvertToUint8Saturated converts element values to uint8 with saturation.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
-//
-// Asm: VPMOVUSWB, CPU Feature: AVX512
-func (x Uint16x16) ConvertToUint8Saturated() Uint8x16
-
-// ConvertToUint8Saturated converts element values to uint8 with saturation.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
-//
-// Asm: VPMOVUSWB, CPU Feature: AVX512
-func (x Uint16x32) ConvertToUint8Saturated() Uint8x32
-
-// ConvertToUint8Saturated converts element values to uint8 with saturation.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
-//
-// Asm: VPMOVUSDB, CPU Feature: AVX512
-func (x Uint32x4) ConvertToUint8Saturated() Uint8x16
-
-// ConvertToUint8Saturated converts element values to uint8 with saturation.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
-//
-// Asm: VPMOVUSDB, CPU Feature: AVX512
-func (x Uint32x8) ConvertToUint8Saturated() Uint8x16
-
-// ConvertToUint8Saturated converts element values to uint8 with saturation.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
-//
-// Asm: VPMOVUSDB, CPU Feature: AVX512
-func (x Uint32x16) ConvertToUint8Saturated() Uint8x16
-
-// ConvertToUint8Saturated converts element values to uint8 with saturation.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
-//
-// Asm: VPMOVUSQB, CPU Feature: AVX512
-func (x Uint64x2) ConvertToUint8Saturated() Uint8x16
-
-// ConvertToUint8Saturated converts element values to uint8 with saturation.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
-//
-// Asm: VPMOVUSQB, CPU Feature: AVX512
-func (x Uint64x4) ConvertToUint8Saturated() Uint8x16
-
-// ConvertToUint8Saturated converts element values to uint8 with saturation.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
-//
-// Asm: VPMOVUSQB, CPU Feature: AVX512
-func (x Uint64x8) ConvertToUint8Saturated() Uint8x16
-
-/* ConvertToUint16 */
-
-// ConvertToUint16 converts element values to uint16.
-//
-// Asm: VPMOVZXBW, CPU Feature: AVX2
-func (x Uint8x16) ConvertToUint16() Uint16x16
-
-// ConvertToUint16 converts element values to uint16.
-//
-// Asm: VPMOVZXBW, CPU Feature: AVX512
-func (x Uint8x32) ConvertToUint16() Uint16x32
-
-// ConvertToUint16 converts element values to uint16.
-// Conversion is done with truncation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
-//
-// Asm: VPMOVDW, CPU Feature: AVX512
-func (x Uint32x4) ConvertToUint16() Uint16x8
-
-// ConvertToUint16 converts element values to uint16.
-// Conversion is done with truncation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
-//
-// Asm: VPMOVDW, CPU Feature: AVX512
-func (x Uint32x8) ConvertToUint16() Uint16x8
-
-// ConvertToUint16 converts element values to uint16.
-// Conversion is done with truncation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
-//
-// Asm: VPMOVDW, CPU Feature: AVX512
-func (x Uint32x16) ConvertToUint16() Uint16x16
-
-// ConvertToUint16 converts element values to uint16.
-// Conversion is done with truncation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
-//
-// Asm: VPMOVQW, CPU Feature: AVX512
-func (x Uint64x2) ConvertToUint16() Uint16x8
-
-// ConvertToUint16 converts element values to uint16.
-// Conversion is done with truncation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
-//
-// Asm: VPMOVQW, CPU Feature: AVX512
-func (x Uint64x4) ConvertToUint16() Uint16x8
-
-// ConvertToUint16 converts element values to uint16.
-// Conversion is done with truncation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
-//
-// Asm: VPMOVQW, CPU Feature: AVX512
-func (x Uint64x8) ConvertToUint16() Uint16x8
-
-/* ConvertToUint16Saturated */
-
-// ConvertToUint16Saturated converts element values to uint16 with saturation.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
-//
-// Asm: VPMOVUSDW, CPU Feature: AVX512
-func (x Uint32x4) ConvertToUint16Saturated() Uint16x8
-
-// ConvertToUint16Saturated converts element values to uint16 with saturation.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
-//
-// Asm: VPMOVUSDW, CPU Feature: AVX512
-func (x Uint32x8) ConvertToUint16Saturated() Uint16x8
-
-// ConvertToUint16Saturated converts element values to uint16 with saturation.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
-//
-// Asm: VPMOVUSDW, CPU Feature: AVX512
-func (x Uint32x16) ConvertToUint16Saturated() Uint16x16
-
-// ConvertToUint16Saturated converts element values to uint16 with saturation.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
-//
-// Asm: VPMOVUSQW, CPU Feature: AVX512
-func (x Uint64x2) ConvertToUint16Saturated() Uint16x8
-
-// ConvertToUint16Saturated converts element values to uint16 with saturation.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
-//
-// Asm: VPMOVUSQW, CPU Feature: AVX512
-func (x Uint64x4) ConvertToUint16Saturated() Uint16x8
-
-// ConvertToUint16Saturated converts element values to uint16 with saturation.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
-//
-// Asm: VPMOVUSQW, CPU Feature: AVX512
-func (x Uint64x8) ConvertToUint16Saturated() Uint16x8
-
-/* ConvertToUint16SaturatedPacked */
-
-// ConvertToUint16SaturatedPacked converts element values to uint16 with saturation.
-// With each 128-bit as a group:
-// The converted group from the first input vector will be packed to the lower part of the result vector,
-// the converted group from the second second input vector will be packed to the upper part of the result vector.
-//
-// Asm: VPACKUSDW, CPU Feature: AVX
-func (x Uint32x4) ConvertToUint16SaturatedPacked(y Uint32x4) Uint16x8
-
-// ConvertToUint16SaturatedPacked converts element values to uint16 with saturation.
-// With each 128-bit as a group:
-// The converted group from the first input vector will be packed to the lower part of the result vector,
-// the converted group from the second second input vector will be packed to the upper part of the result vector.
-//
-// Asm: VPACKUSDW, CPU Feature: AVX2
-func (x Uint32x8) ConvertToUint16SaturatedPacked(y Uint32x8) Uint16x16
-
-// ConvertToUint16SaturatedPacked converts element values to uint16 with saturation.
-// With each 128-bit as a group:
-// The converted group from the first input vector will be packed to the lower part of the result vector,
-// the converted group from the second second input vector will be packed to the upper part of the result vector.
-//
-// Asm: VPACKUSDW, CPU Feature: AVX512
-func (x Uint32x16) ConvertToUint16SaturatedPacked(y Uint32x16) Uint16x32
-
-/* ConvertToUint16x8 */
-
-// ConvertToUint16x8 converts 8 lowest vector element values to uint16.
-//
-// Asm: VPMOVZXBW, CPU Feature: AVX
-func (x Uint8x16) ConvertToUint16x8() Uint16x8
-
 /* ConvertToUint32 */
 
 // ConvertToUint32 converts element values to uint32.
@@ -2193,139 +1578,6 @@ func (x Float32x8) ConvertToUint32() Uint32x8
 // Asm: VCVTPS2UDQ, CPU Feature: AVX512
 func (x Float32x16) ConvertToUint32() Uint32x16
 
-// ConvertToUint32 converts element values to uint32.
-//
-// Asm: VPMOVZXBD, CPU Feature: AVX512
-func (x Uint8x16) ConvertToUint32() Uint32x16
-
-// ConvertToUint32 converts element values to uint32.
-//
-// Asm: VPMOVZXWD, CPU Feature: AVX2
-func (x Uint16x8) ConvertToUint32() Uint32x8
-
-// ConvertToUint32 converts element values to uint32.
-//
-// Asm: VPMOVZXWD, CPU Feature: AVX512
-func (x Uint16x16) ConvertToUint32() Uint32x16
-
-// ConvertToUint32 converts element values to uint32.
-// Conversion is done with truncation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
-//
-// Asm: VPMOVQD, CPU Feature: AVX512
-func (x Uint64x2) ConvertToUint32() Uint32x4
-
-// ConvertToUint32 converts element values to uint32.
-// Conversion is done with truncation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
-//
-// Asm: VPMOVQD, CPU Feature: AVX512
-func (x Uint64x4) ConvertToUint32() Uint32x4
-
-// ConvertToUint32 converts element values to uint32.
-// Conversion is done with truncation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
-//
-// Asm: VPMOVQD, CPU Feature: AVX512
-func (x Uint64x8) ConvertToUint32() Uint32x8
-
-/* ConvertToUint32Saturated */
-
-// ConvertToUint32Saturated converts element values to uint32 with saturation.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
-//
-// Asm: VPMOVUSQD, CPU Feature: AVX512
-func (x Uint64x2) ConvertToUint32Saturated() Uint32x4
-
-// ConvertToUint32Saturated converts element values to uint32 with saturation.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
-//
-// Asm: VPMOVUSQD, CPU Feature: AVX512
-func (x Uint64x4) ConvertToUint32Saturated() Uint32x4
-
-// ConvertToUint32Saturated converts element values to uint32 with saturation.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
-//
-// Asm: VPMOVUSQD, CPU Feature: AVX512
-func (x Uint64x8) ConvertToUint32Saturated() Uint32x8
-
-/* ConvertToUint32x4 */
-
-// ConvertToUint32x4 converts 4 lowest vector element values to uint32.
-//
-// Asm: VPMOVZXBD, CPU Feature: AVX
-func (x Uint8x16) ConvertToUint32x4() Uint32x4
-
-// ConvertToUint32x4 converts 4 lowest vector element values to uint32.
-//
-// Asm: VPMOVZXWD, CPU Feature: AVX
-func (x Uint16x8) ConvertToUint32x4() Uint32x4
-
-/* ConvertToUint32x8 */
-
-// ConvertToUint32x8 converts 8 lowest vector element values to uint32.
-//
-// Asm: VPMOVZXBD, CPU Feature: AVX2
-func (x Uint8x16) ConvertToUint32x8() Uint32x8
-
-/* ConvertToUint64 */
-
-// ConvertToUint64 converts element values to uint64.
-//
-// Asm: VPMOVZXWQ, CPU Feature: AVX512
-func (x Uint16x8) ConvertToUint64() Uint64x8
-
-// ConvertToUint64 converts element values to uint64.
-//
-// Asm: VPMOVZXDQ, CPU Feature: AVX2
-func (x Uint32x4) ConvertToUint64() Uint64x4
-
-// ConvertToUint64 converts element values to uint64.
-//
-// Asm: VPMOVZXDQ, CPU Feature: AVX512
-func (x Uint32x8) ConvertToUint64() Uint64x8
-
-/* ConvertToUint64x2 */
-
-// ConvertToUint64x2 converts 2 lowest vector element values to uint64.
-//
-// Asm: VPMOVZXBQ, CPU Feature: AVX
-func (x Uint8x16) ConvertToUint64x2() Uint64x2
-
-// ConvertToUint64x2 converts 2 lowest vector element values to uint64.
-//
-// Asm: VPMOVZXWQ, CPU Feature: AVX
-func (x Uint16x8) ConvertToUint64x2() Uint64x2
-
-// ConvertToUint64x2 converts 2 lowest vector element values to uint64.
-//
-// Asm: VPMOVZXDQ, CPU Feature: AVX
-func (x Uint32x4) ConvertToUint64x2() Uint64x2
-
-/* ConvertToUint64x4 */
-
-// ConvertToUint64x4 converts 4 lowest vector element values to uint64.
-//
-// Asm: VPMOVSXWQ, CPU Feature: AVX2
-func (x Int16x8) ConvertToUint64x4() Int64x4
-
-// ConvertToUint64x4 converts 4 lowest vector element values to uint64.
-//
-// Asm: VPMOVZXBQ, CPU Feature: AVX2
-func (x Uint8x16) ConvertToUint64x4() Uint64x4
-
-// ConvertToUint64x4 converts 4 lowest vector element values to uint64.
-//
-// Asm: VPMOVZXWQ, CPU Feature: AVX2
-func (x Uint16x8) ConvertToUint64x4() Uint64x4
-
-/* ConvertToUint64x8 */
-
-// ConvertToUint64x8 converts 8 lowest vector element values to uint64.
-//
-// Asm: VPMOVZXBQ, CPU Feature: AVX512
-func (x Uint8x16) ConvertToUint64x8() Uint64x8
-
 /* CopySign */
 
 // CopySign returns the product of the first operand with -1, 0, or 1,
@@ -2810,6 +2062,258 @@ func (x Uint64x4) Expand(mask Mask64x4) Uint64x4
 // Asm: VPEXPANDQ, CPU Feature: AVX512
 func (x Uint64x8) Expand(mask Mask64x8) Uint64x8
 
+/* ExtendLo2ToInt64x2 */
+
+// ExtendLo2ToInt64x2 converts 2 lowest vector element values to int64.
+// The result vector's elements are sign-extended.
+//
+// Asm: VPMOVSXBQ, CPU Feature: AVX
+func (x Int8x16) ExtendLo2ToInt64x2() Int64x2
+
+// ExtendLo2ToInt64x2 converts 2 lowest vector element values to int64.
+// The result vector's elements are sign-extended.
+//
+// Asm: VPMOVSXWQ, CPU Feature: AVX
+func (x Int16x8) ExtendLo2ToInt64x2() Int64x2
+
+// ExtendLo2ToInt64x2 converts 2 lowest vector element values to int64.
+// The result vector's elements are sign-extended.
+//
+// Asm: VPMOVSXDQ, CPU Feature: AVX
+func (x Int32x4) ExtendLo2ToInt64x2() Int64x2
+
+/* ExtendLo2ToUint64x2 */
+
+// ExtendLo2ToUint64x2 converts 2 lowest vector element values to uint64.
+// The result vector's elements are zero-extended.
+//
+// Asm: VPMOVZXBQ, CPU Feature: AVX
+func (x Uint8x16) ExtendLo2ToUint64x2() Uint64x2
+
+// ExtendLo2ToUint64x2 converts 2 lowest vector element values to uint64.
+// The result vector's elements are zero-extended.
+//
+// Asm: VPMOVZXWQ, CPU Feature: AVX
+func (x Uint16x8) ExtendLo2ToUint64x2() Uint64x2
+
+// ExtendLo2ToUint64x2 converts 2 lowest vector element values to uint64.
+// The result vector's elements are zero-extended.
+//
+// Asm: VPMOVZXDQ, CPU Feature: AVX
+func (x Uint32x4) ExtendLo2ToUint64x2() Uint64x2
+
+/* ExtendLo4ToInt32x4 */
+
+// ExtendLo4ToInt32x4 converts 4 lowest vector element values to int32.
+// The result vector's elements are sign-extended.
+//
+// Asm: VPMOVSXBD, CPU Feature: AVX
+func (x Int8x16) ExtendLo4ToInt32x4() Int32x4
+
+// ExtendLo4ToInt32x4 converts 4 lowest vector element values to int32.
+// The result vector's elements are sign-extended.
+//
+// Asm: VPMOVSXWD, CPU Feature: AVX
+func (x Int16x8) ExtendLo4ToInt32x4() Int32x4
+
+/* ExtendLo4ToInt64x4 */
+
+// ExtendLo4ToInt64x4 converts 4 lowest vector element values to int64.
+// The result vector's elements are sign-extended.
+//
+// Asm: VPMOVSXBQ, CPU Feature: AVX2
+func (x Int8x16) ExtendLo4ToInt64x4() Int64x4
+
+// ExtendLo4ToInt64x4 converts 4 lowest vector element values to int64.
+// The result vector's elements are sign-extended.
+//
+// Asm: VPMOVSXWQ, CPU Feature: AVX2
+func (x Int16x8) ExtendLo4ToInt64x4() Int64x4
+
+/* ExtendLo4ToUint32x4 */
+
+// ExtendLo4ToUint32x4 converts 4 lowest vector element values to uint32.
+// The result vector's elements are zero-extended.
+//
+// Asm: VPMOVZXBD, CPU Feature: AVX
+func (x Uint8x16) ExtendLo4ToUint32x4() Uint32x4
+
+// ExtendLo4ToUint32x4 converts 4 lowest vector element values to uint32.
+// The result vector's elements are zero-extended.
+//
+// Asm: VPMOVZXWD, CPU Feature: AVX
+func (x Uint16x8) ExtendLo4ToUint32x4() Uint32x4
+
+/* ExtendLo4ToUint64x4 */
+
+// ExtendLo4ToUint64x4 converts 4 lowest vector element values to uint64.
+// The result vector's elements are zero-extended.
+//
+// Asm: VPMOVZXBQ, CPU Feature: AVX2
+func (x Uint8x16) ExtendLo4ToUint64x4() Uint64x4
+
+// ExtendLo4ToUint64x4 converts 4 lowest vector element values to uint64.
+// The result vector's elements are zero-extended.
+//
+// Asm: VPMOVZXWQ, CPU Feature: AVX2
+func (x Uint16x8) ExtendLo4ToUint64x4() Uint64x4
+
+/* ExtendLo8ToInt16x8 */
+
+// ExtendLo8ToInt16x8 converts 8 lowest vector element values to int16.
+// The result vector's elements are sign-extended.
+//
+// Asm: VPMOVSXBW, CPU Feature: AVX
+func (x Int8x16) ExtendLo8ToInt16x8() Int16x8
+
+/* ExtendLo8ToInt32x8 */
+
+// ExtendLo8ToInt32x8 converts 8 lowest vector element values to int32.
+// The result vector's elements are sign-extended.
+//
+// Asm: VPMOVSXBD, CPU Feature: AVX2
+func (x Int8x16) ExtendLo8ToInt32x8() Int32x8
+
+/* ExtendLo8ToInt64x8 */
+
+// ExtendLo8ToInt64x8 converts 8 lowest vector element values to int64.
+// The result vector's elements are sign-extended.
+//
+// Asm: VPMOVSXBQ, CPU Feature: AVX512
+func (x Int8x16) ExtendLo8ToInt64x8() Int64x8
+
+/* ExtendLo8ToUint16x8 */
+
+// ExtendLo8ToUint16x8 converts 8 lowest vector element values to uint16.
+// The result vector's elements are zero-extended.
+//
+// Asm: VPMOVZXBW, CPU Feature: AVX
+func (x Uint8x16) ExtendLo8ToUint16x8() Uint16x8
+
+/* ExtendLo8ToUint32x8 */
+
+// ExtendLo8ToUint32x8 converts 8 lowest vector element values to uint32.
+// The result vector's elements are zero-extended.
+//
+// Asm: VPMOVZXBD, CPU Feature: AVX2
+func (x Uint8x16) ExtendLo8ToUint32x8() Uint32x8
+
+/* ExtendLo8ToUint64x8 */
+
+// ExtendLo8ToUint64x8 converts 8 lowest vector element values to uint64.
+// The result vector's elements are zero-extended.
+//
+// Asm: VPMOVZXBQ, CPU Feature: AVX512
+func (x Uint8x16) ExtendLo8ToUint64x8() Uint64x8
+
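The ExtendLo*To* methods added above widen only the lowest 2, 4, or 8 lanes of the source vector; the remaining source lanes are ignored. A minimal usage sketch, assuming the experimental simd package (built with GOEXPERIMENT=simd), its import path, and Load*/Store helpers following the package's usual pattern; those helper names are assumptions, not part of this CL:

package main

import (
	"fmt"
	"simd" // experimental; import path assumed
)

func main() {
	src := [16]uint8{0, 1, 2, 253, 254, 255, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}
	v := simd.LoadUint8x16(&src)
	// Zero-extend the 8 lowest bytes into eight uint16 lanes (VPMOVZXBW).
	w := v.ExtendLo8ToUint16x8()
	var dst [8]uint16
	w.Store(&dst)
	fmt.Println(dst) // [0 1 2 253 254 255 6 7]
}
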
+/* ExtendToInt16 */
+
+// ExtendToInt16 converts element values to int16.
+// The result vector's elements are sign-extended.
+//
+// Asm: VPMOVSXBW, CPU Feature: AVX2
+func (x Int8x16) ExtendToInt16() Int16x16
+
+// ExtendToInt16 converts element values to int16.
+// The result vector's elements are sign-extended.
+//
+// Asm: VPMOVSXBW, CPU Feature: AVX512
+func (x Int8x32) ExtendToInt16() Int16x32
+
+/* ExtendToInt32 */
+
+// ExtendToInt32 converts element values to int32.
+// The result vector's elements are sign-extended.
+//
+// Asm: VPMOVSXBD, CPU Feature: AVX512
+func (x Int8x16) ExtendToInt32() Int32x16
+
+// ExtendToInt32 converts element values to int32.
+// The result vector's elements are sign-extended.
+//
+// Asm: VPMOVSXWD, CPU Feature: AVX2
+func (x Int16x8) ExtendToInt32() Int32x8
+
+// ExtendToInt32 converts element values to int32.
+// The result vector's elements are sign-extended.
+//
+// Asm: VPMOVSXWD, CPU Feature: AVX512
+func (x Int16x16) ExtendToInt32() Int32x16
+
+/* ExtendToInt64 */
+
+// ExtendToInt64 converts element values to int64.
+// The result vector's elements are sign-extended.
+//
+// Asm: VPMOVSXWQ, CPU Feature: AVX512
+func (x Int16x8) ExtendToInt64() Int64x8
+
+// ExtendToInt64 converts element values to int64.
+// The result vector's elements are sign-extended.
+//
+// Asm: VPMOVSXDQ, CPU Feature: AVX2
+func (x Int32x4) ExtendToInt64() Int64x4
+
+// ExtendToInt64 converts element values to int64.
+// The result vector's elements are sign-extended.
+//
+// Asm: VPMOVSXDQ, CPU Feature: AVX512
+func (x Int32x8) ExtendToInt64() Int64x8
+
+/* ExtendToUint16 */
+
+// ExtendToUint16 converts element values to uint16.
+// The result vector's elements are zero-extended.
+//
+// Asm: VPMOVZXBW, CPU Feature: AVX2
+func (x Uint8x16) ExtendToUint16() Uint16x16
+
+// ExtendToUint16 converts element values to uint16.
+// The result vector's elements are zero-extended.
+//
+// Asm: VPMOVZXBW, CPU Feature: AVX512
+func (x Uint8x32) ExtendToUint16() Uint16x32
+
+/* ExtendToUint32 */
+
+// ExtendToUint32 converts element values to uint32.
+// The result vector's elements are zero-extended.
+//
+// Asm: VPMOVZXBD, CPU Feature: AVX512
+func (x Uint8x16) ExtendToUint32() Uint32x16
+
+// ExtendToUint32 converts element values to uint32.
+// The result vector's elements are zero-extended.
+//
+// Asm: VPMOVZXWD, CPU Feature: AVX2
+func (x Uint16x8) ExtendToUint32() Uint32x8
+
+// ExtendToUint32 converts element values to uint32.
+// The result vector's elements are zero-extended.
+//
+// Asm: VPMOVZXWD, CPU Feature: AVX512
+func (x Uint16x16) ExtendToUint32() Uint32x16
+
+/* ExtendToUint64 */
+
+// ExtendToUint64 converts element values to uint64.
+// The result vector's elements are zero-extended.
+//
+// Asm: VPMOVZXWQ, CPU Feature: AVX512
+func (x Uint16x8) ExtendToUint64() Uint64x8
+
+// ExtendToUint64 converts element values to uint64.
+// The result vector's elements are zero-extended.
+//
+// Asm: VPMOVZXDQ, CPU Feature: AVX2
+func (x Uint32x4) ExtendToUint64() Uint64x4
+
+// ExtendToUint64 converts element values to uint64.
+// The result vector's elements are zero-extended.
+//
+// Asm: VPMOVZXDQ, CPU Feature: AVX512
+func (x Uint32x8) ExtendToUint64() Uint64x8
+
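Unlike the ExtendLo* forms, the ExtendTo* methods above widen every lane, so the result occupies a wider vector. Per lane they behave like Go's scalar widening conversions. A sketch using only the types and methods declared in this CL (import path assumed):

// widen sign-extends all eight int16 lanes to int32 lanes (VPMOVSXWD, AVX2);
// lane i of the result equals the int32 value of lane i of x.
func widen(x simd.Int16x8) simd.Int32x8 {
	return x.ExtendToInt32()
}
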
 /* Floor */
 
 // Floor rounds elements down to the nearest integer.
@@ -5567,6 +5071,308 @@ func (x Uint32x4) SHA256Message2(y Uint32x4) Uint32x4
 // Asm: SHA256RNDS2, CPU Feature: SHA
 func (x Uint32x4) SHA256TwoRounds(y Uint32x4, z Uint32x4) Uint32x4
 
+/* SaturateToInt8 */
+
+// SaturateToInt8 converts element values to int8.
+// Conversion is done with saturation on the vector elements.
+// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+//
+// Asm: VPMOVSWB, CPU Feature: AVX512
+func (x Int16x8) SaturateToInt8() Int8x16
+
+// SaturateToInt8 converts element values to int8.
+// Conversion is done with saturation on the vector elements.
+// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+//
+// Asm: VPMOVSWB, CPU Feature: AVX512
+func (x Int16x16) SaturateToInt8() Int8x16
+
+// SaturateToInt8 converts element values to int8.
+// Conversion is done with saturation on the vector elements.
+//
+// Asm: VPMOVSWB, CPU Feature: AVX512
+func (x Int16x32) SaturateToInt8() Int8x32
+
+// SaturateToInt8 converts element values to int8.
+// Conversion is done with saturation on the vector elements.
+// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+//
+// Asm: VPMOVSDB, CPU Feature: AVX512
+func (x Int32x4) SaturateToInt8() Int8x16
+
+// SaturateToInt8 converts element values to int8.
+// Conversion is done with saturation on the vector elements.
+// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+//
+// Asm: VPMOVSDB, CPU Feature: AVX512
+func (x Int32x8) SaturateToInt8() Int8x16
+
+// SaturateToInt8 converts element values to int8.
+// Conversion is done with saturation on the vector elements.
+// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+//
+// Asm: VPMOVSDB, CPU Feature: AVX512
+func (x Int32x16) SaturateToInt8() Int8x16
+
+// SaturateToInt8 converts element values to int8.
+// Conversion is done with saturation on the vector elements.
+// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+//
+// Asm: VPMOVSQB, CPU Feature: AVX512
+func (x Int64x2) SaturateToInt8() Int8x16
+
+// SaturateToInt8 converts element values to int8.
+// Conversion is done with saturation on the vector elements.
+// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+//
+// Asm: VPMOVSQB, CPU Feature: AVX512
+func (x Int64x4) SaturateToInt8() Int8x16
+
+// SaturateToInt8 converts element values to int8.
+// Conversion is done with saturation on the vector elements.
+// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+//
+// Asm: VPMOVSQB, CPU Feature: AVX512
+func (x Int64x8) SaturateToInt8() Int8x16
+
+/* SaturateToInt16 */
+
+// SaturateToInt16 converts element values to int16.
+// Conversion is done with saturation on the vector elements.
+//
+// Asm: VPMOVSDW, CPU Feature: AVX512
+func (x Int32x4) SaturateToInt16() Int16x8
+
+// SaturateToInt16 converts element values to int16.
+// Conversion is done with saturation on the vector elements.
+//
+// Asm: VPMOVSDW, CPU Feature: AVX512
+func (x Int32x8) SaturateToInt16() Int16x8
+
+// SaturateToInt16 converts element values to int16.
+// Conversion is done with saturation on the vector elements.
+//
+// Asm: VPMOVSDW, CPU Feature: AVX512
+func (x Int32x16) SaturateToInt16() Int16x16
+
+// SaturateToInt16 converts element values to int16.
+// Conversion is done with saturation on the vector elements.
+//
+// Asm: VPMOVSQW, CPU Feature: AVX512
+func (x Int64x2) SaturateToInt16() Int16x8
+
+// SaturateToInt16 converts element values to int16.
+// Conversion is done with saturation on the vector elements.
+//
+// Asm: VPMOVSQW, CPU Feature: AVX512
+func (x Int64x4) SaturateToInt16() Int16x8
+
+// SaturateToInt16 converts element values to int16.
+// Conversion is done with saturation on the vector elements.
+//
+// Asm: VPMOVSQW, CPU Feature: AVX512
+func (x Int64x8) SaturateToInt16() Int16x8
+
+/* SaturateToInt16Concat */
+
+// SaturateToInt16Concat converts element values to int16.
+// With each 128-bit as a group:
+// The converted group from the first input vector will be packed to the lower part of the result vector,
+// the converted group from the second input vector will be packed to the upper part of the result vector.
+// Conversion is done with saturation on the vector elements.
+//
+// Asm: VPACKSSDW, CPU Feature: AVX
+func (x Int32x4) SaturateToInt16Concat(y Int32x4) Int16x8
+
+// SaturateToInt16Concat converts element values to int16.
+// With each 128-bit as a group:
+// The converted group from the first input vector will be packed to the lower part of the result vector,
+// the converted group from the second input vector will be packed to the upper part of the result vector.
+// Conversion is done with saturation on the vector elements.
+//
+// Asm: VPACKSSDW, CPU Feature: AVX2
+func (x Int32x8) SaturateToInt16Concat(y Int32x8) Int16x16
+
+// SaturateToInt16Concat converts element values to int16.
+// With each 128-bit as a group:
+// The converted group from the first input vector will be packed to the lower part of the result vector,
+// the converted group from the second input vector will be packed to the upper part of the result vector.
+// Conversion is done with saturation on the vector elements.
+//
+// Asm: VPACKSSDW, CPU Feature: AVX512
+func (x Int32x16) SaturateToInt16Concat(y Int32x16) Int16x32
+
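Note the lane layout of SaturateToInt16Concat on shapes wider than 128 bits: packing happens independently within each 128-bit group, so the result interleaves x's and y's groups rather than concatenating all of x before all of y. A sketch of the resulting layout for the 256-bit shape, using the method as declared above (import path assumed):

// concat256 packs two Int32x8 vectors into one Int16x16 with signed
// saturation (VPACKSSDW). Result int16 lanes:
//   0..3   saturated x lanes 0..3 (low 128-bit group of x)
//   4..7   saturated y lanes 0..3 (low 128-bit group of y)
//   8..11  saturated x lanes 4..7 (high 128-bit group of x)
//   12..15 saturated y lanes 4..7 (high 128-bit group of y)
func concat256(x, y simd.Int32x8) simd.Int16x16 {
	return x.SaturateToInt16Concat(y)
}
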
+/* SaturateToInt32 */
+
+// SaturateToInt32 converts element values to int32.
+// Conversion is done with saturation on the vector elements.
+//
+// Asm: VPMOVSQD, CPU Feature: AVX512
+func (x Int64x2) SaturateToInt32() Int32x4
+
+// SaturateToInt32 converts element values to int32.
+// Conversion is done with saturation on the vector elements.
+//
+// Asm: VPMOVSQD, CPU Feature: AVX512
+func (x Int64x4) SaturateToInt32() Int32x4
+
+// SaturateToInt32 converts element values to int32.
+// Conversion is done with saturation on the vector elements.
+//
+// Asm: VPMOVSQD, CPU Feature: AVX512
+func (x Int64x8) SaturateToInt32() Int32x8
+
+/* SaturateToUint8 */
+
+// SaturateToUint8 converts element values to uint8.
+// Conversion is done with saturation on the vector elements.
+// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+//
+// Asm: VPMOVSWB, CPU Feature: AVX512
+func (x Int16x8) SaturateToUint8() Int8x16
+
+// SaturateToUint8 converts element values to uint8.
+// Conversion is done with saturation on the vector elements.
+// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+//
+// Asm: VPMOVSWB, CPU Feature: AVX512
+func (x Int16x16) SaturateToUint8() Int8x16
+
+// SaturateToUint8 converts element values to uint8.
+// Conversion is done with saturation on the vector elements.
+// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+//
+// Asm: VPMOVSDB, CPU Feature: AVX512
+func (x Int32x4) SaturateToUint8() Int8x16
+
+// SaturateToUint8 converts element values to uint8.
+// Conversion is done with saturation on the vector elements.
+// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+//
+// Asm: VPMOVSDB, CPU Feature: AVX512
+func (x Int32x8) SaturateToUint8() Int8x16
+
+// SaturateToUint8 converts element values to uint8.
+// Conversion is done with saturation on the vector elements.
+// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+//
+// Asm: VPMOVSDB, CPU Feature: AVX512
+func (x Int32x16) SaturateToUint8() Int8x16
+
+// SaturateToUint8 converts element values to uint8.
+// Conversion is done with saturation on the vector elements.
+// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+//
+// Asm: VPMOVSQB, CPU Feature: AVX512
+func (x Int64x2) SaturateToUint8() Int8x16
+
+// SaturateToUint8 converts element values to uint8.
+// Conversion is done with saturation on the vector elements.
+// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+//
+// Asm: VPMOVSQB, CPU Feature: AVX512
+func (x Int64x4) SaturateToUint8() Int8x16
+
+// SaturateToUint8 converts element values to uint8.
+// Conversion is done with saturation on the vector elements.
+// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+//
+// Asm: VPMOVSQB, CPU Feature: AVX512
+func (x Int64x8) SaturateToUint8() Int8x16
+
+// SaturateToUint8 converts element values to uint8.
+// Conversion is done with saturation on the vector elements.
+//
+// Asm: VPMOVUSWB, CPU Feature: AVX512
+func (x Uint16x32) SaturateToUint8() Uint8x32
+
+/* SaturateToUint16 */
+
+// SaturateToUint16 converts element values to uint16.
+// Conversion is done with saturation on the vector elements.
+//
+// Asm: VPMOVUSDW, CPU Feature: AVX512
+func (x Uint32x4) SaturateToUint16() Uint16x8
+
+// SaturateToUint16 converts element values to uint16.
+// Conversion is done with saturation on the vector elements.
+//
+// Asm: VPMOVUSDW, CPU Feature: AVX512
+func (x Uint32x8) SaturateToUint16() Uint16x8
+
+// SaturateToUint16 converts element values to uint16.
+// Conversion is done with saturation on the vector elements.
+//
+// Asm: VPMOVUSDW, CPU Feature: AVX512
+func (x Uint32x16) SaturateToUint16() Uint16x16
+
+// SaturateToUint16 converts element values to uint16.
+// Conversion is done with saturation on the vector elements.
+//
+// Asm: VPMOVUSQW, CPU Feature: AVX512
+func (x Uint64x2) SaturateToUint16() Uint16x8
+
+// SaturateToUint16 converts element values to uint16.
+// Conversion is done with saturation on the vector elements.
+//
+// Asm: VPMOVUSQW, CPU Feature: AVX512
+func (x Uint64x4) SaturateToUint16() Uint16x8
+
+// SaturateToUint16 converts element values to uint16.
+// Conversion is done with saturation on the vector elements.
+//
+// Asm: VPMOVUSQW, CPU Feature: AVX512
+func (x Uint64x8) SaturateToUint16() Uint16x8
+
+/* SaturateToUint16Concat */
+
+// SaturateToUint16Concat converts element values to uint16.
+// With each 128-bit as a group:
+// The converted group from the first input vector will be packed to the lower part of the result vector,
+// the converted group from the second input vector will be packed to the upper part of the result vector.
+// Conversion is done with saturation on the vector elements.
+//
+// Asm: VPACKUSDW, CPU Feature: AVX
+func (x Uint32x4) SaturateToUint16Concat(y Uint32x4) Uint16x8
+
+// SaturateToUint16Concat converts element values to uint16.
+// With each 128-bit as a group:
+// The converted group from the first input vector will be packed to the lower part of the result vector,
+// the converted group from the second input vector will be packed to the upper part of the result vector.
+// Conversion is done with saturation on the vector elements.
+//
+// Asm: VPACKUSDW, CPU Feature: AVX2
+func (x Uint32x8) SaturateToUint16Concat(y Uint32x8) Uint16x16
+
+// SaturateToUint16Concat converts element values to uint16.
+// With each 128-bit as a group:
+// The converted group from the first input vector will be packed to the lower part of the result vector,
+// the converted group from the second input vector will be packed to the upper part of the result vector.
+// Conversion is done with saturation on the vector elements.
+//
+// Asm: VPACKUSDW, CPU Feature: AVX512
+func (x Uint32x16) SaturateToUint16Concat(y Uint32x16) Uint16x32
+
+/* SaturateToUint32 */
+
+// SaturateToUint32 converts element values to uint32.
+// Conversion is done with saturation on the vector elements.
+//
+// Asm: VPMOVUSQD, CPU Feature: AVX512
+func (x Uint64x2) SaturateToUint32() Uint32x4
+
+// SaturateToUint32 converts element values to uint32.
+// Conversion is done with saturation on the vector elements.
+//
+// Asm: VPMOVUSQD, CPU Feature: AVX512
+func (x Uint64x4) SaturateToUint32() Uint32x4
+
+// SaturateToUint32 converts element values to uint32.
+// Conversion is done with saturation on the vector elements.
+//
+// Asm: VPMOVUSQD, CPU Feature: AVX512
+func (x Uint64x8) SaturateToUint32() Uint32x8
+
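Where the Truncate* methods below simply drop high bits, the Saturate* methods above clamp out-of-range values to the bounds of the destination type. Per lane, SaturateToUint32 behaves like this scalar helper (a sketch, not part of the CL):

import "math"

// saturateUint32 mirrors the per-lane clamp of SaturateToUint32 (VPMOVUSQD):
// values above math.MaxUint32 become math.MaxUint32.
func saturateUint32(v uint64) uint32 {
	if v > math.MaxUint32 {
		return math.MaxUint32
	}
	return uint32(v)
}
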
 /* Scale */
 
 // Scale multiplies elements by a power of 2.
@@ -7378,6 +7184,250 @@ func (x Float64x4) TruncScaledResidue(prec uint8) Float64x4
 // Asm: VREDUCEPD, CPU Feature: AVX512
 func (x Float64x8) TruncScaledResidue(prec uint8) Float64x8
 
+/* TruncateToInt8 */
+
+// TruncateToInt8 converts element values to int8.
+// Conversion is done with truncation on the vector elements.
+// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+//
+// Asm: VPMOVWB, CPU Feature: AVX512
+func (x Int16x8) TruncateToInt8() Int8x16
+
+// TruncateToInt8 converts element values to int8.
+// Conversion is done with truncation on the vector elements.
+// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+//
+// Asm: VPMOVWB, CPU Feature: AVX512
+func (x Int16x16) TruncateToInt8() Int8x16
+
+// TruncateToInt8 converts element values to int8.
+// Conversion is done with truncation on the vector elements.
+//
+// Asm: VPMOVWB, CPU Feature: AVX512
+func (x Int16x32) TruncateToInt8() Int8x32
+
+// TruncateToInt8 converts element values to int8.
+// Conversion is done with truncation on the vector elements.
+// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+//
+// Asm: VPMOVDB, CPU Feature: AVX512
+func (x Int32x4) TruncateToInt8() Int8x16
+
+// TruncateToInt8 converts element values to int8.
+// Conversion is done with truncation on the vector elements.
+// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+//
+// Asm: VPMOVDB, CPU Feature: AVX512
+func (x Int32x8) TruncateToInt8() Int8x16
+
+// TruncateToInt8 converts element values to int8.
+// Conversion is done with truncation on the vector elements.
+// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+//
+// Asm: VPMOVDB, CPU Feature: AVX512
+func (x Int32x16) TruncateToInt8() Int8x16
+
+// TruncateToInt8 converts element values to int8.
+// Conversion is done with truncation on the vector elements.
+// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+//
+// Asm: VPMOVQB, CPU Feature: AVX512
+func (x Int64x2) TruncateToInt8() Int8x16
+
+// TruncateToInt8 converts element values to int8.
+// Conversion is done with truncation on the vector elements.
+// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+//
+// Asm: VPMOVQB, CPU Feature: AVX512
+func (x Int64x4) TruncateToInt8() Int8x16
+
+// TruncateToInt8 converts element values to int8.
+// Conversion is done with truncation on the vector elements.
+// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+//
+// Asm: VPMOVQB, CPU Feature: AVX512
+func (x Int64x8) TruncateToInt8() Int8x16
+
+/* TruncateToInt16 */
+
+// TruncateToInt16 converts element values to int16.
+// Conversion is done with truncation on the vector elements.
+//
+// Asm: VPMOVDW, CPU Feature: AVX512
+func (x Int32x4) TruncateToInt16() Int16x8
+
+// TruncateToInt16 converts element values to int16.
+// Conversion is done with truncation on the vector elements.
+//
+// Asm: VPMOVDW, CPU Feature: AVX512
+func (x Int32x8) TruncateToInt16() Int16x8
+
+// TruncateToInt16 converts element values to int16.
+// Conversion is done with truncation on the vector elements.
+//
+// Asm: VPMOVDW, CPU Feature: AVX512
+func (x Int32x16) TruncateToInt16() Int16x16
+
+// TruncateToInt16 converts element values to int16.
+// Conversion is done with truncation on the vector elements.
+//
+// Asm: VPMOVQW, CPU Feature: AVX512
+func (x Int64x2) TruncateToInt16() Int16x8
+
+// TruncateToInt16 converts element values to int16.
+// Conversion is done with truncation on the vector elements.
+//
+// Asm: VPMOVQW, CPU Feature: AVX512
+func (x Int64x4) TruncateToInt16() Int16x8
+
+// TruncateToInt16 converts element values to int16.
+// Conversion is done with truncation on the vector elements.
+//
+// Asm: VPMOVQW, CPU Feature: AVX512
+func (x Int64x8) TruncateToInt16() Int16x8
+
+/* TruncateToInt32 */
+
+// TruncateToInt32 converts element values to int32.
+// Conversion is done with truncation on the vector elements.
+//
+// Asm: VPMOVQD, CPU Feature: AVX512
+func (x Int64x2) TruncateToInt32() Int32x4
+
+// TruncateToInt32 converts element values to int32.
+// Conversion is done with truncation on the vector elements.
+//
+// Asm: VPMOVQD, CPU Feature: AVX512
+func (x Int64x4) TruncateToInt32() Int32x4
+
+// TruncateToInt32 converts element values to int32.
+// Conversion is done with truncation on the vector elements.
+//
+// Asm: VPMOVQD, CPU Feature: AVX512
+func (x Int64x8) TruncateToInt32() Int32x8
+
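Truncation keeps only the low bits of each lane, exactly like Go's scalar integer conversions, so out-of-range values wrap rather than clamp. Per lane, TruncateToInt32 corresponds to the following (a sketch, not part of the CL):

// truncateInt32 mirrors the per-lane behavior of TruncateToInt32 (VPMOVQD):
// only the low 32 bits survive, so int64(1<<32 + 5) becomes 5.
func truncateInt32(v int64) int32 {
	return int32(v)
}
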
+/* TruncateToUint8 */
+
+// TruncateToUint8 converts element values to uint8.
+// Conversion is done with truncation on the vector elements.
+// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+//
+// Asm: VPMOVWB, CPU Feature: AVX512
+func (x Uint16x8) TruncateToUint8() Uint8x16
+
+// TruncateToUint8 converts element values to uint8.
+// Conversion is done with truncation on the vector elements.
+// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+//
+// Asm: VPMOVWB, CPU Feature: AVX512
+func (x Uint16x16) TruncateToUint8() Uint8x16
+
+// TruncateToUint8 converts element values to uint8.
+// Conversion is done with truncation on the vector elements.
+//
+// Asm: VPMOVWB, CPU Feature: AVX512
+func (x Uint16x32) TruncateToUint8() Uint8x32
+
+// TruncateToUint8 converts element values to uint8.
+// Conversion is done with truncation on the vector elements.
+// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+//
+// Asm: VPMOVDB, CPU Feature: AVX512
+func (x Uint32x4) TruncateToUint8() Uint8x16
+
+// TruncateToUint8 converts element values to uint8.
+// Conversion is done with truncation on the vector elements.
+// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+//
+// Asm: VPMOVDB, CPU Feature: AVX512
+func (x Uint32x8) TruncateToUint8() Uint8x16
+
+// TruncateToUint8 converts element values to uint8.
+// Conversion is done with truncation on the vector elements.
+// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+//
+// Asm: VPMOVDB, CPU Feature: AVX512
+func (x Uint32x16) TruncateToUint8() Uint8x16
+
+// TruncateToUint8 converts element values to uint8.
+// Conversion is done with truncation on the vector elements.
+// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+//
+// Asm: VPMOVQB, CPU Feature: AVX512
+func (x Uint64x2) TruncateToUint8() Uint8x16
+
+// TruncateToUint8 converts element values to uint8.
+// Conversion is done with truncation on the vector elements.
+// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+//
+// Asm: VPMOVQB, CPU Feature: AVX512
+func (x Uint64x4) TruncateToUint8() Uint8x16
+
+// TruncateToUint8 converts element values to uint8.
+// Conversion is done with truncation on the vector elements.
+// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+//
+// Asm: VPMOVQB, CPU Feature: AVX512
+func (x Uint64x8) TruncateToUint8() Uint8x16
+
+/* TruncateToUint16 */
+
+// TruncateToUint16 converts element values to uint16.
+// Conversion is done with truncation on the vector elements.
+//
+// Asm: VPMOVDW, CPU Feature: AVX512
+func (x Uint32x4) TruncateToUint16() Uint16x8
+
+// TruncateToUint16 converts element values to uint16.
+// Conversion is done with truncation on the vector elements.
+//
+// Asm: VPMOVDW, CPU Feature: AVX512
+func (x Uint32x8) TruncateToUint16() Uint16x8
+
+// TruncateToUint16 converts element values to uint16.
+// Conversion is done with truncation on the vector elements.
+//
+// Asm: VPMOVDW, CPU Feature: AVX512
+func (x Uint32x16) TruncateToUint16() Uint16x16
+
+// TruncateToUint16 converts element values to uint16.
+// Conversion is done with truncation on the vector elements.
+//
+// Asm: VPMOVQW, CPU Feature: AVX512
+func (x Uint64x2) TruncateToUint16() Uint16x8
+
+// TruncateToUint16 converts element values to uint16.
+// Conversion is done with truncation on the vector elements.
+//
+// Asm: VPMOVQW, CPU Feature: AVX512
+func (x Uint64x4) TruncateToUint16() Uint16x8
+
+// TruncateToUint16 converts element values to uint16.
+// Conversion is done with truncation on the vector elements.
+//
+// Asm: VPMOVQW, CPU Feature: AVX512
+func (x Uint64x8) TruncateToUint16() Uint16x8
+
+/* TruncateToUint32 */
+
+// TruncateToUint32 converts element values to uint32.
+// Conversion is done with truncation on the vector elements.
+//
+// Asm: VPMOVQD, CPU Feature: AVX512
+func (x Uint64x2) TruncateToUint32() Uint32x4
+
+// TruncateToUint32 converts element values to uint32.
+// Conversion is done with truncation on the vector elements.
+//
+// Asm: VPMOVQD, CPU Feature: AVX512
+func (x Uint64x4) TruncateToUint32() Uint32x4
+
+// TruncateToUint32 converts element values to uint32.
+// Conversion is done with truncation on the vector elements.
+//
+// Asm: VPMOVQD, CPU Feature: AVX512
+func (x Uint64x8) TruncateToUint32() Uint32x8
+
 /* Xor */
 
 // Xor performs a bitwise XOR operation between two vectors.