mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
[dev.simd] simd: fix signatures for PermuteConstant* methods
This moves the packed-immediate methods to package-private, and adds exported versions with four parameters. Rename PermuteConstant to PermuteScalars Rename VPSHUFB Permute to PermuteOrZero Rename Permute2 to ConcatPermute Comments were repaired/enhanced. Modified the generator to support an additional tag "hideMaskMethods : true" to suppress method, intrinsic, generic, and generic translation generation for said mask-modified versions of such methods (this is already true for exported methods). Change-Id: I91e208c1fff1f28ebce4edb4e73d26003715018c Reviewed-on: https://go-review.googlesource.com/c/go/+/721342 LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Junyang Shao <shaojunyang@google.com> Reviewed-by: Cherry Mui <cherryyz@google.com>
This commit is contained in:
parent
e3d4645693
commit
4d26d66a49
18 changed files with 2614 additions and 1820 deletions
|
|
@ -396,7 +396,7 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||||
ssa.OpAMD64VPOR256,
|
ssa.OpAMD64VPOR256,
|
||||||
ssa.OpAMD64VPORD512,
|
ssa.OpAMD64VPORD512,
|
||||||
ssa.OpAMD64VPORQ512,
|
ssa.OpAMD64VPORQ512,
|
||||||
ssa.OpAMD64VPSHUFB128,
|
ssa.OpAMD64VPERMB128,
|
||||||
ssa.OpAMD64VPERMB256,
|
ssa.OpAMD64VPERMB256,
|
||||||
ssa.OpAMD64VPERMB512,
|
ssa.OpAMD64VPERMB512,
|
||||||
ssa.OpAMD64VPERMW128,
|
ssa.OpAMD64VPERMW128,
|
||||||
|
|
@ -410,6 +410,7 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||||
ssa.OpAMD64VPERMQ256,
|
ssa.OpAMD64VPERMQ256,
|
||||||
ssa.OpAMD64VPERMPD512,
|
ssa.OpAMD64VPERMPD512,
|
||||||
ssa.OpAMD64VPERMQ512,
|
ssa.OpAMD64VPERMQ512,
|
||||||
|
ssa.OpAMD64VPSHUFB128,
|
||||||
ssa.OpAMD64VPSHUFB256,
|
ssa.OpAMD64VPSHUFB256,
|
||||||
ssa.OpAMD64VPSHUFB512,
|
ssa.OpAMD64VPSHUFB512,
|
||||||
ssa.OpAMD64VPROLVD128,
|
ssa.OpAMD64VPROLVD128,
|
||||||
|
|
@ -672,9 +673,7 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||||
ssa.OpAMD64VPORQMasked128,
|
ssa.OpAMD64VPORQMasked128,
|
||||||
ssa.OpAMD64VPORQMasked256,
|
ssa.OpAMD64VPORQMasked256,
|
||||||
ssa.OpAMD64VPORQMasked512,
|
ssa.OpAMD64VPORQMasked512,
|
||||||
ssa.OpAMD64VPSHUFBMasked256,
|
ssa.OpAMD64VPERMBMasked128,
|
||||||
ssa.OpAMD64VPSHUFBMasked512,
|
|
||||||
ssa.OpAMD64VPSHUFBMasked128,
|
|
||||||
ssa.OpAMD64VPERMBMasked256,
|
ssa.OpAMD64VPERMBMasked256,
|
||||||
ssa.OpAMD64VPERMBMasked512,
|
ssa.OpAMD64VPERMBMasked512,
|
||||||
ssa.OpAMD64VPERMWMasked128,
|
ssa.OpAMD64VPERMWMasked128,
|
||||||
|
|
@ -688,6 +687,9 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||||
ssa.OpAMD64VPERMQMasked256,
|
ssa.OpAMD64VPERMQMasked256,
|
||||||
ssa.OpAMD64VPERMPDMasked512,
|
ssa.OpAMD64VPERMPDMasked512,
|
||||||
ssa.OpAMD64VPERMQMasked512,
|
ssa.OpAMD64VPERMQMasked512,
|
||||||
|
ssa.OpAMD64VPSHUFBMasked256,
|
||||||
|
ssa.OpAMD64VPSHUFBMasked512,
|
||||||
|
ssa.OpAMD64VPSHUFBMasked128,
|
||||||
ssa.OpAMD64VPROLVDMasked128,
|
ssa.OpAMD64VPROLVDMasked128,
|
||||||
ssa.OpAMD64VPROLVDMasked256,
|
ssa.OpAMD64VPROLVDMasked256,
|
||||||
ssa.OpAMD64VPROLVDMasked512,
|
ssa.OpAMD64VPROLVDMasked512,
|
||||||
|
|
@ -1011,12 +1013,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||||
ssa.OpAMD64VEXTRACTF64X4256,
|
ssa.OpAMD64VEXTRACTF64X4256,
|
||||||
ssa.OpAMD64VEXTRACTI128128,
|
ssa.OpAMD64VEXTRACTI128128,
|
||||||
ssa.OpAMD64VEXTRACTI64X4256,
|
ssa.OpAMD64VEXTRACTI64X4256,
|
||||||
ssa.OpAMD64VPSHUFD128,
|
|
||||||
ssa.OpAMD64VPSHUFD256,
|
|
||||||
ssa.OpAMD64VPSHUFD512,
|
|
||||||
ssa.OpAMD64VPSHUFHW128,
|
|
||||||
ssa.OpAMD64VPSHUFHW256,
|
|
||||||
ssa.OpAMD64VPSHUFHW512,
|
|
||||||
ssa.OpAMD64VPROLD128,
|
ssa.OpAMD64VPROLD128,
|
||||||
ssa.OpAMD64VPROLD256,
|
ssa.OpAMD64VPROLD256,
|
||||||
ssa.OpAMD64VPROLD512,
|
ssa.OpAMD64VPROLD512,
|
||||||
|
|
@ -1029,6 +1025,15 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||||
ssa.OpAMD64VPRORQ128,
|
ssa.OpAMD64VPRORQ128,
|
||||||
ssa.OpAMD64VPRORQ256,
|
ssa.OpAMD64VPRORQ256,
|
||||||
ssa.OpAMD64VPRORQ512,
|
ssa.OpAMD64VPRORQ512,
|
||||||
|
ssa.OpAMD64VPSHUFD128,
|
||||||
|
ssa.OpAMD64VPSHUFD256,
|
||||||
|
ssa.OpAMD64VPSHUFD512,
|
||||||
|
ssa.OpAMD64VPSHUFHW128,
|
||||||
|
ssa.OpAMD64VPSHUFHW256,
|
||||||
|
ssa.OpAMD64VPSHUFHW512,
|
||||||
|
ssa.OpAMD64VPSHUFLW128,
|
||||||
|
ssa.OpAMD64VPSHUFLW256,
|
||||||
|
ssa.OpAMD64VPSHUFLW512,
|
||||||
ssa.OpAMD64VPSLLW128const,
|
ssa.OpAMD64VPSLLW128const,
|
||||||
ssa.OpAMD64VPSLLW256const,
|
ssa.OpAMD64VPSLLW256const,
|
||||||
ssa.OpAMD64VPSLLW512const,
|
ssa.OpAMD64VPSLLW512const,
|
||||||
|
|
@ -1070,12 +1075,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||||
ssa.OpAMD64VREDUCEPDMasked128,
|
ssa.OpAMD64VREDUCEPDMasked128,
|
||||||
ssa.OpAMD64VREDUCEPDMasked256,
|
ssa.OpAMD64VREDUCEPDMasked256,
|
||||||
ssa.OpAMD64VREDUCEPDMasked512,
|
ssa.OpAMD64VREDUCEPDMasked512,
|
||||||
ssa.OpAMD64VPSHUFDMasked256,
|
|
||||||
ssa.OpAMD64VPSHUFDMasked512,
|
|
||||||
ssa.OpAMD64VPSHUFHWMasked256,
|
|
||||||
ssa.OpAMD64VPSHUFHWMasked512,
|
|
||||||
ssa.OpAMD64VPSHUFHWMasked128,
|
|
||||||
ssa.OpAMD64VPSHUFDMasked128,
|
|
||||||
ssa.OpAMD64VPROLDMasked128,
|
ssa.OpAMD64VPROLDMasked128,
|
||||||
ssa.OpAMD64VPROLDMasked256,
|
ssa.OpAMD64VPROLDMasked256,
|
||||||
ssa.OpAMD64VPROLDMasked512,
|
ssa.OpAMD64VPROLDMasked512,
|
||||||
|
|
@ -1088,6 +1087,15 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||||
ssa.OpAMD64VPRORQMasked128,
|
ssa.OpAMD64VPRORQMasked128,
|
||||||
ssa.OpAMD64VPRORQMasked256,
|
ssa.OpAMD64VPRORQMasked256,
|
||||||
ssa.OpAMD64VPRORQMasked512,
|
ssa.OpAMD64VPRORQMasked512,
|
||||||
|
ssa.OpAMD64VPSHUFDMasked256,
|
||||||
|
ssa.OpAMD64VPSHUFDMasked512,
|
||||||
|
ssa.OpAMD64VPSHUFHWMasked256,
|
||||||
|
ssa.OpAMD64VPSHUFHWMasked512,
|
||||||
|
ssa.OpAMD64VPSHUFHWMasked128,
|
||||||
|
ssa.OpAMD64VPSHUFLWMasked256,
|
||||||
|
ssa.OpAMD64VPSHUFLWMasked512,
|
||||||
|
ssa.OpAMD64VPSHUFLWMasked128,
|
||||||
|
ssa.OpAMD64VPSHUFDMasked128,
|
||||||
ssa.OpAMD64VPSLLWMasked128const,
|
ssa.OpAMD64VPSLLWMasked128const,
|
||||||
ssa.OpAMD64VPSLLWMasked256const,
|
ssa.OpAMD64VPSLLWMasked256const,
|
||||||
ssa.OpAMD64VPSLLWMasked512const,
|
ssa.OpAMD64VPSLLWMasked512const,
|
||||||
|
|
@ -1209,6 +1217,24 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||||
case ssa.OpAMD64VPDPWSSD128,
|
case ssa.OpAMD64VPDPWSSD128,
|
||||||
ssa.OpAMD64VPDPWSSD256,
|
ssa.OpAMD64VPDPWSSD256,
|
||||||
ssa.OpAMD64VPDPWSSD512,
|
ssa.OpAMD64VPDPWSSD512,
|
||||||
|
ssa.OpAMD64VPERMI2B128,
|
||||||
|
ssa.OpAMD64VPERMI2B256,
|
||||||
|
ssa.OpAMD64VPERMI2B512,
|
||||||
|
ssa.OpAMD64VPERMI2W128,
|
||||||
|
ssa.OpAMD64VPERMI2W256,
|
||||||
|
ssa.OpAMD64VPERMI2W512,
|
||||||
|
ssa.OpAMD64VPERMI2PS128,
|
||||||
|
ssa.OpAMD64VPERMI2D128,
|
||||||
|
ssa.OpAMD64VPERMI2PS256,
|
||||||
|
ssa.OpAMD64VPERMI2D256,
|
||||||
|
ssa.OpAMD64VPERMI2PS512,
|
||||||
|
ssa.OpAMD64VPERMI2D512,
|
||||||
|
ssa.OpAMD64VPERMI2PD128,
|
||||||
|
ssa.OpAMD64VPERMI2Q128,
|
||||||
|
ssa.OpAMD64VPERMI2PD256,
|
||||||
|
ssa.OpAMD64VPERMI2Q256,
|
||||||
|
ssa.OpAMD64VPERMI2PD512,
|
||||||
|
ssa.OpAMD64VPERMI2Q512,
|
||||||
ssa.OpAMD64VPDPBUSD128,
|
ssa.OpAMD64VPDPBUSD128,
|
||||||
ssa.OpAMD64VPDPBUSD256,
|
ssa.OpAMD64VPDPBUSD256,
|
||||||
ssa.OpAMD64VPDPBUSD512,
|
ssa.OpAMD64VPDPBUSD512,
|
||||||
|
|
@ -1233,24 +1259,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||||
ssa.OpAMD64VFMSUBADD213PD128,
|
ssa.OpAMD64VFMSUBADD213PD128,
|
||||||
ssa.OpAMD64VFMSUBADD213PD256,
|
ssa.OpAMD64VFMSUBADD213PD256,
|
||||||
ssa.OpAMD64VFMSUBADD213PD512,
|
ssa.OpAMD64VFMSUBADD213PD512,
|
||||||
ssa.OpAMD64VPERMI2B128,
|
|
||||||
ssa.OpAMD64VPERMI2B256,
|
|
||||||
ssa.OpAMD64VPERMI2B512,
|
|
||||||
ssa.OpAMD64VPERMI2W128,
|
|
||||||
ssa.OpAMD64VPERMI2W256,
|
|
||||||
ssa.OpAMD64VPERMI2W512,
|
|
||||||
ssa.OpAMD64VPERMI2PS128,
|
|
||||||
ssa.OpAMD64VPERMI2D128,
|
|
||||||
ssa.OpAMD64VPERMI2PS256,
|
|
||||||
ssa.OpAMD64VPERMI2D256,
|
|
||||||
ssa.OpAMD64VPERMI2PS512,
|
|
||||||
ssa.OpAMD64VPERMI2D512,
|
|
||||||
ssa.OpAMD64VPERMI2PD128,
|
|
||||||
ssa.OpAMD64VPERMI2Q128,
|
|
||||||
ssa.OpAMD64VPERMI2PD256,
|
|
||||||
ssa.OpAMD64VPERMI2Q256,
|
|
||||||
ssa.OpAMD64VPERMI2PD512,
|
|
||||||
ssa.OpAMD64VPERMI2Q512,
|
|
||||||
ssa.OpAMD64VPSHLDVW128,
|
ssa.OpAMD64VPSHLDVW128,
|
||||||
ssa.OpAMD64VPSHLDVW256,
|
ssa.OpAMD64VPSHLDVW256,
|
||||||
ssa.OpAMD64VPSHLDVW512,
|
ssa.OpAMD64VPSHLDVW512,
|
||||||
|
|
@ -1316,6 +1324,24 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||||
ssa.OpAMD64VPAVGWMasked128Merging,
|
ssa.OpAMD64VPAVGWMasked128Merging,
|
||||||
ssa.OpAMD64VPAVGWMasked256Merging,
|
ssa.OpAMD64VPAVGWMasked256Merging,
|
||||||
ssa.OpAMD64VPAVGWMasked512Merging,
|
ssa.OpAMD64VPAVGWMasked512Merging,
|
||||||
|
ssa.OpAMD64VPERMI2BMasked128,
|
||||||
|
ssa.OpAMD64VPERMI2BMasked256,
|
||||||
|
ssa.OpAMD64VPERMI2BMasked512,
|
||||||
|
ssa.OpAMD64VPERMI2WMasked128,
|
||||||
|
ssa.OpAMD64VPERMI2WMasked256,
|
||||||
|
ssa.OpAMD64VPERMI2WMasked512,
|
||||||
|
ssa.OpAMD64VPERMI2PSMasked128,
|
||||||
|
ssa.OpAMD64VPERMI2DMasked128,
|
||||||
|
ssa.OpAMD64VPERMI2PSMasked256,
|
||||||
|
ssa.OpAMD64VPERMI2DMasked256,
|
||||||
|
ssa.OpAMD64VPERMI2PSMasked512,
|
||||||
|
ssa.OpAMD64VPERMI2DMasked512,
|
||||||
|
ssa.OpAMD64VPERMI2PDMasked128,
|
||||||
|
ssa.OpAMD64VPERMI2QMasked128,
|
||||||
|
ssa.OpAMD64VPERMI2PDMasked256,
|
||||||
|
ssa.OpAMD64VPERMI2QMasked256,
|
||||||
|
ssa.OpAMD64VPERMI2PDMasked512,
|
||||||
|
ssa.OpAMD64VPERMI2QMasked512,
|
||||||
ssa.OpAMD64VPALIGNRMasked256Merging,
|
ssa.OpAMD64VPALIGNRMasked256Merging,
|
||||||
ssa.OpAMD64VPALIGNRMasked512Merging,
|
ssa.OpAMD64VPALIGNRMasked512Merging,
|
||||||
ssa.OpAMD64VPALIGNRMasked128Merging,
|
ssa.OpAMD64VPALIGNRMasked128Merging,
|
||||||
|
|
@ -1451,24 +1477,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||||
ssa.OpAMD64VPORQMasked128Merging,
|
ssa.OpAMD64VPORQMasked128Merging,
|
||||||
ssa.OpAMD64VPORQMasked256Merging,
|
ssa.OpAMD64VPORQMasked256Merging,
|
||||||
ssa.OpAMD64VPORQMasked512Merging,
|
ssa.OpAMD64VPORQMasked512Merging,
|
||||||
ssa.OpAMD64VPERMI2BMasked128,
|
|
||||||
ssa.OpAMD64VPERMI2BMasked256,
|
|
||||||
ssa.OpAMD64VPERMI2BMasked512,
|
|
||||||
ssa.OpAMD64VPERMI2WMasked128,
|
|
||||||
ssa.OpAMD64VPERMI2WMasked256,
|
|
||||||
ssa.OpAMD64VPERMI2WMasked512,
|
|
||||||
ssa.OpAMD64VPERMI2PSMasked128,
|
|
||||||
ssa.OpAMD64VPERMI2DMasked128,
|
|
||||||
ssa.OpAMD64VPERMI2PSMasked256,
|
|
||||||
ssa.OpAMD64VPERMI2DMasked256,
|
|
||||||
ssa.OpAMD64VPERMI2PSMasked512,
|
|
||||||
ssa.OpAMD64VPERMI2DMasked512,
|
|
||||||
ssa.OpAMD64VPERMI2PDMasked128,
|
|
||||||
ssa.OpAMD64VPERMI2QMasked128,
|
|
||||||
ssa.OpAMD64VPERMI2PDMasked256,
|
|
||||||
ssa.OpAMD64VPERMI2QMasked256,
|
|
||||||
ssa.OpAMD64VPERMI2PDMasked512,
|
|
||||||
ssa.OpAMD64VPERMI2QMasked512,
|
|
||||||
ssa.OpAMD64VPSHUFBMasked256Merging,
|
ssa.OpAMD64VPSHUFBMasked256Merging,
|
||||||
ssa.OpAMD64VPSHUFBMasked512Merging,
|
ssa.OpAMD64VPSHUFBMasked512Merging,
|
||||||
ssa.OpAMD64VPSHUFBMasked128Merging,
|
ssa.OpAMD64VPSHUFBMasked128Merging,
|
||||||
|
|
@ -1819,6 +1827,18 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||||
p = simdV21load(s, v)
|
p = simdV21load(s, v)
|
||||||
|
|
||||||
case ssa.OpAMD64VPDPWSSD512load,
|
case ssa.OpAMD64VPDPWSSD512load,
|
||||||
|
ssa.OpAMD64VPERMI2PS128load,
|
||||||
|
ssa.OpAMD64VPERMI2D128load,
|
||||||
|
ssa.OpAMD64VPERMI2PS256load,
|
||||||
|
ssa.OpAMD64VPERMI2D256load,
|
||||||
|
ssa.OpAMD64VPERMI2PS512load,
|
||||||
|
ssa.OpAMD64VPERMI2D512load,
|
||||||
|
ssa.OpAMD64VPERMI2PD128load,
|
||||||
|
ssa.OpAMD64VPERMI2Q128load,
|
||||||
|
ssa.OpAMD64VPERMI2PD256load,
|
||||||
|
ssa.OpAMD64VPERMI2Q256load,
|
||||||
|
ssa.OpAMD64VPERMI2PD512load,
|
||||||
|
ssa.OpAMD64VPERMI2Q512load,
|
||||||
ssa.OpAMD64VPDPBUSD512load,
|
ssa.OpAMD64VPDPBUSD512load,
|
||||||
ssa.OpAMD64VPDPBUSDS512load,
|
ssa.OpAMD64VPDPBUSDS512load,
|
||||||
ssa.OpAMD64VFMADD213PS128load,
|
ssa.OpAMD64VFMADD213PS128load,
|
||||||
|
|
@ -1839,18 +1859,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||||
ssa.OpAMD64VFMSUBADD213PD128load,
|
ssa.OpAMD64VFMSUBADD213PD128load,
|
||||||
ssa.OpAMD64VFMSUBADD213PD256load,
|
ssa.OpAMD64VFMSUBADD213PD256load,
|
||||||
ssa.OpAMD64VFMSUBADD213PD512load,
|
ssa.OpAMD64VFMSUBADD213PD512load,
|
||||||
ssa.OpAMD64VPERMI2PS128load,
|
|
||||||
ssa.OpAMD64VPERMI2D128load,
|
|
||||||
ssa.OpAMD64VPERMI2PS256load,
|
|
||||||
ssa.OpAMD64VPERMI2D256load,
|
|
||||||
ssa.OpAMD64VPERMI2PS512load,
|
|
||||||
ssa.OpAMD64VPERMI2D512load,
|
|
||||||
ssa.OpAMD64VPERMI2PD128load,
|
|
||||||
ssa.OpAMD64VPERMI2Q128load,
|
|
||||||
ssa.OpAMD64VPERMI2PD256load,
|
|
||||||
ssa.OpAMD64VPERMI2Q256load,
|
|
||||||
ssa.OpAMD64VPERMI2PD512load,
|
|
||||||
ssa.OpAMD64VPERMI2Q512load,
|
|
||||||
ssa.OpAMD64VPSHLDVD128load,
|
ssa.OpAMD64VPSHLDVD128load,
|
||||||
ssa.OpAMD64VPSHLDVD256load,
|
ssa.OpAMD64VPSHLDVD256load,
|
||||||
ssa.OpAMD64VPSHLDVD512load,
|
ssa.OpAMD64VPSHLDVD512load,
|
||||||
|
|
@ -1868,6 +1876,18 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||||
case ssa.OpAMD64VPDPWSSDMasked128load,
|
case ssa.OpAMD64VPDPWSSDMasked128load,
|
||||||
ssa.OpAMD64VPDPWSSDMasked256load,
|
ssa.OpAMD64VPDPWSSDMasked256load,
|
||||||
ssa.OpAMD64VPDPWSSDMasked512load,
|
ssa.OpAMD64VPDPWSSDMasked512load,
|
||||||
|
ssa.OpAMD64VPERMI2PSMasked128load,
|
||||||
|
ssa.OpAMD64VPERMI2DMasked128load,
|
||||||
|
ssa.OpAMD64VPERMI2PSMasked256load,
|
||||||
|
ssa.OpAMD64VPERMI2DMasked256load,
|
||||||
|
ssa.OpAMD64VPERMI2PSMasked512load,
|
||||||
|
ssa.OpAMD64VPERMI2DMasked512load,
|
||||||
|
ssa.OpAMD64VPERMI2PDMasked128load,
|
||||||
|
ssa.OpAMD64VPERMI2QMasked128load,
|
||||||
|
ssa.OpAMD64VPERMI2PDMasked256load,
|
||||||
|
ssa.OpAMD64VPERMI2QMasked256load,
|
||||||
|
ssa.OpAMD64VPERMI2PDMasked512load,
|
||||||
|
ssa.OpAMD64VPERMI2QMasked512load,
|
||||||
ssa.OpAMD64VPDPBUSDMasked128load,
|
ssa.OpAMD64VPDPBUSDMasked128load,
|
||||||
ssa.OpAMD64VPDPBUSDMasked256load,
|
ssa.OpAMD64VPDPBUSDMasked256load,
|
||||||
ssa.OpAMD64VPDPBUSDMasked512load,
|
ssa.OpAMD64VPDPBUSDMasked512load,
|
||||||
|
|
@ -1892,18 +1912,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||||
ssa.OpAMD64VFMSUBADD213PDMasked128load,
|
ssa.OpAMD64VFMSUBADD213PDMasked128load,
|
||||||
ssa.OpAMD64VFMSUBADD213PDMasked256load,
|
ssa.OpAMD64VFMSUBADD213PDMasked256load,
|
||||||
ssa.OpAMD64VFMSUBADD213PDMasked512load,
|
ssa.OpAMD64VFMSUBADD213PDMasked512load,
|
||||||
ssa.OpAMD64VPERMI2PSMasked128load,
|
|
||||||
ssa.OpAMD64VPERMI2DMasked128load,
|
|
||||||
ssa.OpAMD64VPERMI2PSMasked256load,
|
|
||||||
ssa.OpAMD64VPERMI2DMasked256load,
|
|
||||||
ssa.OpAMD64VPERMI2PSMasked512load,
|
|
||||||
ssa.OpAMD64VPERMI2DMasked512load,
|
|
||||||
ssa.OpAMD64VPERMI2PDMasked128load,
|
|
||||||
ssa.OpAMD64VPERMI2QMasked128load,
|
|
||||||
ssa.OpAMD64VPERMI2PDMasked256load,
|
|
||||||
ssa.OpAMD64VPERMI2QMasked256load,
|
|
||||||
ssa.OpAMD64VPERMI2PDMasked512load,
|
|
||||||
ssa.OpAMD64VPERMI2QMasked512load,
|
|
||||||
ssa.OpAMD64VPSHLDVDMasked128load,
|
ssa.OpAMD64VPSHLDVDMasked128load,
|
||||||
ssa.OpAMD64VPSHLDVDMasked256load,
|
ssa.OpAMD64VPSHLDVDMasked256load,
|
||||||
ssa.OpAMD64VPSHLDVDMasked512load,
|
ssa.OpAMD64VPSHLDVDMasked512load,
|
||||||
|
|
@ -2124,7 +2132,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||||
ssa.OpAMD64VREDUCEPD128load,
|
ssa.OpAMD64VREDUCEPD128load,
|
||||||
ssa.OpAMD64VREDUCEPD256load,
|
ssa.OpAMD64VREDUCEPD256load,
|
||||||
ssa.OpAMD64VREDUCEPD512load,
|
ssa.OpAMD64VREDUCEPD512load,
|
||||||
ssa.OpAMD64VPSHUFD512load,
|
|
||||||
ssa.OpAMD64VPROLD128load,
|
ssa.OpAMD64VPROLD128load,
|
||||||
ssa.OpAMD64VPROLD256load,
|
ssa.OpAMD64VPROLD256load,
|
||||||
ssa.OpAMD64VPROLD512load,
|
ssa.OpAMD64VPROLD512load,
|
||||||
|
|
@ -2137,6 +2144,7 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||||
ssa.OpAMD64VPRORQ128load,
|
ssa.OpAMD64VPRORQ128load,
|
||||||
ssa.OpAMD64VPRORQ256load,
|
ssa.OpAMD64VPRORQ256load,
|
||||||
ssa.OpAMD64VPRORQ512load,
|
ssa.OpAMD64VPRORQ512load,
|
||||||
|
ssa.OpAMD64VPSHUFD512load,
|
||||||
ssa.OpAMD64VPSLLD512constload,
|
ssa.OpAMD64VPSLLD512constload,
|
||||||
ssa.OpAMD64VPSLLQ512constload,
|
ssa.OpAMD64VPSLLQ512constload,
|
||||||
ssa.OpAMD64VPSRLD512constload,
|
ssa.OpAMD64VPSRLD512constload,
|
||||||
|
|
@ -2159,9 +2167,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||||
ssa.OpAMD64VREDUCEPDMasked128load,
|
ssa.OpAMD64VREDUCEPDMasked128load,
|
||||||
ssa.OpAMD64VREDUCEPDMasked256load,
|
ssa.OpAMD64VREDUCEPDMasked256load,
|
||||||
ssa.OpAMD64VREDUCEPDMasked512load,
|
ssa.OpAMD64VREDUCEPDMasked512load,
|
||||||
ssa.OpAMD64VPSHUFDMasked256load,
|
|
||||||
ssa.OpAMD64VPSHUFDMasked512load,
|
|
||||||
ssa.OpAMD64VPSHUFDMasked128load,
|
|
||||||
ssa.OpAMD64VPROLDMasked128load,
|
ssa.OpAMD64VPROLDMasked128load,
|
||||||
ssa.OpAMD64VPROLDMasked256load,
|
ssa.OpAMD64VPROLDMasked256load,
|
||||||
ssa.OpAMD64VPROLDMasked512load,
|
ssa.OpAMD64VPROLDMasked512load,
|
||||||
|
|
@ -2174,6 +2179,9 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||||
ssa.OpAMD64VPRORQMasked128load,
|
ssa.OpAMD64VPRORQMasked128load,
|
||||||
ssa.OpAMD64VPRORQMasked256load,
|
ssa.OpAMD64VPRORQMasked256load,
|
||||||
ssa.OpAMD64VPRORQMasked512load,
|
ssa.OpAMD64VPRORQMasked512load,
|
||||||
|
ssa.OpAMD64VPSHUFDMasked256load,
|
||||||
|
ssa.OpAMD64VPSHUFDMasked512load,
|
||||||
|
ssa.OpAMD64VPSHUFDMasked128load,
|
||||||
ssa.OpAMD64VPSLLDMasked128constload,
|
ssa.OpAMD64VPSLLDMasked128constload,
|
||||||
ssa.OpAMD64VPSLLDMasked256constload,
|
ssa.OpAMD64VPSLLDMasked256constload,
|
||||||
ssa.OpAMD64VPSLLDMasked512constload,
|
ssa.OpAMD64VPSLLDMasked512constload,
|
||||||
|
|
@ -2447,12 +2455,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||||
ssa.OpAMD64VPOPCNTQMasked128Merging,
|
ssa.OpAMD64VPOPCNTQMasked128Merging,
|
||||||
ssa.OpAMD64VPOPCNTQMasked256Merging,
|
ssa.OpAMD64VPOPCNTQMasked256Merging,
|
||||||
ssa.OpAMD64VPOPCNTQMasked512Merging,
|
ssa.OpAMD64VPOPCNTQMasked512Merging,
|
||||||
ssa.OpAMD64VPSHUFDMasked256Merging,
|
|
||||||
ssa.OpAMD64VPSHUFDMasked512Merging,
|
|
||||||
ssa.OpAMD64VPSHUFHWMasked256Merging,
|
|
||||||
ssa.OpAMD64VPSHUFHWMasked512Merging,
|
|
||||||
ssa.OpAMD64VPSHUFHWMasked128Merging,
|
|
||||||
ssa.OpAMD64VPSHUFDMasked128Merging,
|
|
||||||
ssa.OpAMD64VRCP14PSMasked128Merging,
|
ssa.OpAMD64VRCP14PSMasked128Merging,
|
||||||
ssa.OpAMD64VRCP14PSMasked256Merging,
|
ssa.OpAMD64VRCP14PSMasked256Merging,
|
||||||
ssa.OpAMD64VRCP14PSMasked512Merging,
|
ssa.OpAMD64VRCP14PSMasked512Merging,
|
||||||
|
|
@ -2483,6 +2485,15 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||||
ssa.OpAMD64VSQRTPDMasked128Merging,
|
ssa.OpAMD64VSQRTPDMasked128Merging,
|
||||||
ssa.OpAMD64VSQRTPDMasked256Merging,
|
ssa.OpAMD64VSQRTPDMasked256Merging,
|
||||||
ssa.OpAMD64VSQRTPDMasked512Merging,
|
ssa.OpAMD64VSQRTPDMasked512Merging,
|
||||||
|
ssa.OpAMD64VPSHUFDMasked256Merging,
|
||||||
|
ssa.OpAMD64VPSHUFDMasked512Merging,
|
||||||
|
ssa.OpAMD64VPSHUFHWMasked256Merging,
|
||||||
|
ssa.OpAMD64VPSHUFHWMasked512Merging,
|
||||||
|
ssa.OpAMD64VPSHUFHWMasked128Merging,
|
||||||
|
ssa.OpAMD64VPSHUFLWMasked256Merging,
|
||||||
|
ssa.OpAMD64VPSHUFLWMasked512Merging,
|
||||||
|
ssa.OpAMD64VPSHUFLWMasked128Merging,
|
||||||
|
ssa.OpAMD64VPSHUFDMasked128Merging,
|
||||||
ssa.OpAMD64VPSLLWMasked128constMerging,
|
ssa.OpAMD64VPSLLWMasked128constMerging,
|
||||||
ssa.OpAMD64VPSLLWMasked256constMerging,
|
ssa.OpAMD64VPSLLWMasked256constMerging,
|
||||||
ssa.OpAMD64VPSLLWMasked512constMerging,
|
ssa.OpAMD64VPSLLWMasked512constMerging,
|
||||||
|
|
@ -2674,6 +2685,36 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||||
ssa.OpAMD64VPCOMPRESSQMasked128,
|
ssa.OpAMD64VPCOMPRESSQMasked128,
|
||||||
ssa.OpAMD64VPCOMPRESSQMasked256,
|
ssa.OpAMD64VPCOMPRESSQMasked256,
|
||||||
ssa.OpAMD64VPCOMPRESSQMasked512,
|
ssa.OpAMD64VPCOMPRESSQMasked512,
|
||||||
|
ssa.OpAMD64VPERMI2BMasked128,
|
||||||
|
ssa.OpAMD64VPERMI2BMasked256,
|
||||||
|
ssa.OpAMD64VPERMI2BMasked512,
|
||||||
|
ssa.OpAMD64VPERMI2WMasked128,
|
||||||
|
ssa.OpAMD64VPERMI2WMasked256,
|
||||||
|
ssa.OpAMD64VPERMI2WMasked512,
|
||||||
|
ssa.OpAMD64VPERMI2PSMasked128,
|
||||||
|
ssa.OpAMD64VPERMI2PSMasked128load,
|
||||||
|
ssa.OpAMD64VPERMI2DMasked128,
|
||||||
|
ssa.OpAMD64VPERMI2DMasked128load,
|
||||||
|
ssa.OpAMD64VPERMI2PSMasked256,
|
||||||
|
ssa.OpAMD64VPERMI2PSMasked256load,
|
||||||
|
ssa.OpAMD64VPERMI2DMasked256,
|
||||||
|
ssa.OpAMD64VPERMI2DMasked256load,
|
||||||
|
ssa.OpAMD64VPERMI2PSMasked512,
|
||||||
|
ssa.OpAMD64VPERMI2PSMasked512load,
|
||||||
|
ssa.OpAMD64VPERMI2DMasked512,
|
||||||
|
ssa.OpAMD64VPERMI2DMasked512load,
|
||||||
|
ssa.OpAMD64VPERMI2PDMasked128,
|
||||||
|
ssa.OpAMD64VPERMI2PDMasked128load,
|
||||||
|
ssa.OpAMD64VPERMI2QMasked128,
|
||||||
|
ssa.OpAMD64VPERMI2QMasked128load,
|
||||||
|
ssa.OpAMD64VPERMI2PDMasked256,
|
||||||
|
ssa.OpAMD64VPERMI2PDMasked256load,
|
||||||
|
ssa.OpAMD64VPERMI2QMasked256,
|
||||||
|
ssa.OpAMD64VPERMI2QMasked256load,
|
||||||
|
ssa.OpAMD64VPERMI2PDMasked512,
|
||||||
|
ssa.OpAMD64VPERMI2PDMasked512load,
|
||||||
|
ssa.OpAMD64VPERMI2QMasked512,
|
||||||
|
ssa.OpAMD64VPERMI2QMasked512load,
|
||||||
ssa.OpAMD64VPALIGNRMasked256,
|
ssa.OpAMD64VPALIGNRMasked256,
|
||||||
ssa.OpAMD64VPALIGNRMasked512,
|
ssa.OpAMD64VPALIGNRMasked512,
|
||||||
ssa.OpAMD64VPALIGNRMasked128,
|
ssa.OpAMD64VPALIGNRMasked128,
|
||||||
|
|
@ -3061,48 +3102,7 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||||
ssa.OpAMD64VPORQMasked256load,
|
ssa.OpAMD64VPORQMasked256load,
|
||||||
ssa.OpAMD64VPORQMasked512,
|
ssa.OpAMD64VPORQMasked512,
|
||||||
ssa.OpAMD64VPORQMasked512load,
|
ssa.OpAMD64VPORQMasked512load,
|
||||||
ssa.OpAMD64VPERMI2BMasked128,
|
ssa.OpAMD64VPERMBMasked128,
|
||||||
ssa.OpAMD64VPERMI2BMasked256,
|
|
||||||
ssa.OpAMD64VPERMI2BMasked512,
|
|
||||||
ssa.OpAMD64VPERMI2WMasked128,
|
|
||||||
ssa.OpAMD64VPERMI2WMasked256,
|
|
||||||
ssa.OpAMD64VPERMI2WMasked512,
|
|
||||||
ssa.OpAMD64VPERMI2PSMasked128,
|
|
||||||
ssa.OpAMD64VPERMI2PSMasked128load,
|
|
||||||
ssa.OpAMD64VPERMI2DMasked128,
|
|
||||||
ssa.OpAMD64VPERMI2DMasked128load,
|
|
||||||
ssa.OpAMD64VPERMI2PSMasked256,
|
|
||||||
ssa.OpAMD64VPERMI2PSMasked256load,
|
|
||||||
ssa.OpAMD64VPERMI2DMasked256,
|
|
||||||
ssa.OpAMD64VPERMI2DMasked256load,
|
|
||||||
ssa.OpAMD64VPERMI2PSMasked512,
|
|
||||||
ssa.OpAMD64VPERMI2PSMasked512load,
|
|
||||||
ssa.OpAMD64VPERMI2DMasked512,
|
|
||||||
ssa.OpAMD64VPERMI2DMasked512load,
|
|
||||||
ssa.OpAMD64VPERMI2PDMasked128,
|
|
||||||
ssa.OpAMD64VPERMI2PDMasked128load,
|
|
||||||
ssa.OpAMD64VPERMI2QMasked128,
|
|
||||||
ssa.OpAMD64VPERMI2QMasked128load,
|
|
||||||
ssa.OpAMD64VPERMI2PDMasked256,
|
|
||||||
ssa.OpAMD64VPERMI2PDMasked256load,
|
|
||||||
ssa.OpAMD64VPERMI2QMasked256,
|
|
||||||
ssa.OpAMD64VPERMI2QMasked256load,
|
|
||||||
ssa.OpAMD64VPERMI2PDMasked512,
|
|
||||||
ssa.OpAMD64VPERMI2PDMasked512load,
|
|
||||||
ssa.OpAMD64VPERMI2QMasked512,
|
|
||||||
ssa.OpAMD64VPERMI2QMasked512load,
|
|
||||||
ssa.OpAMD64VPSHUFDMasked256,
|
|
||||||
ssa.OpAMD64VPSHUFDMasked256load,
|
|
||||||
ssa.OpAMD64VPSHUFDMasked512,
|
|
||||||
ssa.OpAMD64VPSHUFDMasked512load,
|
|
||||||
ssa.OpAMD64VPSHUFHWMasked256,
|
|
||||||
ssa.OpAMD64VPSHUFHWMasked512,
|
|
||||||
ssa.OpAMD64VPSHUFHWMasked128,
|
|
||||||
ssa.OpAMD64VPSHUFDMasked128,
|
|
||||||
ssa.OpAMD64VPSHUFDMasked128load,
|
|
||||||
ssa.OpAMD64VPSHUFBMasked256,
|
|
||||||
ssa.OpAMD64VPSHUFBMasked512,
|
|
||||||
ssa.OpAMD64VPSHUFBMasked128,
|
|
||||||
ssa.OpAMD64VPERMBMasked256,
|
ssa.OpAMD64VPERMBMasked256,
|
||||||
ssa.OpAMD64VPERMBMasked512,
|
ssa.OpAMD64VPERMBMasked512,
|
||||||
ssa.OpAMD64VPERMWMasked128,
|
ssa.OpAMD64VPERMWMasked128,
|
||||||
|
|
@ -3124,6 +3124,9 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||||
ssa.OpAMD64VPERMPDMasked512load,
|
ssa.OpAMD64VPERMPDMasked512load,
|
||||||
ssa.OpAMD64VPERMQMasked512,
|
ssa.OpAMD64VPERMQMasked512,
|
||||||
ssa.OpAMD64VPERMQMasked512load,
|
ssa.OpAMD64VPERMQMasked512load,
|
||||||
|
ssa.OpAMD64VPSHUFBMasked256,
|
||||||
|
ssa.OpAMD64VPSHUFBMasked512,
|
||||||
|
ssa.OpAMD64VPSHUFBMasked128,
|
||||||
ssa.OpAMD64VRCP14PSMasked128,
|
ssa.OpAMD64VRCP14PSMasked128,
|
||||||
ssa.OpAMD64VRCP14PSMasked128load,
|
ssa.OpAMD64VRCP14PSMasked128load,
|
||||||
ssa.OpAMD64VRCP14PSMasked256,
|
ssa.OpAMD64VRCP14PSMasked256,
|
||||||
|
|
@ -3418,6 +3421,18 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||||
ssa.OpAMD64VMOVDQU64Masked128,
|
ssa.OpAMD64VMOVDQU64Masked128,
|
||||||
ssa.OpAMD64VMOVDQU64Masked256,
|
ssa.OpAMD64VMOVDQU64Masked256,
|
||||||
ssa.OpAMD64VMOVDQU64Masked512,
|
ssa.OpAMD64VMOVDQU64Masked512,
|
||||||
|
ssa.OpAMD64VPSHUFDMasked256,
|
||||||
|
ssa.OpAMD64VPSHUFDMasked256load,
|
||||||
|
ssa.OpAMD64VPSHUFDMasked512,
|
||||||
|
ssa.OpAMD64VPSHUFDMasked512load,
|
||||||
|
ssa.OpAMD64VPSHUFHWMasked256,
|
||||||
|
ssa.OpAMD64VPSHUFHWMasked512,
|
||||||
|
ssa.OpAMD64VPSHUFHWMasked128,
|
||||||
|
ssa.OpAMD64VPSHUFLWMasked256,
|
||||||
|
ssa.OpAMD64VPSHUFLWMasked512,
|
||||||
|
ssa.OpAMD64VPSHUFLWMasked128,
|
||||||
|
ssa.OpAMD64VPSHUFDMasked128,
|
||||||
|
ssa.OpAMD64VPSHUFDMasked128load,
|
||||||
ssa.OpAMD64VPSLLWMasked128const,
|
ssa.OpAMD64VPSLLWMasked128const,
|
||||||
ssa.OpAMD64VPSLLWMasked256const,
|
ssa.OpAMD64VPSLLWMasked256const,
|
||||||
ssa.OpAMD64VPSLLWMasked512const,
|
ssa.OpAMD64VPSLLWMasked512const,
|
||||||
|
|
|
||||||
|
|
@ -216,6 +216,36 @@
|
||||||
(CompressUint64x2 x mask) => (VPCOMPRESSQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
(CompressUint64x2 x mask) => (VPCOMPRESSQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||||
(CompressUint64x4 x mask) => (VPCOMPRESSQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
|
(CompressUint64x4 x mask) => (VPCOMPRESSQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||||
(CompressUint64x8 x mask) => (VPCOMPRESSQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
|
(CompressUint64x8 x mask) => (VPCOMPRESSQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
|
||||||
|
(ConcatPermuteFloat32x4 ...) => (VPERMI2PS128 ...)
|
||||||
|
(ConcatPermuteFloat32x8 ...) => (VPERMI2PS256 ...)
|
||||||
|
(ConcatPermuteFloat32x16 ...) => (VPERMI2PS512 ...)
|
||||||
|
(ConcatPermuteFloat64x2 ...) => (VPERMI2PD128 ...)
|
||||||
|
(ConcatPermuteFloat64x4 ...) => (VPERMI2PD256 ...)
|
||||||
|
(ConcatPermuteFloat64x8 ...) => (VPERMI2PD512 ...)
|
||||||
|
(ConcatPermuteInt8x16 ...) => (VPERMI2B128 ...)
|
||||||
|
(ConcatPermuteInt8x32 ...) => (VPERMI2B256 ...)
|
||||||
|
(ConcatPermuteInt8x64 ...) => (VPERMI2B512 ...)
|
||||||
|
(ConcatPermuteInt16x8 ...) => (VPERMI2W128 ...)
|
||||||
|
(ConcatPermuteInt16x16 ...) => (VPERMI2W256 ...)
|
||||||
|
(ConcatPermuteInt16x32 ...) => (VPERMI2W512 ...)
|
||||||
|
(ConcatPermuteInt32x4 ...) => (VPERMI2D128 ...)
|
||||||
|
(ConcatPermuteInt32x8 ...) => (VPERMI2D256 ...)
|
||||||
|
(ConcatPermuteInt32x16 ...) => (VPERMI2D512 ...)
|
||||||
|
(ConcatPermuteInt64x2 ...) => (VPERMI2Q128 ...)
|
||||||
|
(ConcatPermuteInt64x4 ...) => (VPERMI2Q256 ...)
|
||||||
|
(ConcatPermuteInt64x8 ...) => (VPERMI2Q512 ...)
|
||||||
|
(ConcatPermuteUint8x16 ...) => (VPERMI2B128 ...)
|
||||||
|
(ConcatPermuteUint8x32 ...) => (VPERMI2B256 ...)
|
||||||
|
(ConcatPermuteUint8x64 ...) => (VPERMI2B512 ...)
|
||||||
|
(ConcatPermuteUint16x8 ...) => (VPERMI2W128 ...)
|
||||||
|
(ConcatPermuteUint16x16 ...) => (VPERMI2W256 ...)
|
||||||
|
(ConcatPermuteUint16x32 ...) => (VPERMI2W512 ...)
|
||||||
|
(ConcatPermuteUint32x4 ...) => (VPERMI2D128 ...)
|
||||||
|
(ConcatPermuteUint32x8 ...) => (VPERMI2D256 ...)
|
||||||
|
(ConcatPermuteUint32x16 ...) => (VPERMI2D512 ...)
|
||||||
|
(ConcatPermuteUint64x2 ...) => (VPERMI2Q128 ...)
|
||||||
|
(ConcatPermuteUint64x4 ...) => (VPERMI2Q256 ...)
|
||||||
|
(ConcatPermuteUint64x8 ...) => (VPERMI2Q512 ...)
|
||||||
(ConcatShiftBytesRightUint8x16 ...) => (VPALIGNR128 ...)
|
(ConcatShiftBytesRightUint8x16 ...) => (VPALIGNR128 ...)
|
||||||
(ConcatShiftBytesRightGroupedUint8x32 ...) => (VPALIGNR256 ...)
|
(ConcatShiftBytesRightGroupedUint8x32 ...) => (VPALIGNR256 ...)
|
||||||
(ConcatShiftBytesRightGroupedUint8x64 ...) => (VPALIGNR512 ...)
|
(ConcatShiftBytesRightGroupedUint8x64 ...) => (VPALIGNR512 ...)
|
||||||
|
|
@ -794,7 +824,7 @@
|
||||||
(PermuteFloat32x16 ...) => (VPERMPS512 ...)
|
(PermuteFloat32x16 ...) => (VPERMPS512 ...)
|
||||||
(PermuteFloat64x4 ...) => (VPERMPD256 ...)
|
(PermuteFloat64x4 ...) => (VPERMPD256 ...)
|
||||||
(PermuteFloat64x8 ...) => (VPERMPD512 ...)
|
(PermuteFloat64x8 ...) => (VPERMPD512 ...)
|
||||||
(PermuteInt8x16 ...) => (VPSHUFB128 ...)
|
(PermuteInt8x16 ...) => (VPERMB128 ...)
|
||||||
(PermuteInt8x32 ...) => (VPERMB256 ...)
|
(PermuteInt8x32 ...) => (VPERMB256 ...)
|
||||||
(PermuteInt8x64 ...) => (VPERMB512 ...)
|
(PermuteInt8x64 ...) => (VPERMB512 ...)
|
||||||
(PermuteInt16x8 ...) => (VPERMW128 ...)
|
(PermuteInt16x8 ...) => (VPERMW128 ...)
|
||||||
|
|
@ -804,7 +834,7 @@
|
||||||
(PermuteInt32x16 ...) => (VPERMD512 ...)
|
(PermuteInt32x16 ...) => (VPERMD512 ...)
|
||||||
(PermuteInt64x4 ...) => (VPERMQ256 ...)
|
(PermuteInt64x4 ...) => (VPERMQ256 ...)
|
||||||
(PermuteInt64x8 ...) => (VPERMQ512 ...)
|
(PermuteInt64x8 ...) => (VPERMQ512 ...)
|
||||||
(PermuteUint8x16 ...) => (VPSHUFB128 ...)
|
(PermuteUint8x16 ...) => (VPERMB128 ...)
|
||||||
(PermuteUint8x32 ...) => (VPERMB256 ...)
|
(PermuteUint8x32 ...) => (VPERMB256 ...)
|
||||||
(PermuteUint8x64 ...) => (VPERMB512 ...)
|
(PermuteUint8x64 ...) => (VPERMB512 ...)
|
||||||
(PermuteUint16x8 ...) => (VPERMW128 ...)
|
(PermuteUint16x8 ...) => (VPERMW128 ...)
|
||||||
|
|
@ -814,62 +844,12 @@
|
||||||
(PermuteUint32x16 ...) => (VPERMD512 ...)
|
(PermuteUint32x16 ...) => (VPERMD512 ...)
|
||||||
(PermuteUint64x4 ...) => (VPERMQ256 ...)
|
(PermuteUint64x4 ...) => (VPERMQ256 ...)
|
||||||
(PermuteUint64x8 ...) => (VPERMQ512 ...)
|
(PermuteUint64x8 ...) => (VPERMQ512 ...)
|
||||||
(Permute2Float32x4 ...) => (VPERMI2PS128 ...)
|
(PermuteOrZeroInt8x16 ...) => (VPSHUFB128 ...)
|
||||||
(Permute2Float32x8 ...) => (VPERMI2PS256 ...)
|
(PermuteOrZeroUint8x16 ...) => (VPSHUFB128 ...)
|
||||||
(Permute2Float32x16 ...) => (VPERMI2PS512 ...)
|
(PermuteOrZeroGroupedInt8x32 ...) => (VPSHUFB256 ...)
|
||||||
(Permute2Float64x2 ...) => (VPERMI2PD128 ...)
|
(PermuteOrZeroGroupedInt8x64 ...) => (VPSHUFB512 ...)
|
||||||
(Permute2Float64x4 ...) => (VPERMI2PD256 ...)
|
(PermuteOrZeroGroupedUint8x32 ...) => (VPSHUFB256 ...)
|
||||||
(Permute2Float64x8 ...) => (VPERMI2PD512 ...)
|
(PermuteOrZeroGroupedUint8x64 ...) => (VPSHUFB512 ...)
|
||||||
(Permute2Int8x16 ...) => (VPERMI2B128 ...)
|
|
||||||
(Permute2Int8x32 ...) => (VPERMI2B256 ...)
|
|
||||||
(Permute2Int8x64 ...) => (VPERMI2B512 ...)
|
|
||||||
(Permute2Int16x8 ...) => (VPERMI2W128 ...)
|
|
||||||
(Permute2Int16x16 ...) => (VPERMI2W256 ...)
|
|
||||||
(Permute2Int16x32 ...) => (VPERMI2W512 ...)
|
|
||||||
(Permute2Int32x4 ...) => (VPERMI2D128 ...)
|
|
||||||
(Permute2Int32x8 ...) => (VPERMI2D256 ...)
|
|
||||||
(Permute2Int32x16 ...) => (VPERMI2D512 ...)
|
|
||||||
(Permute2Int64x2 ...) => (VPERMI2Q128 ...)
|
|
||||||
(Permute2Int64x4 ...) => (VPERMI2Q256 ...)
|
|
||||||
(Permute2Int64x8 ...) => (VPERMI2Q512 ...)
|
|
||||||
(Permute2Uint8x16 ...) => (VPERMI2B128 ...)
|
|
||||||
(Permute2Uint8x32 ...) => (VPERMI2B256 ...)
|
|
||||||
(Permute2Uint8x64 ...) => (VPERMI2B512 ...)
|
|
||||||
(Permute2Uint16x8 ...) => (VPERMI2W128 ...)
|
|
||||||
(Permute2Uint16x16 ...) => (VPERMI2W256 ...)
|
|
||||||
(Permute2Uint16x32 ...) => (VPERMI2W512 ...)
|
|
||||||
(Permute2Uint32x4 ...) => (VPERMI2D128 ...)
|
|
||||||
(Permute2Uint32x8 ...) => (VPERMI2D256 ...)
|
|
||||||
(Permute2Uint32x16 ...) => (VPERMI2D512 ...)
|
|
||||||
(Permute2Uint64x2 ...) => (VPERMI2Q128 ...)
|
|
||||||
(Permute2Uint64x4 ...) => (VPERMI2Q256 ...)
|
|
||||||
(Permute2Uint64x8 ...) => (VPERMI2Q512 ...)
|
|
||||||
(PermuteConstantInt32x4 ...) => (VPSHUFD128 ...)
|
|
||||||
(PermuteConstantUint32x4 ...) => (VPSHUFD128 ...)
|
|
||||||
(PermuteConstantGroupedInt32x8 ...) => (VPSHUFD256 ...)
|
|
||||||
(PermuteConstantGroupedInt32x16 ...) => (VPSHUFD512 ...)
|
|
||||||
(PermuteConstantGroupedUint32x8 ...) => (VPSHUFD256 ...)
|
|
||||||
(PermuteConstantGroupedUint32x16 ...) => (VPSHUFD512 ...)
|
|
||||||
(PermuteConstantHiInt16x8 ...) => (VPSHUFHW128 ...)
|
|
||||||
(PermuteConstantHiInt32x4 ...) => (VPSHUFHW128 ...)
|
|
||||||
(PermuteConstantHiUint16x8 ...) => (VPSHUFHW128 ...)
|
|
||||||
(PermuteConstantHiUint32x4 ...) => (VPSHUFHW128 ...)
|
|
||||||
(PermuteConstantHiGroupedInt16x16 ...) => (VPSHUFHW256 ...)
|
|
||||||
(PermuteConstantHiGroupedInt16x32 ...) => (VPSHUFHW512 ...)
|
|
||||||
(PermuteConstantHiGroupedUint16x16 ...) => (VPSHUFHW256 ...)
|
|
||||||
(PermuteConstantHiGroupedUint16x32 ...) => (VPSHUFHW512 ...)
|
|
||||||
(PermuteConstantLoInt16x8 ...) => (VPSHUFHW128 ...)
|
|
||||||
(PermuteConstantLoInt32x4 ...) => (VPSHUFHW128 ...)
|
|
||||||
(PermuteConstantLoUint16x8 ...) => (VPSHUFHW128 ...)
|
|
||||||
(PermuteConstantLoUint32x4 ...) => (VPSHUFHW128 ...)
|
|
||||||
(PermuteConstantLoGroupedInt16x16 ...) => (VPSHUFHW256 ...)
|
|
||||||
(PermuteConstantLoGroupedInt16x32 ...) => (VPSHUFHW512 ...)
|
|
||||||
(PermuteConstantLoGroupedUint16x16 ...) => (VPSHUFHW256 ...)
|
|
||||||
(PermuteConstantLoGroupedUint16x32 ...) => (VPSHUFHW512 ...)
|
|
||||||
(PermuteGroupedInt8x32 ...) => (VPSHUFB256 ...)
|
|
||||||
(PermuteGroupedInt8x64 ...) => (VPSHUFB512 ...)
|
|
||||||
(PermuteGroupedUint8x32 ...) => (VPSHUFB256 ...)
|
|
||||||
(PermuteGroupedUint8x64 ...) => (VPSHUFB512 ...)
|
|
||||||
(ReciprocalFloat32x4 ...) => (VRCPPS128 ...)
|
(ReciprocalFloat32x4 ...) => (VRCPPS128 ...)
|
||||||
(ReciprocalFloat32x8 ...) => (VRCPPS256 ...)
|
(ReciprocalFloat32x8 ...) => (VRCPPS256 ...)
|
||||||
(ReciprocalFloat32x16 ...) => (VRCP14PS512 ...)
|
(ReciprocalFloat32x16 ...) => (VRCP14PS512 ...)
|
||||||
|
|
@ -1324,6 +1304,24 @@
|
||||||
(concatSelectedConstantGroupedUint32x16 ...) => (VSHUFPS512 ...)
|
(concatSelectedConstantGroupedUint32x16 ...) => (VSHUFPS512 ...)
|
||||||
(concatSelectedConstantGroupedUint64x4 ...) => (VSHUFPD256 ...)
|
(concatSelectedConstantGroupedUint64x4 ...) => (VSHUFPD256 ...)
|
||||||
(concatSelectedConstantGroupedUint64x8 ...) => (VSHUFPD512 ...)
|
(concatSelectedConstantGroupedUint64x8 ...) => (VSHUFPD512 ...)
|
||||||
|
(permuteScalarsInt32x4 ...) => (VPSHUFD128 ...)
|
||||||
|
(permuteScalarsUint32x4 ...) => (VPSHUFD128 ...)
|
||||||
|
(permuteScalarsGroupedInt32x8 ...) => (VPSHUFD256 ...)
|
||||||
|
(permuteScalarsGroupedInt32x16 ...) => (VPSHUFD512 ...)
|
||||||
|
(permuteScalarsGroupedUint32x8 ...) => (VPSHUFD256 ...)
|
||||||
|
(permuteScalarsGroupedUint32x16 ...) => (VPSHUFD512 ...)
|
||||||
|
(permuteScalarsHiInt16x8 ...) => (VPSHUFHW128 ...)
|
||||||
|
(permuteScalarsHiUint16x8 ...) => (VPSHUFHW128 ...)
|
||||||
|
(permuteScalarsHiGroupedInt16x16 ...) => (VPSHUFHW256 ...)
|
||||||
|
(permuteScalarsHiGroupedInt16x32 ...) => (VPSHUFHW512 ...)
|
||||||
|
(permuteScalarsHiGroupedUint16x16 ...) => (VPSHUFHW256 ...)
|
||||||
|
(permuteScalarsHiGroupedUint16x32 ...) => (VPSHUFHW512 ...)
|
||||||
|
(permuteScalarsLoInt16x8 ...) => (VPSHUFLW128 ...)
|
||||||
|
(permuteScalarsLoUint16x8 ...) => (VPSHUFLW128 ...)
|
||||||
|
(permuteScalarsLoGroupedInt16x16 ...) => (VPSHUFLW256 ...)
|
||||||
|
(permuteScalarsLoGroupedInt16x32 ...) => (VPSHUFLW512 ...)
|
||||||
|
(permuteScalarsLoGroupedUint16x16 ...) => (VPSHUFLW256 ...)
|
||||||
|
(permuteScalarsLoGroupedUint16x32 ...) => (VPSHUFLW512 ...)
|
||||||
(ternInt32x4 ...) => (VPTERNLOGD128 ...)
|
(ternInt32x4 ...) => (VPTERNLOGD128 ...)
|
||||||
(ternInt32x8 ...) => (VPTERNLOGD256 ...)
|
(ternInt32x8 ...) => (VPTERNLOGD256 ...)
|
||||||
(ternInt32x16 ...) => (VPTERNLOGD512 ...)
|
(ternInt32x16 ...) => (VPTERNLOGD512 ...)
|
||||||
|
|
@ -1417,6 +1415,24 @@
|
||||||
(VMOVDQU64Masked128 (VREDUCEPD128 [a] x) mask) => (VREDUCEPDMasked128 [a] x mask)
|
(VMOVDQU64Masked128 (VREDUCEPD128 [a] x) mask) => (VREDUCEPDMasked128 [a] x mask)
|
||||||
(VMOVDQU64Masked256 (VREDUCEPD256 [a] x) mask) => (VREDUCEPDMasked256 [a] x mask)
|
(VMOVDQU64Masked256 (VREDUCEPD256 [a] x) mask) => (VREDUCEPDMasked256 [a] x mask)
|
||||||
(VMOVDQU64Masked512 (VREDUCEPD512 [a] x) mask) => (VREDUCEPDMasked512 [a] x mask)
|
(VMOVDQU64Masked512 (VREDUCEPD512 [a] x) mask) => (VREDUCEPDMasked512 [a] x mask)
|
||||||
|
(VMOVDQU8Masked128 (VPERMI2B128 x y z) mask) => (VPERMI2BMasked128 x y z mask)
|
||||||
|
(VMOVDQU8Masked256 (VPERMI2B256 x y z) mask) => (VPERMI2BMasked256 x y z mask)
|
||||||
|
(VMOVDQU8Masked512 (VPERMI2B512 x y z) mask) => (VPERMI2BMasked512 x y z mask)
|
||||||
|
(VMOVDQU16Masked128 (VPERMI2W128 x y z) mask) => (VPERMI2WMasked128 x y z mask)
|
||||||
|
(VMOVDQU16Masked256 (VPERMI2W256 x y z) mask) => (VPERMI2WMasked256 x y z mask)
|
||||||
|
(VMOVDQU16Masked512 (VPERMI2W512 x y z) mask) => (VPERMI2WMasked512 x y z mask)
|
||||||
|
(VMOVDQU32Masked128 (VPERMI2PS128 x y z) mask) => (VPERMI2PSMasked128 x y z mask)
|
||||||
|
(VMOVDQU32Masked128 (VPERMI2D128 x y z) mask) => (VPERMI2DMasked128 x y z mask)
|
||||||
|
(VMOVDQU32Masked256 (VPERMI2PS256 x y z) mask) => (VPERMI2PSMasked256 x y z mask)
|
||||||
|
(VMOVDQU32Masked256 (VPERMI2D256 x y z) mask) => (VPERMI2DMasked256 x y z mask)
|
||||||
|
(VMOVDQU32Masked512 (VPERMI2PS512 x y z) mask) => (VPERMI2PSMasked512 x y z mask)
|
||||||
|
(VMOVDQU32Masked512 (VPERMI2D512 x y z) mask) => (VPERMI2DMasked512 x y z mask)
|
||||||
|
(VMOVDQU64Masked128 (VPERMI2PD128 x y z) mask) => (VPERMI2PDMasked128 x y z mask)
|
||||||
|
(VMOVDQU64Masked128 (VPERMI2Q128 x y z) mask) => (VPERMI2QMasked128 x y z mask)
|
||||||
|
(VMOVDQU64Masked256 (VPERMI2PD256 x y z) mask) => (VPERMI2PDMasked256 x y z mask)
|
||||||
|
(VMOVDQU64Masked256 (VPERMI2Q256 x y z) mask) => (VPERMI2QMasked256 x y z mask)
|
||||||
|
(VMOVDQU64Masked512 (VPERMI2PD512 x y z) mask) => (VPERMI2PDMasked512 x y z mask)
|
||||||
|
(VMOVDQU64Masked512 (VPERMI2Q512 x y z) mask) => (VPERMI2QMasked512 x y z mask)
|
||||||
(VMOVDQU8Masked256 (VPALIGNR256 [a] x y) mask) => (VPALIGNRMasked256 [a] x y mask)
|
(VMOVDQU8Masked256 (VPALIGNR256 [a] x y) mask) => (VPALIGNRMasked256 [a] x y mask)
|
||||||
(VMOVDQU8Masked512 (VPALIGNR512 [a] x y) mask) => (VPALIGNRMasked512 [a] x y mask)
|
(VMOVDQU8Masked512 (VPALIGNR512 [a] x y) mask) => (VPALIGNRMasked512 [a] x y mask)
|
||||||
(VMOVDQU8Masked128 (VPALIGNR128 [a] x y) mask) => (VPALIGNRMasked128 [a] x y mask)
|
(VMOVDQU8Masked128 (VPALIGNR128 [a] x y) mask) => (VPALIGNRMasked128 [a] x y mask)
|
||||||
|
|
@ -1668,33 +1684,7 @@
|
||||||
(VMOVDQU64Masked512 (VPOPCNTQ512 x) mask) => (VPOPCNTQMasked512 x mask)
|
(VMOVDQU64Masked512 (VPOPCNTQ512 x) mask) => (VPOPCNTQMasked512 x mask)
|
||||||
(VMOVDQU32Masked512 (VPORD512 x y) mask) => (VPORDMasked512 x y mask)
|
(VMOVDQU32Masked512 (VPORD512 x y) mask) => (VPORDMasked512 x y mask)
|
||||||
(VMOVDQU64Masked512 (VPORQ512 x y) mask) => (VPORQMasked512 x y mask)
|
(VMOVDQU64Masked512 (VPORQ512 x y) mask) => (VPORQMasked512 x y mask)
|
||||||
(VMOVDQU8Masked128 (VPERMI2B128 x y z) mask) => (VPERMI2BMasked128 x y z mask)
|
(VMOVDQU8Masked128 (VPERMB128 x y) mask) => (VPERMBMasked128 x y mask)
|
||||||
(VMOVDQU8Masked256 (VPERMI2B256 x y z) mask) => (VPERMI2BMasked256 x y z mask)
|
|
||||||
(VMOVDQU8Masked512 (VPERMI2B512 x y z) mask) => (VPERMI2BMasked512 x y z mask)
|
|
||||||
(VMOVDQU16Masked128 (VPERMI2W128 x y z) mask) => (VPERMI2WMasked128 x y z mask)
|
|
||||||
(VMOVDQU16Masked256 (VPERMI2W256 x y z) mask) => (VPERMI2WMasked256 x y z mask)
|
|
||||||
(VMOVDQU16Masked512 (VPERMI2W512 x y z) mask) => (VPERMI2WMasked512 x y z mask)
|
|
||||||
(VMOVDQU32Masked128 (VPERMI2PS128 x y z) mask) => (VPERMI2PSMasked128 x y z mask)
|
|
||||||
(VMOVDQU32Masked128 (VPERMI2D128 x y z) mask) => (VPERMI2DMasked128 x y z mask)
|
|
||||||
(VMOVDQU32Masked256 (VPERMI2PS256 x y z) mask) => (VPERMI2PSMasked256 x y z mask)
|
|
||||||
(VMOVDQU32Masked256 (VPERMI2D256 x y z) mask) => (VPERMI2DMasked256 x y z mask)
|
|
||||||
(VMOVDQU32Masked512 (VPERMI2PS512 x y z) mask) => (VPERMI2PSMasked512 x y z mask)
|
|
||||||
(VMOVDQU32Masked512 (VPERMI2D512 x y z) mask) => (VPERMI2DMasked512 x y z mask)
|
|
||||||
(VMOVDQU64Masked128 (VPERMI2PD128 x y z) mask) => (VPERMI2PDMasked128 x y z mask)
|
|
||||||
(VMOVDQU64Masked128 (VPERMI2Q128 x y z) mask) => (VPERMI2QMasked128 x y z mask)
|
|
||||||
(VMOVDQU64Masked256 (VPERMI2PD256 x y z) mask) => (VPERMI2PDMasked256 x y z mask)
|
|
||||||
(VMOVDQU64Masked256 (VPERMI2Q256 x y z) mask) => (VPERMI2QMasked256 x y z mask)
|
|
||||||
(VMOVDQU64Masked512 (VPERMI2PD512 x y z) mask) => (VPERMI2PDMasked512 x y z mask)
|
|
||||||
(VMOVDQU64Masked512 (VPERMI2Q512 x y z) mask) => (VPERMI2QMasked512 x y z mask)
|
|
||||||
(VMOVDQU32Masked256 (VPSHUFD256 [a] x) mask) => (VPSHUFDMasked256 [a] x mask)
|
|
||||||
(VMOVDQU32Masked512 (VPSHUFD512 [a] x) mask) => (VPSHUFDMasked512 [a] x mask)
|
|
||||||
(VMOVDQU16Masked256 (VPSHUFHW256 [a] x) mask) => (VPSHUFHWMasked256 [a] x mask)
|
|
||||||
(VMOVDQU16Masked512 (VPSHUFHW512 [a] x) mask) => (VPSHUFHWMasked512 [a] x mask)
|
|
||||||
(VMOVDQU16Masked128 (VPSHUFHW128 [a] x) mask) => (VPSHUFHWMasked128 [a] x mask)
|
|
||||||
(VMOVDQU32Masked128 (VPSHUFD128 [a] x) mask) => (VPSHUFDMasked128 [a] x mask)
|
|
||||||
(VMOVDQU8Masked256 (VPSHUFB256 x y) mask) => (VPSHUFBMasked256 x y mask)
|
|
||||||
(VMOVDQU8Masked512 (VPSHUFB512 x y) mask) => (VPSHUFBMasked512 x y mask)
|
|
||||||
(VMOVDQU8Masked128 (VPSHUFB128 x y) mask) => (VPSHUFBMasked128 x y mask)
|
|
||||||
(VMOVDQU8Masked256 (VPERMB256 x y) mask) => (VPERMBMasked256 x y mask)
|
(VMOVDQU8Masked256 (VPERMB256 x y) mask) => (VPERMBMasked256 x y mask)
|
||||||
(VMOVDQU8Masked512 (VPERMB512 x y) mask) => (VPERMBMasked512 x y mask)
|
(VMOVDQU8Masked512 (VPERMB512 x y) mask) => (VPERMBMasked512 x y mask)
|
||||||
(VMOVDQU16Masked128 (VPERMW128 x y) mask) => (VPERMWMasked128 x y mask)
|
(VMOVDQU16Masked128 (VPERMW128 x y) mask) => (VPERMWMasked128 x y mask)
|
||||||
|
|
@ -1708,6 +1698,9 @@
|
||||||
(VMOVDQU64Masked256 (VPERMQ256 x y) mask) => (VPERMQMasked256 x y mask)
|
(VMOVDQU64Masked256 (VPERMQ256 x y) mask) => (VPERMQMasked256 x y mask)
|
||||||
(VMOVDQU64Masked512 (VPERMPD512 x y) mask) => (VPERMPDMasked512 x y mask)
|
(VMOVDQU64Masked512 (VPERMPD512 x y) mask) => (VPERMPDMasked512 x y mask)
|
||||||
(VMOVDQU64Masked512 (VPERMQ512 x y) mask) => (VPERMQMasked512 x y mask)
|
(VMOVDQU64Masked512 (VPERMQ512 x y) mask) => (VPERMQMasked512 x y mask)
|
||||||
|
(VMOVDQU8Masked256 (VPSHUFB256 x y) mask) => (VPSHUFBMasked256 x y mask)
|
||||||
|
(VMOVDQU8Masked512 (VPSHUFB512 x y) mask) => (VPSHUFBMasked512 x y mask)
|
||||||
|
(VMOVDQU8Masked128 (VPSHUFB128 x y) mask) => (VPSHUFBMasked128 x y mask)
|
||||||
(VMOVDQU32Masked512 (VRCP14PS512 x) mask) => (VRCP14PSMasked512 x mask)
|
(VMOVDQU32Masked512 (VRCP14PS512 x) mask) => (VRCP14PSMasked512 x mask)
|
||||||
(VMOVDQU64Masked128 (VRCP14PD128 x) mask) => (VRCP14PDMasked128 x mask)
|
(VMOVDQU64Masked128 (VRCP14PD128 x) mask) => (VRCP14PDMasked128 x mask)
|
||||||
(VMOVDQU64Masked256 (VRCP14PD256 x) mask) => (VRCP14PDMasked256 x mask)
|
(VMOVDQU64Masked256 (VRCP14PD256 x) mask) => (VRCP14PDMasked256 x mask)
|
||||||
|
|
@ -1874,6 +1867,15 @@
|
||||||
(VMOVDQU16Masked512 (VPSUBUSW512 x y) mask) => (VPSUBUSWMasked512 x y mask)
|
(VMOVDQU16Masked512 (VPSUBUSW512 x y) mask) => (VPSUBUSWMasked512 x y mask)
|
||||||
(VMOVDQU32Masked512 (VPXORD512 x y) mask) => (VPXORDMasked512 x y mask)
|
(VMOVDQU32Masked512 (VPXORD512 x y) mask) => (VPXORDMasked512 x y mask)
|
||||||
(VMOVDQU64Masked512 (VPXORQ512 x y) mask) => (VPXORQMasked512 x y mask)
|
(VMOVDQU64Masked512 (VPXORQ512 x y) mask) => (VPXORQMasked512 x y mask)
|
||||||
|
(VMOVDQU32Masked256 (VPSHUFD256 [a] x) mask) => (VPSHUFDMasked256 [a] x mask)
|
||||||
|
(VMOVDQU32Masked512 (VPSHUFD512 [a] x) mask) => (VPSHUFDMasked512 [a] x mask)
|
||||||
|
(VMOVDQU16Masked256 (VPSHUFHW256 [a] x) mask) => (VPSHUFHWMasked256 [a] x mask)
|
||||||
|
(VMOVDQU16Masked512 (VPSHUFHW512 [a] x) mask) => (VPSHUFHWMasked512 [a] x mask)
|
||||||
|
(VMOVDQU16Masked128 (VPSHUFHW128 [a] x) mask) => (VPSHUFHWMasked128 [a] x mask)
|
||||||
|
(VMOVDQU16Masked256 (VPSHUFLW256 [a] x) mask) => (VPSHUFLWMasked256 [a] x mask)
|
||||||
|
(VMOVDQU16Masked512 (VPSHUFLW512 [a] x) mask) => (VPSHUFLWMasked512 [a] x mask)
|
||||||
|
(VMOVDQU16Masked128 (VPSHUFLW128 [a] x) mask) => (VPSHUFLWMasked128 [a] x mask)
|
||||||
|
(VMOVDQU32Masked128 (VPSHUFD128 [a] x) mask) => (VPSHUFDMasked128 [a] x mask)
|
||||||
(VMOVDQU16Masked128 (VPSLLW128const [a] x) mask) => (VPSLLWMasked128const [a] x mask)
|
(VMOVDQU16Masked128 (VPSLLW128const [a] x) mask) => (VPSLLWMasked128const [a] x mask)
|
||||||
(VMOVDQU16Masked256 (VPSLLW256const [a] x) mask) => (VPSLLWMasked256const [a] x mask)
|
(VMOVDQU16Masked256 (VPSLLW256const [a] x) mask) => (VPSLLWMasked256const [a] x mask)
|
||||||
(VMOVDQU16Masked512 (VPSLLW512const [a] x) mask) => (VPSLLWMasked512const [a] x mask)
|
(VMOVDQU16Masked512 (VPSLLW512const [a] x) mask) => (VPSLLWMasked512const [a] x mask)
|
||||||
|
|
@ -2021,6 +2023,7 @@
|
||||||
(VPBLENDMWMasked512 dst (VPSHLDW512 [a] x y) mask) => (VPSHLDWMasked512Merging dst [a] x y mask)
|
(VPBLENDMWMasked512 dst (VPSHLDW512 [a] x y) mask) => (VPSHLDWMasked512Merging dst [a] x y mask)
|
||||||
(VPBLENDMWMasked512 dst (VPSHRDW512 [a] x y) mask) => (VPSHRDWMasked512Merging dst [a] x y mask)
|
(VPBLENDMWMasked512 dst (VPSHRDW512 [a] x y) mask) => (VPSHRDWMasked512Merging dst [a] x y mask)
|
||||||
(VPBLENDMWMasked512 dst (VPSHUFHW512 [a] x) mask) => (VPSHUFHWMasked512Merging dst [a] x mask)
|
(VPBLENDMWMasked512 dst (VPSHUFHW512 [a] x) mask) => (VPSHUFHWMasked512Merging dst [a] x mask)
|
||||||
|
(VPBLENDMWMasked512 dst (VPSHUFLW512 [a] x) mask) => (VPSHUFLWMasked512Merging dst [a] x mask)
|
||||||
(VPBLENDMWMasked512 dst (VPSLLVW512 x y) mask) => (VPSLLVWMasked512Merging dst x y mask)
|
(VPBLENDMWMasked512 dst (VPSLLVW512 x y) mask) => (VPSLLVWMasked512Merging dst x y mask)
|
||||||
(VPBLENDMWMasked512 dst (VPSLLW512const [a] x) mask) => (VPSLLWMasked512constMerging dst [a] x mask)
|
(VPBLENDMWMasked512 dst (VPSLLW512const [a] x) mask) => (VPSLLWMasked512constMerging dst [a] x mask)
|
||||||
(VPBLENDMWMasked512 dst (VPSRAVW512 x y) mask) => (VPSRAVWMasked512Merging dst x y mask)
|
(VPBLENDMWMasked512 dst (VPSRAVW512 x y) mask) => (VPSRAVWMasked512Merging dst x y mask)
|
||||||
|
|
@ -2170,6 +2173,7 @@
|
||||||
(VPBLENDVB128 dst (VPSHUFB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
|
(VPBLENDVB128 dst (VPSHUFB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||||
(VPBLENDVB128 dst (VPSHUFD128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFDMasked128Merging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
(VPBLENDVB128 dst (VPSHUFD128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFDMasked128Merging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||||
(VPBLENDVB128 dst (VPSHUFHW128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFHWMasked128Merging dst [a] x (VPMOVVec16x8ToM <types.TypeMask> mask))
|
(VPBLENDVB128 dst (VPSHUFHW128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFHWMasked128Merging dst [a] x (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||||
|
(VPBLENDVB128 dst (VPSHUFLW128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFLWMasked128Merging dst [a] x (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||||
(VPBLENDVB128 dst (VPSLLD128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLDMasked128constMerging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
(VPBLENDVB128 dst (VPSLLD128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLDMasked128constMerging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||||
(VPBLENDVB128 dst (VPSLLQ128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLQMasked128constMerging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
(VPBLENDVB128 dst (VPSLLQ128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLQMasked128constMerging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||||
(VPBLENDVB128 dst (VPSLLVD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
|
(VPBLENDVB128 dst (VPSLLVD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||||
|
|
@ -2305,6 +2309,7 @@
|
||||||
(VPBLENDVB256 dst (VPSHUFB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
|
(VPBLENDVB256 dst (VPSHUFB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
|
||||||
(VPBLENDVB256 dst (VPSHUFD256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFDMasked256Merging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
|
(VPBLENDVB256 dst (VPSHUFD256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFDMasked256Merging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||||
(VPBLENDVB256 dst (VPSHUFHW256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFHWMasked256Merging dst [a] x (VPMOVVec16x16ToM <types.TypeMask> mask))
|
(VPBLENDVB256 dst (VPSHUFHW256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFHWMasked256Merging dst [a] x (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||||
|
(VPBLENDVB256 dst (VPSHUFLW256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFLWMasked256Merging dst [a] x (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||||
(VPBLENDVB256 dst (VPSLLD256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLDMasked256constMerging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
|
(VPBLENDVB256 dst (VPSLLD256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLDMasked256constMerging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||||
(VPBLENDVB256 dst (VPSLLQ256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLQMasked256constMerging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
|
(VPBLENDVB256 dst (VPSLLQ256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLQMasked256constMerging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||||
(VPBLENDVB256 dst (VPSLLVD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
|
(VPBLENDVB256 dst (VPSLLVD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||||
|
|
@ -2410,6 +2415,30 @@
|
||||||
(VREDUCEPDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
|
(VREDUCEPDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
|
||||||
(VREDUCEPDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
|
(VREDUCEPDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
|
||||||
(VREDUCEPDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
|
(VREDUCEPDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
|
||||||
|
(VPERMI2PS128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PS128load {sym} [off] x y ptr mem)
|
||||||
|
(VPERMI2D128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2D128load {sym} [off] x y ptr mem)
|
||||||
|
(VPERMI2PS256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PS256load {sym} [off] x y ptr mem)
|
||||||
|
(VPERMI2D256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2D256load {sym} [off] x y ptr mem)
|
||||||
|
(VPERMI2PS512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PS512load {sym} [off] x y ptr mem)
|
||||||
|
(VPERMI2D512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2D512load {sym} [off] x y ptr mem)
|
||||||
|
(VPERMI2PD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PD128load {sym} [off] x y ptr mem)
|
||||||
|
(VPERMI2Q128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2Q128load {sym} [off] x y ptr mem)
|
||||||
|
(VPERMI2PD256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PD256load {sym} [off] x y ptr mem)
|
||||||
|
(VPERMI2Q256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2Q256load {sym} [off] x y ptr mem)
|
||||||
|
(VPERMI2PD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PD512load {sym} [off] x y ptr mem)
|
||||||
|
(VPERMI2Q512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2Q512load {sym} [off] x y ptr mem)
|
||||||
|
(VPERMI2PSMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PSMasked128load {sym} [off] x y ptr mask mem)
|
||||||
|
(VPERMI2DMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2DMasked128load {sym} [off] x y ptr mask mem)
|
||||||
|
(VPERMI2PSMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PSMasked256load {sym} [off] x y ptr mask mem)
|
||||||
|
(VPERMI2DMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2DMasked256load {sym} [off] x y ptr mask mem)
|
||||||
|
(VPERMI2PSMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PSMasked512load {sym} [off] x y ptr mask mem)
|
||||||
|
(VPERMI2DMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2DMasked512load {sym} [off] x y ptr mask mem)
|
||||||
|
(VPERMI2PDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PDMasked128load {sym} [off] x y ptr mask mem)
|
||||||
|
(VPERMI2QMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2QMasked128load {sym} [off] x y ptr mask mem)
|
||||||
|
(VPERMI2PDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PDMasked256load {sym} [off] x y ptr mask mem)
|
||||||
|
(VPERMI2QMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2QMasked256load {sym} [off] x y ptr mask mem)
|
||||||
|
(VPERMI2PDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PDMasked512load {sym} [off] x y ptr mask mem)
|
||||||
|
(VPERMI2QMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2QMasked512load {sym} [off] x y ptr mask mem)
|
||||||
(VPACKSSDW512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPACKSSDW512load {sym} [off] x ptr mem)
|
(VPACKSSDW512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPACKSSDW512load {sym} [off] x ptr mem)
|
||||||
(VPACKSSDWMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKSSDWMasked128load {sym} [off] x ptr mask mem)
|
(VPACKSSDWMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKSSDWMasked128load {sym} [off] x ptr mask mem)
|
||||||
(VPACKSSDWMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKSSDWMasked256load {sym} [off] x ptr mask mem)
|
(VPACKSSDWMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKSSDWMasked256load {sym} [off] x ptr mask mem)
|
||||||
|
|
@ -2636,34 +2665,6 @@
|
||||||
(VPERMQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMQ256load {sym} [off] x ptr mem)
|
(VPERMQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMQ256load {sym} [off] x ptr mem)
|
||||||
(VPERMPD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMPD512load {sym} [off] x ptr mem)
|
(VPERMPD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMPD512load {sym} [off] x ptr mem)
|
||||||
(VPERMQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMQ512load {sym} [off] x ptr mem)
|
(VPERMQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMQ512load {sym} [off] x ptr mem)
|
||||||
(VPERMI2PS128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PS128load {sym} [off] x y ptr mem)
|
|
||||||
(VPERMI2D128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2D128load {sym} [off] x y ptr mem)
|
|
||||||
(VPERMI2PS256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PS256load {sym} [off] x y ptr mem)
|
|
||||||
(VPERMI2D256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2D256load {sym} [off] x y ptr mem)
|
|
||||||
(VPERMI2PS512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PS512load {sym} [off] x y ptr mem)
|
|
||||||
(VPERMI2D512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2D512load {sym} [off] x y ptr mem)
|
|
||||||
(VPERMI2PD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PD128load {sym} [off] x y ptr mem)
|
|
||||||
(VPERMI2Q128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2Q128load {sym} [off] x y ptr mem)
|
|
||||||
(VPERMI2PD256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PD256load {sym} [off] x y ptr mem)
|
|
||||||
(VPERMI2Q256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2Q256load {sym} [off] x y ptr mem)
|
|
||||||
(VPERMI2PD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PD512load {sym} [off] x y ptr mem)
|
|
||||||
(VPERMI2Q512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2Q512load {sym} [off] x y ptr mem)
|
|
||||||
(VPERMI2PSMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PSMasked128load {sym} [off] x y ptr mask mem)
|
|
||||||
(VPERMI2DMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2DMasked128load {sym} [off] x y ptr mask mem)
|
|
||||||
(VPERMI2PSMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PSMasked256load {sym} [off] x y ptr mask mem)
|
|
||||||
(VPERMI2DMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2DMasked256load {sym} [off] x y ptr mask mem)
|
|
||||||
(VPERMI2PSMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PSMasked512load {sym} [off] x y ptr mask mem)
|
|
||||||
(VPERMI2DMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2DMasked512load {sym} [off] x y ptr mask mem)
|
|
||||||
(VPERMI2PDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PDMasked128load {sym} [off] x y ptr mask mem)
|
|
||||||
(VPERMI2QMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2QMasked128load {sym} [off] x y ptr mask mem)
|
|
||||||
(VPERMI2PDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PDMasked256load {sym} [off] x y ptr mask mem)
|
|
||||||
(VPERMI2QMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2QMasked256load {sym} [off] x y ptr mask mem)
|
|
||||||
(VPERMI2PDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PDMasked512load {sym} [off] x y ptr mask mem)
|
|
||||||
(VPERMI2QMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2QMasked512load {sym} [off] x y ptr mask mem)
|
|
||||||
(VPSHUFD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHUFD512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
|
|
||||||
(VPSHUFDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHUFDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
|
|
||||||
(VPSHUFDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHUFDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
|
|
||||||
(VPSHUFDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHUFDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
|
|
||||||
(VPERMPSMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMPSMasked256load {sym} [off] x ptr mask mem)
|
(VPERMPSMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMPSMasked256load {sym} [off] x ptr mask mem)
|
||||||
(VPERMDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMDMasked256load {sym} [off] x ptr mask mem)
|
(VPERMDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMDMasked256load {sym} [off] x ptr mask mem)
|
||||||
(VPERMPSMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMPSMasked512load {sym} [off] x ptr mask mem)
|
(VPERMPSMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMPSMasked512load {sym} [off] x ptr mask mem)
|
||||||
|
|
@ -2862,6 +2863,10 @@
|
||||||
(VPBLENDMQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPBLENDMQMasked512load {sym} [off] x ptr mask mem)
|
(VPBLENDMQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPBLENDMQMasked512load {sym} [off] x ptr mask mem)
|
||||||
(VSHUFPS512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSHUFPS512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
|
(VSHUFPS512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSHUFPS512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
|
||||||
(VSHUFPD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSHUFPD512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
|
(VSHUFPD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSHUFPD512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
|
||||||
|
(VPSHUFD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHUFD512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
|
||||||
|
(VPSHUFDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHUFDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
|
||||||
|
(VPSHUFDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHUFDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
|
||||||
|
(VPSHUFDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHUFDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
|
||||||
(VPSLLD512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSLLD512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
|
(VPSLLD512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSLLD512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
|
||||||
(VPSLLQ512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSLLQ512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
|
(VPSLLQ512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSLLQ512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
|
||||||
(VPSLLDMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLDMasked128constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
|
(VPSLLDMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLDMasked128constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
|
||||||
|
|
|
||||||
|
|
@ -383,8 +383,10 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
|
||||||
{name: "VPDPWSSDMasked128", argLength: 4, reg: w3kw, asm: "VPDPWSSD", commutative: false, typ: "Vec128", resultInArg0: true},
|
{name: "VPDPWSSDMasked128", argLength: 4, reg: w3kw, asm: "VPDPWSSD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||||
{name: "VPDPWSSDMasked256", argLength: 4, reg: w3kw, asm: "VPDPWSSD", commutative: false, typ: "Vec256", resultInArg0: true},
|
{name: "VPDPWSSDMasked256", argLength: 4, reg: w3kw, asm: "VPDPWSSD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||||
{name: "VPDPWSSDMasked512", argLength: 4, reg: w3kw, asm: "VPDPWSSD", commutative: false, typ: "Vec512", resultInArg0: true},
|
{name: "VPDPWSSDMasked512", argLength: 4, reg: w3kw, asm: "VPDPWSSD", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||||
|
{name: "VPERMB128", argLength: 2, reg: w21, asm: "VPERMB", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||||
{name: "VPERMB256", argLength: 2, reg: w21, asm: "VPERMB", commutative: false, typ: "Vec256", resultInArg0: false},
|
{name: "VPERMB256", argLength: 2, reg: w21, asm: "VPERMB", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||||
{name: "VPERMB512", argLength: 2, reg: w21, asm: "VPERMB", commutative: false, typ: "Vec512", resultInArg0: false},
|
{name: "VPERMB512", argLength: 2, reg: w21, asm: "VPERMB", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||||
|
{name: "VPERMBMasked128", argLength: 3, reg: w2kw, asm: "VPERMB", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||||
{name: "VPERMBMasked256", argLength: 3, reg: w2kw, asm: "VPERMB", commutative: false, typ: "Vec256", resultInArg0: false},
|
{name: "VPERMBMasked256", argLength: 3, reg: w2kw, asm: "VPERMB", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||||
{name: "VPERMBMasked512", argLength: 3, reg: w2kw, asm: "VPERMB", commutative: false, typ: "Vec512", resultInArg0: false},
|
{name: "VPERMBMasked512", argLength: 3, reg: w2kw, asm: "VPERMB", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||||
{name: "VPERMD256", argLength: 2, reg: v21, asm: "VPERMD", commutative: false, typ: "Vec256", resultInArg0: false},
|
{name: "VPERMD256", argLength: 2, reg: v21, asm: "VPERMD", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||||
|
|
@ -1310,6 +1312,12 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
|
||||||
{name: "VPSHUFHWMasked128", argLength: 2, reg: wkw, asm: "VPSHUFHW", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
|
{name: "VPSHUFHWMasked128", argLength: 2, reg: wkw, asm: "VPSHUFHW", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||||
{name: "VPSHUFHWMasked256", argLength: 2, reg: wkw, asm: "VPSHUFHW", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
|
{name: "VPSHUFHWMasked256", argLength: 2, reg: wkw, asm: "VPSHUFHW", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||||
{name: "VPSHUFHWMasked512", argLength: 2, reg: wkw, asm: "VPSHUFHW", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: false},
|
{name: "VPSHUFHWMasked512", argLength: 2, reg: wkw, asm: "VPSHUFHW", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||||
|
{name: "VPSHUFLW128", argLength: 1, reg: w11, asm: "VPSHUFLW", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||||
|
{name: "VPSHUFLW256", argLength: 1, reg: v11, asm: "VPSHUFLW", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||||
|
{name: "VPSHUFLW512", argLength: 1, reg: w11, asm: "VPSHUFLW", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||||
|
{name: "VPSHUFLWMasked128", argLength: 2, reg: wkw, asm: "VPSHUFLW", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||||
|
{name: "VPSHUFLWMasked256", argLength: 2, reg: wkw, asm: "VPSHUFLW", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||||
|
{name: "VPSHUFLWMasked512", argLength: 2, reg: wkw, asm: "VPSHUFLW", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||||
{name: "VPSLLD128const", argLength: 1, reg: v11, asm: "VPSLLD", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
|
{name: "VPSLLD128const", argLength: 1, reg: v11, asm: "VPSLLD", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||||
{name: "VPSLLD256const", argLength: 1, reg: v11, asm: "VPSLLD", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
|
{name: "VPSLLD256const", argLength: 1, reg: v11, asm: "VPSLLD", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||||
{name: "VPSLLD512const", argLength: 1, reg: w11, asm: "VPSLLD", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: false},
|
{name: "VPSLLD512const", argLength: 1, reg: w11, asm: "VPSLLD", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||||
|
|
@ -2392,6 +2400,9 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
|
||||||
{name: "VPSHUFHWMasked128Merging", argLength: 3, reg: w2kw, asm: "VPSHUFHW", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
|
{name: "VPSHUFHWMasked128Merging", argLength: 3, reg: w2kw, asm: "VPSHUFHW", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||||
{name: "VPSHUFHWMasked256Merging", argLength: 3, reg: w2kw, asm: "VPSHUFHW", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
|
{name: "VPSHUFHWMasked256Merging", argLength: 3, reg: w2kw, asm: "VPSHUFHW", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||||
{name: "VPSHUFHWMasked512Merging", argLength: 3, reg: w2kw, asm: "VPSHUFHW", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
|
{name: "VPSHUFHWMasked512Merging", argLength: 3, reg: w2kw, asm: "VPSHUFHW", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||||
|
{name: "VPSHUFLWMasked128Merging", argLength: 3, reg: w2kw, asm: "VPSHUFLW", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||||
|
{name: "VPSHUFLWMasked256Merging", argLength: 3, reg: w2kw, asm: "VPSHUFLW", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||||
|
{name: "VPSHUFLWMasked512Merging", argLength: 3, reg: w2kw, asm: "VPSHUFLW", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||||
{name: "VPSLLDMasked128constMerging", argLength: 3, reg: w2kw, asm: "VPSLLD", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
|
{name: "VPSLLDMasked128constMerging", argLength: 3, reg: w2kw, asm: "VPSLLD", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||||
{name: "VPSLLDMasked256constMerging", argLength: 3, reg: w2kw, asm: "VPSLLD", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
|
{name: "VPSLLDMasked256constMerging", argLength: 3, reg: w2kw, asm: "VPSLLD", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||||
{name: "VPSLLDMasked512constMerging", argLength: 3, reg: w2kw, asm: "VPSLLD", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
|
{name: "VPSLLDMasked512constMerging", argLength: 3, reg: w2kw, asm: "VPSLLD", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||||
|
|
|
||||||
|
|
@ -207,6 +207,36 @@ func simdGenericOps() []opData {
|
||||||
{name: "CompressUint64x2", argLength: 2, commutative: false},
|
{name: "CompressUint64x2", argLength: 2, commutative: false},
|
||||||
{name: "CompressUint64x4", argLength: 2, commutative: false},
|
{name: "CompressUint64x4", argLength: 2, commutative: false},
|
||||||
{name: "CompressUint64x8", argLength: 2, commutative: false},
|
{name: "CompressUint64x8", argLength: 2, commutative: false},
|
||||||
|
{name: "ConcatPermuteFloat32x4", argLength: 3, commutative: false},
|
||||||
|
{name: "ConcatPermuteFloat32x8", argLength: 3, commutative: false},
|
||||||
|
{name: "ConcatPermuteFloat32x16", argLength: 3, commutative: false},
|
||||||
|
{name: "ConcatPermuteFloat64x2", argLength: 3, commutative: false},
|
||||||
|
{name: "ConcatPermuteFloat64x4", argLength: 3, commutative: false},
|
||||||
|
{name: "ConcatPermuteFloat64x8", argLength: 3, commutative: false},
|
||||||
|
{name: "ConcatPermuteInt8x16", argLength: 3, commutative: false},
|
||||||
|
{name: "ConcatPermuteInt8x32", argLength: 3, commutative: false},
|
||||||
|
{name: "ConcatPermuteInt8x64", argLength: 3, commutative: false},
|
||||||
|
{name: "ConcatPermuteInt16x8", argLength: 3, commutative: false},
|
||||||
|
{name: "ConcatPermuteInt16x16", argLength: 3, commutative: false},
|
||||||
|
{name: "ConcatPermuteInt16x32", argLength: 3, commutative: false},
|
||||||
|
{name: "ConcatPermuteInt32x4", argLength: 3, commutative: false},
|
||||||
|
{name: "ConcatPermuteInt32x8", argLength: 3, commutative: false},
|
||||||
|
{name: "ConcatPermuteInt32x16", argLength: 3, commutative: false},
|
||||||
|
{name: "ConcatPermuteInt64x2", argLength: 3, commutative: false},
|
||||||
|
{name: "ConcatPermuteInt64x4", argLength: 3, commutative: false},
|
||||||
|
{name: "ConcatPermuteInt64x8", argLength: 3, commutative: false},
|
||||||
|
{name: "ConcatPermuteUint8x16", argLength: 3, commutative: false},
|
||||||
|
{name: "ConcatPermuteUint8x32", argLength: 3, commutative: false},
|
||||||
|
{name: "ConcatPermuteUint8x64", argLength: 3, commutative: false},
|
||||||
|
{name: "ConcatPermuteUint16x8", argLength: 3, commutative: false},
|
||||||
|
{name: "ConcatPermuteUint16x16", argLength: 3, commutative: false},
|
||||||
|
{name: "ConcatPermuteUint16x32", argLength: 3, commutative: false},
|
||||||
|
{name: "ConcatPermuteUint32x4", argLength: 3, commutative: false},
|
||||||
|
{name: "ConcatPermuteUint32x8", argLength: 3, commutative: false},
|
||||||
|
{name: "ConcatPermuteUint32x16", argLength: 3, commutative: false},
|
||||||
|
{name: "ConcatPermuteUint64x2", argLength: 3, commutative: false},
|
||||||
|
{name: "ConcatPermuteUint64x4", argLength: 3, commutative: false},
|
||||||
|
{name: "ConcatPermuteUint64x8", argLength: 3, commutative: false},
|
||||||
{name: "ConvertToInt8Int16x8", argLength: 1, commutative: false},
|
{name: "ConvertToInt8Int16x8", argLength: 1, commutative: false},
|
||||||
{name: "ConvertToInt8Int16x16", argLength: 1, commutative: false},
|
{name: "ConvertToInt8Int16x16", argLength: 1, commutative: false},
|
||||||
{name: "ConvertToInt8Int16x32", argLength: 1, commutative: false},
|
{name: "ConvertToInt8Int16x32", argLength: 1, commutative: false},
|
||||||
|
|
@ -750,44 +780,10 @@ func simdGenericOps() []opData {
|
||||||
{name: "OrUint64x2", argLength: 2, commutative: true},
|
{name: "OrUint64x2", argLength: 2, commutative: true},
|
||||||
{name: "OrUint64x4", argLength: 2, commutative: true},
|
{name: "OrUint64x4", argLength: 2, commutative: true},
|
||||||
{name: "OrUint64x8", argLength: 2, commutative: true},
|
{name: "OrUint64x8", argLength: 2, commutative: true},
|
||||||
{name: "Permute2Float32x4", argLength: 3, commutative: false},
|
|
||||||
{name: "Permute2Float32x8", argLength: 3, commutative: false},
|
|
||||||
{name: "Permute2Float32x16", argLength: 3, commutative: false},
|
|
||||||
{name: "Permute2Float64x2", argLength: 3, commutative: false},
|
|
||||||
{name: "Permute2Float64x4", argLength: 3, commutative: false},
|
|
||||||
{name: "Permute2Float64x8", argLength: 3, commutative: false},
|
|
||||||
{name: "Permute2Int8x16", argLength: 3, commutative: false},
|
|
||||||
{name: "Permute2Int8x32", argLength: 3, commutative: false},
|
|
||||||
{name: "Permute2Int8x64", argLength: 3, commutative: false},
|
|
||||||
{name: "Permute2Int16x8", argLength: 3, commutative: false},
|
|
||||||
{name: "Permute2Int16x16", argLength: 3, commutative: false},
|
|
||||||
{name: "Permute2Int16x32", argLength: 3, commutative: false},
|
|
||||||
{name: "Permute2Int32x4", argLength: 3, commutative: false},
|
|
||||||
{name: "Permute2Int32x8", argLength: 3, commutative: false},
|
|
||||||
{name: "Permute2Int32x16", argLength: 3, commutative: false},
|
|
||||||
{name: "Permute2Int64x2", argLength: 3, commutative: false},
|
|
||||||
{name: "Permute2Int64x4", argLength: 3, commutative: false},
|
|
||||||
{name: "Permute2Int64x8", argLength: 3, commutative: false},
|
|
||||||
{name: "Permute2Uint8x16", argLength: 3, commutative: false},
|
|
||||||
{name: "Permute2Uint8x32", argLength: 3, commutative: false},
|
|
||||||
{name: "Permute2Uint8x64", argLength: 3, commutative: false},
|
|
||||||
{name: "Permute2Uint16x8", argLength: 3, commutative: false},
|
|
||||||
{name: "Permute2Uint16x16", argLength: 3, commutative: false},
|
|
||||||
{name: "Permute2Uint16x32", argLength: 3, commutative: false},
|
|
||||||
{name: "Permute2Uint32x4", argLength: 3, commutative: false},
|
|
||||||
{name: "Permute2Uint32x8", argLength: 3, commutative: false},
|
|
||||||
{name: "Permute2Uint32x16", argLength: 3, commutative: false},
|
|
||||||
{name: "Permute2Uint64x2", argLength: 3, commutative: false},
|
|
||||||
{name: "Permute2Uint64x4", argLength: 3, commutative: false},
|
|
||||||
{name: "Permute2Uint64x8", argLength: 3, commutative: false},
|
|
||||||
{name: "PermuteFloat32x8", argLength: 2, commutative: false},
|
{name: "PermuteFloat32x8", argLength: 2, commutative: false},
|
||||||
{name: "PermuteFloat32x16", argLength: 2, commutative: false},
|
{name: "PermuteFloat32x16", argLength: 2, commutative: false},
|
||||||
{name: "PermuteFloat64x4", argLength: 2, commutative: false},
|
{name: "PermuteFloat64x4", argLength: 2, commutative: false},
|
||||||
{name: "PermuteFloat64x8", argLength: 2, commutative: false},
|
{name: "PermuteFloat64x8", argLength: 2, commutative: false},
|
||||||
{name: "PermuteGroupedInt8x32", argLength: 2, commutative: false},
|
|
||||||
{name: "PermuteGroupedInt8x64", argLength: 2, commutative: false},
|
|
||||||
{name: "PermuteGroupedUint8x32", argLength: 2, commutative: false},
|
|
||||||
{name: "PermuteGroupedUint8x64", argLength: 2, commutative: false},
|
|
||||||
{name: "PermuteInt8x16", argLength: 2, commutative: false},
|
{name: "PermuteInt8x16", argLength: 2, commutative: false},
|
||||||
{name: "PermuteInt8x32", argLength: 2, commutative: false},
|
{name: "PermuteInt8x32", argLength: 2, commutative: false},
|
||||||
{name: "PermuteInt8x64", argLength: 2, commutative: false},
|
{name: "PermuteInt8x64", argLength: 2, commutative: false},
|
||||||
|
|
@ -798,6 +794,12 @@ func simdGenericOps() []opData {
|
||||||
{name: "PermuteInt32x16", argLength: 2, commutative: false},
|
{name: "PermuteInt32x16", argLength: 2, commutative: false},
|
||||||
{name: "PermuteInt64x4", argLength: 2, commutative: false},
|
{name: "PermuteInt64x4", argLength: 2, commutative: false},
|
||||||
{name: "PermuteInt64x8", argLength: 2, commutative: false},
|
{name: "PermuteInt64x8", argLength: 2, commutative: false},
|
||||||
|
{name: "PermuteOrZeroGroupedInt8x32", argLength: 2, commutative: false},
|
||||||
|
{name: "PermuteOrZeroGroupedInt8x64", argLength: 2, commutative: false},
|
||||||
|
{name: "PermuteOrZeroGroupedUint8x32", argLength: 2, commutative: false},
|
||||||
|
{name: "PermuteOrZeroGroupedUint8x64", argLength: 2, commutative: false},
|
||||||
|
{name: "PermuteOrZeroInt8x16", argLength: 2, commutative: false},
|
||||||
|
{name: "PermuteOrZeroUint8x16", argLength: 2, commutative: false},
|
||||||
{name: "PermuteUint8x16", argLength: 2, commutative: false},
|
{name: "PermuteUint8x16", argLength: 2, commutative: false},
|
||||||
{name: "PermuteUint8x32", argLength: 2, commutative: false},
|
{name: "PermuteUint8x32", argLength: 2, commutative: false},
|
||||||
{name: "PermuteUint8x64", argLength: 2, commutative: false},
|
{name: "PermuteUint8x64", argLength: 2, commutative: false},
|
||||||
|
|
@ -1151,28 +1153,6 @@ func simdGenericOps() []opData {
|
||||||
{name: "GetElemUint16x8", argLength: 1, commutative: false, aux: "UInt8"},
|
{name: "GetElemUint16x8", argLength: 1, commutative: false, aux: "UInt8"},
|
||||||
{name: "GetElemUint32x4", argLength: 1, commutative: false, aux: "UInt8"},
|
{name: "GetElemUint32x4", argLength: 1, commutative: false, aux: "UInt8"},
|
||||||
{name: "GetElemUint64x2", argLength: 1, commutative: false, aux: "UInt8"},
|
{name: "GetElemUint64x2", argLength: 1, commutative: false, aux: "UInt8"},
|
||||||
{name: "PermuteConstantGroupedInt32x8", argLength: 1, commutative: false, aux: "UInt8"},
|
|
||||||
{name: "PermuteConstantGroupedInt32x16", argLength: 1, commutative: false, aux: "UInt8"},
|
|
||||||
{name: "PermuteConstantGroupedUint32x8", argLength: 1, commutative: false, aux: "UInt8"},
|
|
||||||
{name: "PermuteConstantGroupedUint32x16", argLength: 1, commutative: false, aux: "UInt8"},
|
|
||||||
{name: "PermuteConstantHiGroupedInt16x16", argLength: 1, commutative: false, aux: "UInt8"},
|
|
||||||
{name: "PermuteConstantHiGroupedInt16x32", argLength: 1, commutative: false, aux: "UInt8"},
|
|
||||||
{name: "PermuteConstantHiGroupedUint16x16", argLength: 1, commutative: false, aux: "UInt8"},
|
|
||||||
{name: "PermuteConstantHiGroupedUint16x32", argLength: 1, commutative: false, aux: "UInt8"},
|
|
||||||
{name: "PermuteConstantHiInt16x8", argLength: 1, commutative: false, aux: "UInt8"},
|
|
||||||
{name: "PermuteConstantHiInt32x4", argLength: 1, commutative: false, aux: "UInt8"},
|
|
||||||
{name: "PermuteConstantHiUint16x8", argLength: 1, commutative: false, aux: "UInt8"},
|
|
||||||
{name: "PermuteConstantHiUint32x4", argLength: 1, commutative: false, aux: "UInt8"},
|
|
||||||
{name: "PermuteConstantInt32x4", argLength: 1, commutative: false, aux: "UInt8"},
|
|
||||||
{name: "PermuteConstantLoGroupedInt16x16", argLength: 1, commutative: false, aux: "UInt8"},
|
|
||||||
{name: "PermuteConstantLoGroupedInt16x32", argLength: 1, commutative: false, aux: "UInt8"},
|
|
||||||
{name: "PermuteConstantLoGroupedUint16x16", argLength: 1, commutative: false, aux: "UInt8"},
|
|
||||||
{name: "PermuteConstantLoGroupedUint16x32", argLength: 1, commutative: false, aux: "UInt8"},
|
|
||||||
{name: "PermuteConstantLoInt16x8", argLength: 1, commutative: false, aux: "UInt8"},
|
|
||||||
{name: "PermuteConstantLoInt32x4", argLength: 1, commutative: false, aux: "UInt8"},
|
|
||||||
{name: "PermuteConstantLoUint16x8", argLength: 1, commutative: false, aux: "UInt8"},
|
|
||||||
{name: "PermuteConstantLoUint32x4", argLength: 1, commutative: false, aux: "UInt8"},
|
|
||||||
{name: "PermuteConstantUint32x4", argLength: 1, commutative: false, aux: "UInt8"},
|
|
||||||
{name: "RotateAllLeftInt32x4", argLength: 1, commutative: false, aux: "UInt8"},
|
{name: "RotateAllLeftInt32x4", argLength: 1, commutative: false, aux: "UInt8"},
|
||||||
{name: "RotateAllLeftInt32x8", argLength: 1, commutative: false, aux: "UInt8"},
|
{name: "RotateAllLeftInt32x8", argLength: 1, commutative: false, aux: "UInt8"},
|
||||||
{name: "RotateAllLeftInt32x16", argLength: 1, commutative: false, aux: "UInt8"},
|
{name: "RotateAllLeftInt32x16", argLength: 1, commutative: false, aux: "UInt8"},
|
||||||
|
|
@ -1292,6 +1272,24 @@ func simdGenericOps() []opData {
|
||||||
{name: "concatSelectedConstantInt64x2", argLength: 2, commutative: false, aux: "UInt8"},
|
{name: "concatSelectedConstantInt64x2", argLength: 2, commutative: false, aux: "UInt8"},
|
||||||
{name: "concatSelectedConstantUint32x4", argLength: 2, commutative: false, aux: "UInt8"},
|
{name: "concatSelectedConstantUint32x4", argLength: 2, commutative: false, aux: "UInt8"},
|
||||||
{name: "concatSelectedConstantUint64x2", argLength: 2, commutative: false, aux: "UInt8"},
|
{name: "concatSelectedConstantUint64x2", argLength: 2, commutative: false, aux: "UInt8"},
|
||||||
|
{name: "permuteScalarsGroupedInt32x8", argLength: 1, commutative: false, aux: "UInt8"},
|
||||||
|
{name: "permuteScalarsGroupedInt32x16", argLength: 1, commutative: false, aux: "UInt8"},
|
||||||
|
{name: "permuteScalarsGroupedUint32x8", argLength: 1, commutative: false, aux: "UInt8"},
|
||||||
|
{name: "permuteScalarsGroupedUint32x16", argLength: 1, commutative: false, aux: "UInt8"},
|
||||||
|
{name: "permuteScalarsHiGroupedInt16x16", argLength: 1, commutative: false, aux: "UInt8"},
|
||||||
|
{name: "permuteScalarsHiGroupedInt16x32", argLength: 1, commutative: false, aux: "UInt8"},
|
||||||
|
{name: "permuteScalarsHiGroupedUint16x16", argLength: 1, commutative: false, aux: "UInt8"},
|
||||||
|
{name: "permuteScalarsHiGroupedUint16x32", argLength: 1, commutative: false, aux: "UInt8"},
|
||||||
|
{name: "permuteScalarsHiInt16x8", argLength: 1, commutative: false, aux: "UInt8"},
|
||||||
|
{name: "permuteScalarsHiUint16x8", argLength: 1, commutative: false, aux: "UInt8"},
|
||||||
|
{name: "permuteScalarsInt32x4", argLength: 1, commutative: false, aux: "UInt8"},
|
||||||
|
{name: "permuteScalarsLoGroupedInt16x16", argLength: 1, commutative: false, aux: "UInt8"},
|
||||||
|
{name: "permuteScalarsLoGroupedInt16x32", argLength: 1, commutative: false, aux: "UInt8"},
|
||||||
|
{name: "permuteScalarsLoGroupedUint16x16", argLength: 1, commutative: false, aux: "UInt8"},
|
||||||
|
{name: "permuteScalarsLoGroupedUint16x32", argLength: 1, commutative: false, aux: "UInt8"},
|
||||||
|
{name: "permuteScalarsLoInt16x8", argLength: 1, commutative: false, aux: "UInt8"},
|
||||||
|
{name: "permuteScalarsLoUint16x8", argLength: 1, commutative: false, aux: "UInt8"},
|
||||||
|
{name: "permuteScalarsUint32x4", argLength: 1, commutative: false, aux: "UInt8"},
|
||||||
{name: "ternInt32x4", argLength: 3, commutative: false, aux: "UInt8"},
|
{name: "ternInt32x4", argLength: 3, commutative: false, aux: "UInt8"},
|
||||||
{name: "ternInt32x8", argLength: 3, commutative: false, aux: "UInt8"},
|
{name: "ternInt32x8", argLength: 3, commutative: false, aux: "UInt8"},
|
||||||
{name: "ternInt32x16", argLength: 3, commutative: false, aux: "UInt8"},
|
{name: "ternInt32x16", argLength: 3, commutative: false, aux: "UInt8"},
|
||||||
|
|
|
||||||
|
|
@ -1624,8 +1624,10 @@ const (
|
||||||
OpAMD64VPDPWSSDMasked128
|
OpAMD64VPDPWSSDMasked128
|
||||||
OpAMD64VPDPWSSDMasked256
|
OpAMD64VPDPWSSDMasked256
|
||||||
OpAMD64VPDPWSSDMasked512
|
OpAMD64VPDPWSSDMasked512
|
||||||
|
OpAMD64VPERMB128
|
||||||
OpAMD64VPERMB256
|
OpAMD64VPERMB256
|
||||||
OpAMD64VPERMB512
|
OpAMD64VPERMB512
|
||||||
|
OpAMD64VPERMBMasked128
|
||||||
OpAMD64VPERMBMasked256
|
OpAMD64VPERMBMasked256
|
||||||
OpAMD64VPERMBMasked512
|
OpAMD64VPERMBMasked512
|
||||||
OpAMD64VPERMD256
|
OpAMD64VPERMD256
|
||||||
|
|
@ -2551,6 +2553,12 @@ const (
|
||||||
OpAMD64VPSHUFHWMasked128
|
OpAMD64VPSHUFHWMasked128
|
||||||
OpAMD64VPSHUFHWMasked256
|
OpAMD64VPSHUFHWMasked256
|
||||||
OpAMD64VPSHUFHWMasked512
|
OpAMD64VPSHUFHWMasked512
|
||||||
|
OpAMD64VPSHUFLW128
|
||||||
|
OpAMD64VPSHUFLW256
|
||||||
|
OpAMD64VPSHUFLW512
|
||||||
|
OpAMD64VPSHUFLWMasked128
|
||||||
|
OpAMD64VPSHUFLWMasked256
|
||||||
|
OpAMD64VPSHUFLWMasked512
|
||||||
OpAMD64VPSLLD128const
|
OpAMD64VPSLLD128const
|
||||||
OpAMD64VPSLLD256const
|
OpAMD64VPSLLD256const
|
||||||
OpAMD64VPSLLD512const
|
OpAMD64VPSLLD512const
|
||||||
|
|
@ -3633,6 +3641,9 @@ const (
|
||||||
OpAMD64VPSHUFHWMasked128Merging
|
OpAMD64VPSHUFHWMasked128Merging
|
||||||
OpAMD64VPSHUFHWMasked256Merging
|
OpAMD64VPSHUFHWMasked256Merging
|
||||||
OpAMD64VPSHUFHWMasked512Merging
|
OpAMD64VPSHUFHWMasked512Merging
|
||||||
|
OpAMD64VPSHUFLWMasked128Merging
|
||||||
|
OpAMD64VPSHUFLWMasked256Merging
|
||||||
|
OpAMD64VPSHUFLWMasked512Merging
|
||||||
OpAMD64VPSLLDMasked128constMerging
|
OpAMD64VPSLLDMasked128constMerging
|
||||||
OpAMD64VPSLLDMasked256constMerging
|
OpAMD64VPSLLDMasked256constMerging
|
||||||
OpAMD64VPSLLDMasked512constMerging
|
OpAMD64VPSLLDMasked512constMerging
|
||||||
|
|
@ -6155,6 +6166,36 @@ const (
|
||||||
OpCompressUint64x2
|
OpCompressUint64x2
|
||||||
OpCompressUint64x4
|
OpCompressUint64x4
|
||||||
OpCompressUint64x8
|
OpCompressUint64x8
|
||||||
|
OpConcatPermuteFloat32x4
|
||||||
|
OpConcatPermuteFloat32x8
|
||||||
|
OpConcatPermuteFloat32x16
|
||||||
|
OpConcatPermuteFloat64x2
|
||||||
|
OpConcatPermuteFloat64x4
|
||||||
|
OpConcatPermuteFloat64x8
|
||||||
|
OpConcatPermuteInt8x16
|
||||||
|
OpConcatPermuteInt8x32
|
||||||
|
OpConcatPermuteInt8x64
|
||||||
|
OpConcatPermuteInt16x8
|
||||||
|
OpConcatPermuteInt16x16
|
||||||
|
OpConcatPermuteInt16x32
|
||||||
|
OpConcatPermuteInt32x4
|
||||||
|
OpConcatPermuteInt32x8
|
||||||
|
OpConcatPermuteInt32x16
|
||||||
|
OpConcatPermuteInt64x2
|
||||||
|
OpConcatPermuteInt64x4
|
||||||
|
OpConcatPermuteInt64x8
|
||||||
|
OpConcatPermuteUint8x16
|
||||||
|
OpConcatPermuteUint8x32
|
||||||
|
OpConcatPermuteUint8x64
|
||||||
|
OpConcatPermuteUint16x8
|
||||||
|
OpConcatPermuteUint16x16
|
||||||
|
OpConcatPermuteUint16x32
|
||||||
|
OpConcatPermuteUint32x4
|
||||||
|
OpConcatPermuteUint32x8
|
||||||
|
OpConcatPermuteUint32x16
|
||||||
|
OpConcatPermuteUint64x2
|
||||||
|
OpConcatPermuteUint64x4
|
||||||
|
OpConcatPermuteUint64x8
|
||||||
OpConvertToInt8Int16x8
|
OpConvertToInt8Int16x8
|
||||||
OpConvertToInt8Int16x16
|
OpConvertToInt8Int16x16
|
||||||
OpConvertToInt8Int16x32
|
OpConvertToInt8Int16x32
|
||||||
|
|
@ -6698,44 +6739,10 @@ const (
|
||||||
OpOrUint64x2
|
OpOrUint64x2
|
||||||
OpOrUint64x4
|
OpOrUint64x4
|
||||||
OpOrUint64x8
|
OpOrUint64x8
|
||||||
OpPermute2Float32x4
|
|
||||||
OpPermute2Float32x8
|
|
||||||
OpPermute2Float32x16
|
|
||||||
OpPermute2Float64x2
|
|
||||||
OpPermute2Float64x4
|
|
||||||
OpPermute2Float64x8
|
|
||||||
OpPermute2Int8x16
|
|
||||||
OpPermute2Int8x32
|
|
||||||
OpPermute2Int8x64
|
|
||||||
OpPermute2Int16x8
|
|
||||||
OpPermute2Int16x16
|
|
||||||
OpPermute2Int16x32
|
|
||||||
OpPermute2Int32x4
|
|
||||||
OpPermute2Int32x8
|
|
||||||
OpPermute2Int32x16
|
|
||||||
OpPermute2Int64x2
|
|
||||||
OpPermute2Int64x4
|
|
||||||
OpPermute2Int64x8
|
|
||||||
OpPermute2Uint8x16
|
|
||||||
OpPermute2Uint8x32
|
|
||||||
OpPermute2Uint8x64
|
|
||||||
OpPermute2Uint16x8
|
|
||||||
OpPermute2Uint16x16
|
|
||||||
OpPermute2Uint16x32
|
|
||||||
OpPermute2Uint32x4
|
|
||||||
OpPermute2Uint32x8
|
|
||||||
OpPermute2Uint32x16
|
|
||||||
OpPermute2Uint64x2
|
|
||||||
OpPermute2Uint64x4
|
|
||||||
OpPermute2Uint64x8
|
|
||||||
OpPermuteFloat32x8
|
OpPermuteFloat32x8
|
||||||
OpPermuteFloat32x16
|
OpPermuteFloat32x16
|
||||||
OpPermuteFloat64x4
|
OpPermuteFloat64x4
|
||||||
OpPermuteFloat64x8
|
OpPermuteFloat64x8
|
||||||
OpPermuteGroupedInt8x32
|
|
||||||
OpPermuteGroupedInt8x64
|
|
||||||
OpPermuteGroupedUint8x32
|
|
||||||
OpPermuteGroupedUint8x64
|
|
||||||
OpPermuteInt8x16
|
OpPermuteInt8x16
|
||||||
OpPermuteInt8x32
|
OpPermuteInt8x32
|
||||||
OpPermuteInt8x64
|
OpPermuteInt8x64
|
||||||
|
|
@ -6746,6 +6753,12 @@ const (
|
||||||
OpPermuteInt32x16
|
OpPermuteInt32x16
|
||||||
OpPermuteInt64x4
|
OpPermuteInt64x4
|
||||||
OpPermuteInt64x8
|
OpPermuteInt64x8
|
||||||
|
OpPermuteOrZeroGroupedInt8x32
|
||||||
|
OpPermuteOrZeroGroupedInt8x64
|
||||||
|
OpPermuteOrZeroGroupedUint8x32
|
||||||
|
OpPermuteOrZeroGroupedUint8x64
|
||||||
|
OpPermuteOrZeroInt8x16
|
||||||
|
OpPermuteOrZeroUint8x16
|
||||||
OpPermuteUint8x16
|
OpPermuteUint8x16
|
||||||
OpPermuteUint8x32
|
OpPermuteUint8x32
|
||||||
OpPermuteUint8x64
|
OpPermuteUint8x64
|
||||||
|
|
@ -7099,28 +7112,6 @@ const (
|
||||||
OpGetElemUint16x8
|
OpGetElemUint16x8
|
||||||
OpGetElemUint32x4
|
OpGetElemUint32x4
|
||||||
OpGetElemUint64x2
|
OpGetElemUint64x2
|
||||||
OpPermuteConstantGroupedInt32x8
|
|
||||||
OpPermuteConstantGroupedInt32x16
|
|
||||||
OpPermuteConstantGroupedUint32x8
|
|
||||||
OpPermuteConstantGroupedUint32x16
|
|
||||||
OpPermuteConstantHiGroupedInt16x16
|
|
||||||
OpPermuteConstantHiGroupedInt16x32
|
|
||||||
OpPermuteConstantHiGroupedUint16x16
|
|
||||||
OpPermuteConstantHiGroupedUint16x32
|
|
||||||
OpPermuteConstantHiInt16x8
|
|
||||||
OpPermuteConstantHiInt32x4
|
|
||||||
OpPermuteConstantHiUint16x8
|
|
||||||
OpPermuteConstantHiUint32x4
|
|
||||||
OpPermuteConstantInt32x4
|
|
||||||
OpPermuteConstantLoGroupedInt16x16
|
|
||||||
OpPermuteConstantLoGroupedInt16x32
|
|
||||||
OpPermuteConstantLoGroupedUint16x16
|
|
||||||
OpPermuteConstantLoGroupedUint16x32
|
|
||||||
OpPermuteConstantLoInt16x8
|
|
||||||
OpPermuteConstantLoInt32x4
|
|
||||||
OpPermuteConstantLoUint16x8
|
|
||||||
OpPermuteConstantLoUint32x4
|
|
||||||
OpPermuteConstantUint32x4
|
|
||||||
OpRotateAllLeftInt32x4
|
OpRotateAllLeftInt32x4
|
||||||
OpRotateAllLeftInt32x8
|
OpRotateAllLeftInt32x8
|
||||||
OpRotateAllLeftInt32x16
|
OpRotateAllLeftInt32x16
|
||||||
|
|
@ -7240,6 +7231,24 @@ const (
|
||||||
OpconcatSelectedConstantInt64x2
|
OpconcatSelectedConstantInt64x2
|
||||||
OpconcatSelectedConstantUint32x4
|
OpconcatSelectedConstantUint32x4
|
||||||
OpconcatSelectedConstantUint64x2
|
OpconcatSelectedConstantUint64x2
|
||||||
|
OppermuteScalarsGroupedInt32x8
|
||||||
|
OppermuteScalarsGroupedInt32x16
|
||||||
|
OppermuteScalarsGroupedUint32x8
|
||||||
|
OppermuteScalarsGroupedUint32x16
|
||||||
|
OppermuteScalarsHiGroupedInt16x16
|
||||||
|
OppermuteScalarsHiGroupedInt16x32
|
||||||
|
OppermuteScalarsHiGroupedUint16x16
|
||||||
|
OppermuteScalarsHiGroupedUint16x32
|
||||||
|
OppermuteScalarsHiInt16x8
|
||||||
|
OppermuteScalarsHiUint16x8
|
||||||
|
OppermuteScalarsInt32x4
|
||||||
|
OppermuteScalarsLoGroupedInt16x16
|
||||||
|
OppermuteScalarsLoGroupedInt16x32
|
||||||
|
OppermuteScalarsLoGroupedUint16x16
|
||||||
|
OppermuteScalarsLoGroupedUint16x32
|
||||||
|
OppermuteScalarsLoInt16x8
|
||||||
|
OppermuteScalarsLoUint16x8
|
||||||
|
OppermuteScalarsUint32x4
|
||||||
OpternInt32x4
|
OpternInt32x4
|
||||||
OpternInt32x8
|
OpternInt32x8
|
||||||
OpternInt32x16
|
OpternInt32x16
|
||||||
|
|
@ -26142,6 +26151,20 @@ var opcodeTable = [...]opInfo{
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "VPERMB128",
|
||||||
|
argLen: 2,
|
||||||
|
asm: x86.AVPERMB,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "VPERMB256",
|
name: "VPERMB256",
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
|
|
@ -26170,6 +26193,21 @@ var opcodeTable = [...]opInfo{
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "VPERMBMasked128",
|
||||||
|
argLen: 3,
|
||||||
|
asm: x86.AVPERMB,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "VPERMBMasked256",
|
name: "VPERMBMasked256",
|
||||||
argLen: 3,
|
argLen: 3,
|
||||||
|
|
@ -39744,6 +39782,93 @@ var opcodeTable = [...]opInfo{
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "VPSHUFLW128",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 1,
|
||||||
|
asm: x86.AVPSHUFLW,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VPSHUFLW256",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 1,
|
||||||
|
asm: x86.AVPSHUFLW,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VPSHUFLW512",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 1,
|
||||||
|
asm: x86.AVPSHUFLW,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VPSHUFLWMasked128",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 2,
|
||||||
|
asm: x86.AVPSHUFLW,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||||
|
{0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VPSHUFLWMasked256",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 2,
|
||||||
|
asm: x86.AVPSHUFLW,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||||
|
{0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VPSHUFLWMasked512",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 2,
|
||||||
|
asm: x86.AVPSHUFLW,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||||
|
{0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "VPSLLD128const",
|
name: "VPSLLD128const",
|
||||||
auxType: auxUInt8,
|
auxType: auxUInt8,
|
||||||
|
|
@ -57607,6 +57732,57 @@ var opcodeTable = [...]opInfo{
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "VPSHUFLWMasked128Merging",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 3,
|
||||||
|
resultInArg0: true,
|
||||||
|
asm: x86.AVPSHUFLW,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VPSHUFLWMasked256Merging",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 3,
|
||||||
|
resultInArg0: true,
|
||||||
|
asm: x86.AVPSHUFLW,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VPSHUFLWMasked512Merging",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 3,
|
||||||
|
resultInArg0: true,
|
||||||
|
asm: x86.AVPSHUFLW,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "VPSLLDMasked128constMerging",
|
name: "VPSLLDMasked128constMerging",
|
||||||
auxType: auxUInt8,
|
auxType: auxUInt8,
|
||||||
|
|
@ -86874,6 +87050,156 @@ var opcodeTable = [...]opInfo{
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "ConcatPermuteFloat32x4",
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ConcatPermuteFloat32x8",
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ConcatPermuteFloat32x16",
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ConcatPermuteFloat64x2",
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ConcatPermuteFloat64x4",
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ConcatPermuteFloat64x8",
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ConcatPermuteInt8x16",
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ConcatPermuteInt8x32",
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ConcatPermuteInt8x64",
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ConcatPermuteInt16x8",
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ConcatPermuteInt16x16",
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ConcatPermuteInt16x32",
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ConcatPermuteInt32x4",
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ConcatPermuteInt32x8",
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ConcatPermuteInt32x16",
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ConcatPermuteInt64x2",
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ConcatPermuteInt64x4",
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ConcatPermuteInt64x8",
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ConcatPermuteUint8x16",
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ConcatPermuteUint8x32",
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ConcatPermuteUint8x64",
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ConcatPermuteUint16x8",
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ConcatPermuteUint16x16",
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ConcatPermuteUint16x32",
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ConcatPermuteUint32x4",
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ConcatPermuteUint32x8",
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ConcatPermuteUint32x16",
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ConcatPermuteUint64x2",
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ConcatPermuteUint64x4",
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ConcatPermuteUint64x8",
|
||||||
|
argLen: 3,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "ConvertToInt8Int16x8",
|
name: "ConvertToInt8Int16x8",
|
||||||
argLen: 1,
|
argLen: 1,
|
||||||
|
|
@ -89757,156 +90083,6 @@ var opcodeTable = [...]opInfo{
|
||||||
commutative: true,
|
commutative: true,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "Permute2Float32x4",
|
|
||||||
argLen: 3,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "Permute2Float32x8",
|
|
||||||
argLen: 3,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "Permute2Float32x16",
|
|
||||||
argLen: 3,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "Permute2Float64x2",
|
|
||||||
argLen: 3,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "Permute2Float64x4",
|
|
||||||
argLen: 3,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "Permute2Float64x8",
|
|
||||||
argLen: 3,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "Permute2Int8x16",
|
|
||||||
argLen: 3,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "Permute2Int8x32",
|
|
||||||
argLen: 3,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "Permute2Int8x64",
|
|
||||||
argLen: 3,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "Permute2Int16x8",
|
|
||||||
argLen: 3,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "Permute2Int16x16",
|
|
||||||
argLen: 3,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "Permute2Int16x32",
|
|
||||||
argLen: 3,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "Permute2Int32x4",
|
|
||||||
argLen: 3,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "Permute2Int32x8",
|
|
||||||
argLen: 3,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "Permute2Int32x16",
|
|
||||||
argLen: 3,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "Permute2Int64x2",
|
|
||||||
argLen: 3,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "Permute2Int64x4",
|
|
||||||
argLen: 3,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "Permute2Int64x8",
|
|
||||||
argLen: 3,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "Permute2Uint8x16",
|
|
||||||
argLen: 3,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "Permute2Uint8x32",
|
|
||||||
argLen: 3,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "Permute2Uint8x64",
|
|
||||||
argLen: 3,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "Permute2Uint16x8",
|
|
||||||
argLen: 3,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "Permute2Uint16x16",
|
|
||||||
argLen: 3,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "Permute2Uint16x32",
|
|
||||||
argLen: 3,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "Permute2Uint32x4",
|
|
||||||
argLen: 3,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "Permute2Uint32x8",
|
|
||||||
argLen: 3,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "Permute2Uint32x16",
|
|
||||||
argLen: 3,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "Permute2Uint64x2",
|
|
||||||
argLen: 3,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "Permute2Uint64x4",
|
|
||||||
argLen: 3,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "Permute2Uint64x8",
|
|
||||||
argLen: 3,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
name: "PermuteFloat32x8",
|
name: "PermuteFloat32x8",
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
|
|
@ -89927,26 +90103,6 @@ var opcodeTable = [...]opInfo{
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "PermuteGroupedInt8x32",
|
|
||||||
argLen: 2,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "PermuteGroupedInt8x64",
|
|
||||||
argLen: 2,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "PermuteGroupedUint8x32",
|
|
||||||
argLen: 2,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "PermuteGroupedUint8x64",
|
|
||||||
argLen: 2,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
name: "PermuteInt8x16",
|
name: "PermuteInt8x16",
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
|
|
@ -89997,6 +90153,36 @@ var opcodeTable = [...]opInfo{
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "PermuteOrZeroGroupedInt8x32",
|
||||||
|
argLen: 2,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "PermuteOrZeroGroupedInt8x64",
|
||||||
|
argLen: 2,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "PermuteOrZeroGroupedUint8x32",
|
||||||
|
argLen: 2,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "PermuteOrZeroGroupedUint8x64",
|
||||||
|
argLen: 2,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "PermuteOrZeroInt8x16",
|
||||||
|
argLen: 2,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "PermuteOrZeroUint8x16",
|
||||||
|
argLen: 2,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "PermuteUint8x16",
|
name: "PermuteUint8x16",
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
|
|
@ -91830,138 +92016,6 @@ var opcodeTable = [...]opInfo{
|
||||||
argLen: 1,
|
argLen: 1,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "PermuteConstantGroupedInt32x8",
|
|
||||||
auxType: auxUInt8,
|
|
||||||
argLen: 1,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "PermuteConstantGroupedInt32x16",
|
|
||||||
auxType: auxUInt8,
|
|
||||||
argLen: 1,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "PermuteConstantGroupedUint32x8",
|
|
||||||
auxType: auxUInt8,
|
|
||||||
argLen: 1,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "PermuteConstantGroupedUint32x16",
|
|
||||||
auxType: auxUInt8,
|
|
||||||
argLen: 1,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "PermuteConstantHiGroupedInt16x16",
|
|
||||||
auxType: auxUInt8,
|
|
||||||
argLen: 1,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "PermuteConstantHiGroupedInt16x32",
|
|
||||||
auxType: auxUInt8,
|
|
||||||
argLen: 1,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "PermuteConstantHiGroupedUint16x16",
|
|
||||||
auxType: auxUInt8,
|
|
||||||
argLen: 1,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "PermuteConstantHiGroupedUint16x32",
|
|
||||||
auxType: auxUInt8,
|
|
||||||
argLen: 1,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "PermuteConstantHiInt16x8",
|
|
||||||
auxType: auxUInt8,
|
|
||||||
argLen: 1,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "PermuteConstantHiInt32x4",
|
|
||||||
auxType: auxUInt8,
|
|
||||||
argLen: 1,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "PermuteConstantHiUint16x8",
|
|
||||||
auxType: auxUInt8,
|
|
||||||
argLen: 1,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "PermuteConstantHiUint32x4",
|
|
||||||
auxType: auxUInt8,
|
|
||||||
argLen: 1,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "PermuteConstantInt32x4",
|
|
||||||
auxType: auxUInt8,
|
|
||||||
argLen: 1,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "PermuteConstantLoGroupedInt16x16",
|
|
||||||
auxType: auxUInt8,
|
|
||||||
argLen: 1,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "PermuteConstantLoGroupedInt16x32",
|
|
||||||
auxType: auxUInt8,
|
|
||||||
argLen: 1,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "PermuteConstantLoGroupedUint16x16",
|
|
||||||
auxType: auxUInt8,
|
|
||||||
argLen: 1,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "PermuteConstantLoGroupedUint16x32",
|
|
||||||
auxType: auxUInt8,
|
|
||||||
argLen: 1,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "PermuteConstantLoInt16x8",
|
|
||||||
auxType: auxUInt8,
|
|
||||||
argLen: 1,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "PermuteConstantLoInt32x4",
|
|
||||||
auxType: auxUInt8,
|
|
||||||
argLen: 1,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "PermuteConstantLoUint16x8",
|
|
||||||
auxType: auxUInt8,
|
|
||||||
argLen: 1,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "PermuteConstantLoUint32x4",
|
|
||||||
auxType: auxUInt8,
|
|
||||||
argLen: 1,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "PermuteConstantUint32x4",
|
|
||||||
auxType: auxUInt8,
|
|
||||||
argLen: 1,
|
|
||||||
generic: true,
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
name: "RotateAllLeftInt32x4",
|
name: "RotateAllLeftInt32x4",
|
||||||
auxType: auxUInt8,
|
auxType: auxUInt8,
|
||||||
|
|
@ -92676,6 +92730,114 @@ var opcodeTable = [...]opInfo{
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "permuteScalarsGroupedInt32x8",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 1,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "permuteScalarsGroupedInt32x16",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 1,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "permuteScalarsGroupedUint32x8",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 1,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "permuteScalarsGroupedUint32x16",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 1,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "permuteScalarsHiGroupedInt16x16",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 1,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "permuteScalarsHiGroupedInt16x32",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 1,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "permuteScalarsHiGroupedUint16x16",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 1,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "permuteScalarsHiGroupedUint16x32",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 1,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "permuteScalarsHiInt16x8",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 1,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "permuteScalarsHiUint16x8",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 1,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "permuteScalarsInt32x4",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 1,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "permuteScalarsLoGroupedInt16x16",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 1,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "permuteScalarsLoGroupedInt16x32",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 1,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "permuteScalarsLoGroupedUint16x16",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 1,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "permuteScalarsLoGroupedUint16x32",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 1,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "permuteScalarsLoInt16x8",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 1,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "permuteScalarsLoUint16x8",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 1,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "permuteScalarsUint32x4",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 1,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "ternInt32x4",
|
name: "ternInt32x4",
|
||||||
auxType: auxUInt8,
|
auxType: auxUInt8,
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -228,6 +228,36 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
||||||
addF(simdPackage, "Uint64x2.Compress", opLen2(ssa.OpCompressUint64x2, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Uint64x2.Compress", opLen2(ssa.OpCompressUint64x2, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "Uint64x4.Compress", opLen2(ssa.OpCompressUint64x4, types.TypeVec256), sys.AMD64)
|
addF(simdPackage, "Uint64x4.Compress", opLen2(ssa.OpCompressUint64x4, types.TypeVec256), sys.AMD64)
|
||||||
addF(simdPackage, "Uint64x8.Compress", opLen2(ssa.OpCompressUint64x8, types.TypeVec512), sys.AMD64)
|
addF(simdPackage, "Uint64x8.Compress", opLen2(ssa.OpCompressUint64x8, types.TypeVec512), sys.AMD64)
|
||||||
|
addF(simdPackage, "Int8x16.ConcatPermute", opLen3_231(ssa.OpConcatPermuteInt8x16, types.TypeVec128), sys.AMD64)
|
||||||
|
addF(simdPackage, "Uint8x16.ConcatPermute", opLen3_231(ssa.OpConcatPermuteUint8x16, types.TypeVec128), sys.AMD64)
|
||||||
|
addF(simdPackage, "Int8x32.ConcatPermute", opLen3_231(ssa.OpConcatPermuteInt8x32, types.TypeVec256), sys.AMD64)
|
||||||
|
addF(simdPackage, "Uint8x32.ConcatPermute", opLen3_231(ssa.OpConcatPermuteUint8x32, types.TypeVec256), sys.AMD64)
|
||||||
|
addF(simdPackage, "Int8x64.ConcatPermute", opLen3_231(ssa.OpConcatPermuteInt8x64, types.TypeVec512), sys.AMD64)
|
||||||
|
addF(simdPackage, "Uint8x64.ConcatPermute", opLen3_231(ssa.OpConcatPermuteUint8x64, types.TypeVec512), sys.AMD64)
|
||||||
|
addF(simdPackage, "Int16x8.ConcatPermute", opLen3_231(ssa.OpConcatPermuteInt16x8, types.TypeVec128), sys.AMD64)
|
||||||
|
addF(simdPackage, "Uint16x8.ConcatPermute", opLen3_231(ssa.OpConcatPermuteUint16x8, types.TypeVec128), sys.AMD64)
|
||||||
|
addF(simdPackage, "Int16x16.ConcatPermute", opLen3_231(ssa.OpConcatPermuteInt16x16, types.TypeVec256), sys.AMD64)
|
||||||
|
addF(simdPackage, "Uint16x16.ConcatPermute", opLen3_231(ssa.OpConcatPermuteUint16x16, types.TypeVec256), sys.AMD64)
|
||||||
|
addF(simdPackage, "Int16x32.ConcatPermute", opLen3_231(ssa.OpConcatPermuteInt16x32, types.TypeVec512), sys.AMD64)
|
||||||
|
addF(simdPackage, "Uint16x32.ConcatPermute", opLen3_231(ssa.OpConcatPermuteUint16x32, types.TypeVec512), sys.AMD64)
|
||||||
|
addF(simdPackage, "Float32x4.ConcatPermute", opLen3_231(ssa.OpConcatPermuteFloat32x4, types.TypeVec128), sys.AMD64)
|
||||||
|
addF(simdPackage, "Int32x4.ConcatPermute", opLen3_231(ssa.OpConcatPermuteInt32x4, types.TypeVec128), sys.AMD64)
|
||||||
|
addF(simdPackage, "Uint32x4.ConcatPermute", opLen3_231(ssa.OpConcatPermuteUint32x4, types.TypeVec128), sys.AMD64)
|
||||||
|
addF(simdPackage, "Float32x8.ConcatPermute", opLen3_231(ssa.OpConcatPermuteFloat32x8, types.TypeVec256), sys.AMD64)
|
||||||
|
addF(simdPackage, "Int32x8.ConcatPermute", opLen3_231(ssa.OpConcatPermuteInt32x8, types.TypeVec256), sys.AMD64)
|
||||||
|
addF(simdPackage, "Uint32x8.ConcatPermute", opLen3_231(ssa.OpConcatPermuteUint32x8, types.TypeVec256), sys.AMD64)
|
||||||
|
addF(simdPackage, "Float32x16.ConcatPermute", opLen3_231(ssa.OpConcatPermuteFloat32x16, types.TypeVec512), sys.AMD64)
|
||||||
|
addF(simdPackage, "Int32x16.ConcatPermute", opLen3_231(ssa.OpConcatPermuteInt32x16, types.TypeVec512), sys.AMD64)
|
||||||
|
addF(simdPackage, "Uint32x16.ConcatPermute", opLen3_231(ssa.OpConcatPermuteUint32x16, types.TypeVec512), sys.AMD64)
|
||||||
|
addF(simdPackage, "Float64x2.ConcatPermute", opLen3_231(ssa.OpConcatPermuteFloat64x2, types.TypeVec128), sys.AMD64)
|
||||||
|
addF(simdPackage, "Int64x2.ConcatPermute", opLen3_231(ssa.OpConcatPermuteInt64x2, types.TypeVec128), sys.AMD64)
|
||||||
|
addF(simdPackage, "Uint64x2.ConcatPermute", opLen3_231(ssa.OpConcatPermuteUint64x2, types.TypeVec128), sys.AMD64)
|
||||||
|
addF(simdPackage, "Float64x4.ConcatPermute", opLen3_231(ssa.OpConcatPermuteFloat64x4, types.TypeVec256), sys.AMD64)
|
||||||
|
addF(simdPackage, "Int64x4.ConcatPermute", opLen3_231(ssa.OpConcatPermuteInt64x4, types.TypeVec256), sys.AMD64)
|
||||||
|
addF(simdPackage, "Uint64x4.ConcatPermute", opLen3_231(ssa.OpConcatPermuteUint64x4, types.TypeVec256), sys.AMD64)
|
||||||
|
addF(simdPackage, "Float64x8.ConcatPermute", opLen3_231(ssa.OpConcatPermuteFloat64x8, types.TypeVec512), sys.AMD64)
|
||||||
|
addF(simdPackage, "Int64x8.ConcatPermute", opLen3_231(ssa.OpConcatPermuteInt64x8, types.TypeVec512), sys.AMD64)
|
||||||
|
addF(simdPackage, "Uint64x8.ConcatPermute", opLen3_231(ssa.OpConcatPermuteUint64x8, types.TypeVec512), sys.AMD64)
|
||||||
addF(simdPackage, "Uint8x16.ConcatShiftBytesRight", opLen2Imm8(ssa.OpConcatShiftBytesRightUint8x16, types.TypeVec128, 0), sys.AMD64)
|
addF(simdPackage, "Uint8x16.ConcatShiftBytesRight", opLen2Imm8(ssa.OpConcatShiftBytesRightUint8x16, types.TypeVec128, 0), sys.AMD64)
|
||||||
addF(simdPackage, "Uint8x32.ConcatShiftBytesRightGrouped", opLen2Imm8(ssa.OpConcatShiftBytesRightGroupedUint8x32, types.TypeVec256, 0), sys.AMD64)
|
addF(simdPackage, "Uint8x32.ConcatShiftBytesRightGrouped", opLen2Imm8(ssa.OpConcatShiftBytesRightGroupedUint8x32, types.TypeVec256, 0), sys.AMD64)
|
||||||
addF(simdPackage, "Uint8x64.ConcatShiftBytesRightGrouped", opLen2Imm8(ssa.OpConcatShiftBytesRightGroupedUint8x64, types.TypeVec512, 0), sys.AMD64)
|
addF(simdPackage, "Uint8x64.ConcatShiftBytesRightGrouped", opLen2Imm8(ssa.OpConcatShiftBytesRightGroupedUint8x64, types.TypeVec512, 0), sys.AMD64)
|
||||||
|
|
@ -802,8 +832,8 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
||||||
addF(simdPackage, "Uint64x2.Or", opLen2(ssa.OpOrUint64x2, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Uint64x2.Or", opLen2(ssa.OpOrUint64x2, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "Uint64x4.Or", opLen2(ssa.OpOrUint64x4, types.TypeVec256), sys.AMD64)
|
addF(simdPackage, "Uint64x4.Or", opLen2(ssa.OpOrUint64x4, types.TypeVec256), sys.AMD64)
|
||||||
addF(simdPackage, "Uint64x8.Or", opLen2(ssa.OpOrUint64x8, types.TypeVec512), sys.AMD64)
|
addF(simdPackage, "Uint64x8.Or", opLen2(ssa.OpOrUint64x8, types.TypeVec512), sys.AMD64)
|
||||||
addF(simdPackage, "Int8x16.Permute", opLen2(ssa.OpPermuteInt8x16, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Int8x16.Permute", opLen2_21(ssa.OpPermuteInt8x16, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "Uint8x16.Permute", opLen2(ssa.OpPermuteUint8x16, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Uint8x16.Permute", opLen2_21(ssa.OpPermuteUint8x16, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "Int8x32.Permute", opLen2_21(ssa.OpPermuteInt8x32, types.TypeVec256), sys.AMD64)
|
addF(simdPackage, "Int8x32.Permute", opLen2_21(ssa.OpPermuteInt8x32, types.TypeVec256), sys.AMD64)
|
||||||
addF(simdPackage, "Uint8x32.Permute", opLen2_21(ssa.OpPermuteUint8x32, types.TypeVec256), sys.AMD64)
|
addF(simdPackage, "Uint8x32.Permute", opLen2_21(ssa.OpPermuteUint8x32, types.TypeVec256), sys.AMD64)
|
||||||
addF(simdPackage, "Int8x64.Permute", opLen2_21(ssa.OpPermuteInt8x64, types.TypeVec512), sys.AMD64)
|
addF(simdPackage, "Int8x64.Permute", opLen2_21(ssa.OpPermuteInt8x64, types.TypeVec512), sys.AMD64)
|
||||||
|
|
@ -826,62 +856,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
||||||
addF(simdPackage, "Float64x8.Permute", opLen2_21(ssa.OpPermuteFloat64x8, types.TypeVec512), sys.AMD64)
|
addF(simdPackage, "Float64x8.Permute", opLen2_21(ssa.OpPermuteFloat64x8, types.TypeVec512), sys.AMD64)
|
||||||
addF(simdPackage, "Int64x8.Permute", opLen2_21(ssa.OpPermuteInt64x8, types.TypeVec512), sys.AMD64)
|
addF(simdPackage, "Int64x8.Permute", opLen2_21(ssa.OpPermuteInt64x8, types.TypeVec512), sys.AMD64)
|
||||||
addF(simdPackage, "Uint64x8.Permute", opLen2_21(ssa.OpPermuteUint64x8, types.TypeVec512), sys.AMD64)
|
addF(simdPackage, "Uint64x8.Permute", opLen2_21(ssa.OpPermuteUint64x8, types.TypeVec512), sys.AMD64)
|
||||||
addF(simdPackage, "Int8x16.Permute2", opLen3_231(ssa.OpPermute2Int8x16, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Int8x16.PermuteOrZero", opLen2(ssa.OpPermuteOrZeroInt8x16, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "Uint8x16.Permute2", opLen3_231(ssa.OpPermute2Uint8x16, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Uint8x16.PermuteOrZero", opLen2(ssa.OpPermuteOrZeroUint8x16, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "Int8x32.Permute2", opLen3_231(ssa.OpPermute2Int8x32, types.TypeVec256), sys.AMD64)
|
addF(simdPackage, "Int8x32.PermuteOrZeroGrouped", opLen2(ssa.OpPermuteOrZeroGroupedInt8x32, types.TypeVec256), sys.AMD64)
|
||||||
addF(simdPackage, "Uint8x32.Permute2", opLen3_231(ssa.OpPermute2Uint8x32, types.TypeVec256), sys.AMD64)
|
addF(simdPackage, "Int8x64.PermuteOrZeroGrouped", opLen2(ssa.OpPermuteOrZeroGroupedInt8x64, types.TypeVec512), sys.AMD64)
|
||||||
addF(simdPackage, "Int8x64.Permute2", opLen3_231(ssa.OpPermute2Int8x64, types.TypeVec512), sys.AMD64)
|
addF(simdPackage, "Uint8x32.PermuteOrZeroGrouped", opLen2(ssa.OpPermuteOrZeroGroupedUint8x32, types.TypeVec256), sys.AMD64)
|
||||||
addF(simdPackage, "Uint8x64.Permute2", opLen3_231(ssa.OpPermute2Uint8x64, types.TypeVec512), sys.AMD64)
|
addF(simdPackage, "Uint8x64.PermuteOrZeroGrouped", opLen2(ssa.OpPermuteOrZeroGroupedUint8x64, types.TypeVec512), sys.AMD64)
|
||||||
addF(simdPackage, "Int16x8.Permute2", opLen3_231(ssa.OpPermute2Int16x8, types.TypeVec128), sys.AMD64)
|
|
||||||
addF(simdPackage, "Uint16x8.Permute2", opLen3_231(ssa.OpPermute2Uint16x8, types.TypeVec128), sys.AMD64)
|
|
||||||
addF(simdPackage, "Int16x16.Permute2", opLen3_231(ssa.OpPermute2Int16x16, types.TypeVec256), sys.AMD64)
|
|
||||||
addF(simdPackage, "Uint16x16.Permute2", opLen3_231(ssa.OpPermute2Uint16x16, types.TypeVec256), sys.AMD64)
|
|
||||||
addF(simdPackage, "Int16x32.Permute2", opLen3_231(ssa.OpPermute2Int16x32, types.TypeVec512), sys.AMD64)
|
|
||||||
addF(simdPackage, "Uint16x32.Permute2", opLen3_231(ssa.OpPermute2Uint16x32, types.TypeVec512), sys.AMD64)
|
|
||||||
addF(simdPackage, "Float32x4.Permute2", opLen3_231(ssa.OpPermute2Float32x4, types.TypeVec128), sys.AMD64)
|
|
||||||
addF(simdPackage, "Int32x4.Permute2", opLen3_231(ssa.OpPermute2Int32x4, types.TypeVec128), sys.AMD64)
|
|
||||||
addF(simdPackage, "Uint32x4.Permute2", opLen3_231(ssa.OpPermute2Uint32x4, types.TypeVec128), sys.AMD64)
|
|
||||||
addF(simdPackage, "Float32x8.Permute2", opLen3_231(ssa.OpPermute2Float32x8, types.TypeVec256), sys.AMD64)
|
|
||||||
addF(simdPackage, "Int32x8.Permute2", opLen3_231(ssa.OpPermute2Int32x8, types.TypeVec256), sys.AMD64)
|
|
||||||
addF(simdPackage, "Uint32x8.Permute2", opLen3_231(ssa.OpPermute2Uint32x8, types.TypeVec256), sys.AMD64)
|
|
||||||
addF(simdPackage, "Float32x16.Permute2", opLen3_231(ssa.OpPermute2Float32x16, types.TypeVec512), sys.AMD64)
|
|
||||||
addF(simdPackage, "Int32x16.Permute2", opLen3_231(ssa.OpPermute2Int32x16, types.TypeVec512), sys.AMD64)
|
|
||||||
addF(simdPackage, "Uint32x16.Permute2", opLen3_231(ssa.OpPermute2Uint32x16, types.TypeVec512), sys.AMD64)
|
|
||||||
addF(simdPackage, "Float64x2.Permute2", opLen3_231(ssa.OpPermute2Float64x2, types.TypeVec128), sys.AMD64)
|
|
||||||
addF(simdPackage, "Int64x2.Permute2", opLen3_231(ssa.OpPermute2Int64x2, types.TypeVec128), sys.AMD64)
|
|
||||||
addF(simdPackage, "Uint64x2.Permute2", opLen3_231(ssa.OpPermute2Uint64x2, types.TypeVec128), sys.AMD64)
|
|
||||||
addF(simdPackage, "Float64x4.Permute2", opLen3_231(ssa.OpPermute2Float64x4, types.TypeVec256), sys.AMD64)
|
|
||||||
addF(simdPackage, "Int64x4.Permute2", opLen3_231(ssa.OpPermute2Int64x4, types.TypeVec256), sys.AMD64)
|
|
||||||
addF(simdPackage, "Uint64x4.Permute2", opLen3_231(ssa.OpPermute2Uint64x4, types.TypeVec256), sys.AMD64)
|
|
||||||
addF(simdPackage, "Float64x8.Permute2", opLen3_231(ssa.OpPermute2Float64x8, types.TypeVec512), sys.AMD64)
|
|
||||||
addF(simdPackage, "Int64x8.Permute2", opLen3_231(ssa.OpPermute2Int64x8, types.TypeVec512), sys.AMD64)
|
|
||||||
addF(simdPackage, "Uint64x8.Permute2", opLen3_231(ssa.OpPermute2Uint64x8, types.TypeVec512), sys.AMD64)
|
|
||||||
addF(simdPackage, "Int32x4.PermuteConstant", opLen1Imm8(ssa.OpPermuteConstantInt32x4, types.TypeVec128, 0), sys.AMD64)
|
|
||||||
addF(simdPackage, "Uint32x4.PermuteConstant", opLen1Imm8(ssa.OpPermuteConstantUint32x4, types.TypeVec128, 0), sys.AMD64)
|
|
||||||
addF(simdPackage, "Int32x8.PermuteConstantGrouped", opLen1Imm8(ssa.OpPermuteConstantGroupedInt32x8, types.TypeVec256, 0), sys.AMD64)
|
|
||||||
addF(simdPackage, "Int32x16.PermuteConstantGrouped", opLen1Imm8(ssa.OpPermuteConstantGroupedInt32x16, types.TypeVec512, 0), sys.AMD64)
|
|
||||||
addF(simdPackage, "Uint32x8.PermuteConstantGrouped", opLen1Imm8(ssa.OpPermuteConstantGroupedUint32x8, types.TypeVec256, 0), sys.AMD64)
|
|
||||||
addF(simdPackage, "Uint32x16.PermuteConstantGrouped", opLen1Imm8(ssa.OpPermuteConstantGroupedUint32x16, types.TypeVec512, 0), sys.AMD64)
|
|
||||||
addF(simdPackage, "Int16x8.PermuteConstantHi", opLen1Imm8(ssa.OpPermuteConstantHiInt16x8, types.TypeVec128, 0), sys.AMD64)
|
|
||||||
addF(simdPackage, "Int32x4.PermuteConstantHi", opLen1Imm8(ssa.OpPermuteConstantHiInt32x4, types.TypeVec128, 0), sys.AMD64)
|
|
||||||
addF(simdPackage, "Uint16x8.PermuteConstantHi", opLen1Imm8(ssa.OpPermuteConstantHiUint16x8, types.TypeVec128, 0), sys.AMD64)
|
|
||||||
addF(simdPackage, "Uint32x4.PermuteConstantHi", opLen1Imm8(ssa.OpPermuteConstantHiUint32x4, types.TypeVec128, 0), sys.AMD64)
|
|
||||||
addF(simdPackage, "Int16x16.PermuteConstantHiGrouped", opLen1Imm8(ssa.OpPermuteConstantHiGroupedInt16x16, types.TypeVec256, 0), sys.AMD64)
|
|
||||||
addF(simdPackage, "Int16x32.PermuteConstantHiGrouped", opLen1Imm8(ssa.OpPermuteConstantHiGroupedInt16x32, types.TypeVec512, 0), sys.AMD64)
|
|
||||||
addF(simdPackage, "Uint16x16.PermuteConstantHiGrouped", opLen1Imm8(ssa.OpPermuteConstantHiGroupedUint16x16, types.TypeVec256, 0), sys.AMD64)
|
|
||||||
addF(simdPackage, "Uint16x32.PermuteConstantHiGrouped", opLen1Imm8(ssa.OpPermuteConstantHiGroupedUint16x32, types.TypeVec512, 0), sys.AMD64)
|
|
||||||
addF(simdPackage, "Int16x8.PermuteConstantLo", opLen1Imm8(ssa.OpPermuteConstantLoInt16x8, types.TypeVec128, 0), sys.AMD64)
|
|
||||||
addF(simdPackage, "Int32x4.PermuteConstantLo", opLen1Imm8(ssa.OpPermuteConstantLoInt32x4, types.TypeVec128, 0), sys.AMD64)
|
|
||||||
addF(simdPackage, "Uint16x8.PermuteConstantLo", opLen1Imm8(ssa.OpPermuteConstantLoUint16x8, types.TypeVec128, 0), sys.AMD64)
|
|
||||||
addF(simdPackage, "Uint32x4.PermuteConstantLo", opLen1Imm8(ssa.OpPermuteConstantLoUint32x4, types.TypeVec128, 0), sys.AMD64)
|
|
||||||
addF(simdPackage, "Int16x16.PermuteConstantLoGrouped", opLen1Imm8(ssa.OpPermuteConstantLoGroupedInt16x16, types.TypeVec256, 0), sys.AMD64)
|
|
||||||
addF(simdPackage, "Int16x32.PermuteConstantLoGrouped", opLen1Imm8(ssa.OpPermuteConstantLoGroupedInt16x32, types.TypeVec512, 0), sys.AMD64)
|
|
||||||
addF(simdPackage, "Uint16x16.PermuteConstantLoGrouped", opLen1Imm8(ssa.OpPermuteConstantLoGroupedUint16x16, types.TypeVec256, 0), sys.AMD64)
|
|
||||||
addF(simdPackage, "Uint16x32.PermuteConstantLoGrouped", opLen1Imm8(ssa.OpPermuteConstantLoGroupedUint16x32, types.TypeVec512, 0), sys.AMD64)
|
|
||||||
addF(simdPackage, "Int8x32.PermuteGrouped", opLen2(ssa.OpPermuteGroupedInt8x32, types.TypeVec256), sys.AMD64)
|
|
||||||
addF(simdPackage, "Int8x64.PermuteGrouped", opLen2(ssa.OpPermuteGroupedInt8x64, types.TypeVec512), sys.AMD64)
|
|
||||||
addF(simdPackage, "Uint8x32.PermuteGrouped", opLen2(ssa.OpPermuteGroupedUint8x32, types.TypeVec256), sys.AMD64)
|
|
||||||
addF(simdPackage, "Uint8x64.PermuteGrouped", opLen2(ssa.OpPermuteGroupedUint8x64, types.TypeVec512), sys.AMD64)
|
|
||||||
addF(simdPackage, "Float32x4.Reciprocal", opLen1(ssa.OpReciprocalFloat32x4, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Float32x4.Reciprocal", opLen1(ssa.OpReciprocalFloat32x4, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "Float32x8.Reciprocal", opLen1(ssa.OpReciprocalFloat32x8, types.TypeVec256), sys.AMD64)
|
addF(simdPackage, "Float32x8.Reciprocal", opLen1(ssa.OpReciprocalFloat32x8, types.TypeVec256), sys.AMD64)
|
||||||
addF(simdPackage, "Float32x16.Reciprocal", opLen1(ssa.OpReciprocalFloat32x16, types.TypeVec512), sys.AMD64)
|
addF(simdPackage, "Float32x16.Reciprocal", opLen1(ssa.OpReciprocalFloat32x16, types.TypeVec512), sys.AMD64)
|
||||||
|
|
@ -1300,6 +1280,24 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
||||||
addF(simdPackage, "Uint32x16.concatSelectedConstantGrouped", opLen2Imm8(ssa.OpconcatSelectedConstantGroupedUint32x16, types.TypeVec512, 0), sys.AMD64)
|
addF(simdPackage, "Uint32x16.concatSelectedConstantGrouped", opLen2Imm8(ssa.OpconcatSelectedConstantGroupedUint32x16, types.TypeVec512, 0), sys.AMD64)
|
||||||
addF(simdPackage, "Uint64x4.concatSelectedConstantGrouped", opLen2Imm8(ssa.OpconcatSelectedConstantGroupedUint64x4, types.TypeVec256, 0), sys.AMD64)
|
addF(simdPackage, "Uint64x4.concatSelectedConstantGrouped", opLen2Imm8(ssa.OpconcatSelectedConstantGroupedUint64x4, types.TypeVec256, 0), sys.AMD64)
|
||||||
addF(simdPackage, "Uint64x8.concatSelectedConstantGrouped", opLen2Imm8(ssa.OpconcatSelectedConstantGroupedUint64x8, types.TypeVec512, 0), sys.AMD64)
|
addF(simdPackage, "Uint64x8.concatSelectedConstantGrouped", opLen2Imm8(ssa.OpconcatSelectedConstantGroupedUint64x8, types.TypeVec512, 0), sys.AMD64)
|
||||||
|
addF(simdPackage, "Int32x4.permuteScalars", opLen1Imm8(ssa.OppermuteScalarsInt32x4, types.TypeVec128, 0), sys.AMD64)
|
||||||
|
addF(simdPackage, "Uint32x4.permuteScalars", opLen1Imm8(ssa.OppermuteScalarsUint32x4, types.TypeVec128, 0), sys.AMD64)
|
||||||
|
addF(simdPackage, "Int32x8.permuteScalarsGrouped", opLen1Imm8(ssa.OppermuteScalarsGroupedInt32x8, types.TypeVec256, 0), sys.AMD64)
|
||||||
|
addF(simdPackage, "Int32x16.permuteScalarsGrouped", opLen1Imm8(ssa.OppermuteScalarsGroupedInt32x16, types.TypeVec512, 0), sys.AMD64)
|
||||||
|
addF(simdPackage, "Uint32x8.permuteScalarsGrouped", opLen1Imm8(ssa.OppermuteScalarsGroupedUint32x8, types.TypeVec256, 0), sys.AMD64)
|
||||||
|
addF(simdPackage, "Uint32x16.permuteScalarsGrouped", opLen1Imm8(ssa.OppermuteScalarsGroupedUint32x16, types.TypeVec512, 0), sys.AMD64)
|
||||||
|
addF(simdPackage, "Int16x8.permuteScalarsHi", opLen1Imm8(ssa.OppermuteScalarsHiInt16x8, types.TypeVec128, 0), sys.AMD64)
|
||||||
|
addF(simdPackage, "Uint16x8.permuteScalarsHi", opLen1Imm8(ssa.OppermuteScalarsHiUint16x8, types.TypeVec128, 0), sys.AMD64)
|
||||||
|
addF(simdPackage, "Int16x16.permuteScalarsHiGrouped", opLen1Imm8(ssa.OppermuteScalarsHiGroupedInt16x16, types.TypeVec256, 0), sys.AMD64)
|
||||||
|
addF(simdPackage, "Int16x32.permuteScalarsHiGrouped", opLen1Imm8(ssa.OppermuteScalarsHiGroupedInt16x32, types.TypeVec512, 0), sys.AMD64)
|
||||||
|
addF(simdPackage, "Uint16x16.permuteScalarsHiGrouped", opLen1Imm8(ssa.OppermuteScalarsHiGroupedUint16x16, types.TypeVec256, 0), sys.AMD64)
|
||||||
|
addF(simdPackage, "Uint16x32.permuteScalarsHiGrouped", opLen1Imm8(ssa.OppermuteScalarsHiGroupedUint16x32, types.TypeVec512, 0), sys.AMD64)
|
||||||
|
addF(simdPackage, "Int16x8.permuteScalarsLo", opLen1Imm8(ssa.OppermuteScalarsLoInt16x8, types.TypeVec128, 0), sys.AMD64)
|
||||||
|
addF(simdPackage, "Uint16x8.permuteScalarsLo", opLen1Imm8(ssa.OppermuteScalarsLoUint16x8, types.TypeVec128, 0), sys.AMD64)
|
||||||
|
addF(simdPackage, "Int16x16.permuteScalarsLoGrouped", opLen1Imm8(ssa.OppermuteScalarsLoGroupedInt16x16, types.TypeVec256, 0), sys.AMD64)
|
||||||
|
addF(simdPackage, "Int16x32.permuteScalarsLoGrouped", opLen1Imm8(ssa.OppermuteScalarsLoGroupedInt16x32, types.TypeVec512, 0), sys.AMD64)
|
||||||
|
addF(simdPackage, "Uint16x16.permuteScalarsLoGrouped", opLen1Imm8(ssa.OppermuteScalarsLoGroupedUint16x16, types.TypeVec256, 0), sys.AMD64)
|
||||||
|
addF(simdPackage, "Uint16x32.permuteScalarsLoGrouped", opLen1Imm8(ssa.OppermuteScalarsLoGroupedUint16x32, types.TypeVec512, 0), sys.AMD64)
|
||||||
addF(simdPackage, "Int32x4.tern", opLen3Imm8(ssa.OpternInt32x4, types.TypeVec128, 0), sys.AMD64)
|
addF(simdPackage, "Int32x4.tern", opLen3Imm8(ssa.OpternInt32x4, types.TypeVec128, 0), sys.AMD64)
|
||||||
addF(simdPackage, "Int32x8.tern", opLen3Imm8(ssa.OpternInt32x8, types.TypeVec256, 0), sys.AMD64)
|
addF(simdPackage, "Int32x8.tern", opLen3Imm8(ssa.OpternInt32x8, types.TypeVec256, 0), sys.AMD64)
|
||||||
addF(simdPackage, "Int32x16.tern", opLen3Imm8(ssa.OpternInt32x16, types.TypeVec512, 0), sys.AMD64)
|
addF(simdPackage, "Int32x16.tern", opLen3Imm8(ssa.OpternInt32x16, types.TypeVec512, 0), sys.AMD64)
|
||||||
|
|
|
||||||
|
|
@ -46,6 +46,9 @@ func writeSIMDGenericOps(ops []Operation) *bytes.Buffer {
|
||||||
if op.NoGenericOps != nil && *op.NoGenericOps == "true" {
|
if op.NoGenericOps != nil && *op.NoGenericOps == "true" {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
if op.SkipMaskedMethod() {
|
||||||
|
continue
|
||||||
|
}
|
||||||
_, _, _, immType, gOp := op.shape()
|
_, _, _, immType, gOp := op.shape()
|
||||||
gOpData := genericOpsData{gOp.GenericName(), len(gOp.In), op.Commutative}
|
gOpData := genericOpsData{gOp.GenericName(), len(gOp.In), op.Commutative}
|
||||||
if immType == VarImm || immType == ConstVarImm {
|
if immType == VarImm || immType == ConstVarImm {
|
||||||
|
|
|
||||||
|
|
@ -107,6 +107,9 @@ func writeSIMDIntrinsics(ops []Operation, typeMap simdTypeMap) *bytes.Buffer {
|
||||||
if op.NoTypes != nil && *op.NoTypes == "true" {
|
if op.NoTypes != nil && *op.NoTypes == "true" {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
if op.SkipMaskedMethod() {
|
||||||
|
continue
|
||||||
|
}
|
||||||
if s, op, err := classifyOp(op); err == nil {
|
if s, op, err := classifyOp(op); err == nil {
|
||||||
if err := t.ExecuteTemplate(buffer, s, op); err != nil {
|
if err := t.ExecuteTemplate(buffer, s, op); err != nil {
|
||||||
panic(fmt.Errorf("failed to execute template %s for op %s: %w", s, op.Go, err))
|
panic(fmt.Errorf("failed to execute template %s for op %s: %w", s, op.Go, err))
|
||||||
|
|
|
||||||
|
|
@ -604,6 +604,9 @@ func writeSIMDStubs(ops []Operation, typeMap simdTypeMap) (f, fI *bytes.Buffer)
|
||||||
if op.NoTypes != nil && *op.NoTypes == "true" {
|
if op.NoTypes != nil && *op.NoTypes == "true" {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
if op.SkipMaskedMethod() {
|
||||||
|
continue
|
||||||
|
}
|
||||||
idxVecAsScalar, err := checkVecAsScalar(op)
|
idxVecAsScalar, err := checkVecAsScalar(op)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
|
|
|
||||||
|
|
@ -345,7 +345,8 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer {
|
||||||
data.ArgsOut = "..."
|
data.ArgsOut = "..."
|
||||||
}
|
}
|
||||||
data.tplName = tplName
|
data.tplName = tplName
|
||||||
if opr.NoGenericOps != nil && *opr.NoGenericOps == "true" {
|
if opr.NoGenericOps != nil && *opr.NoGenericOps == "true" ||
|
||||||
|
opr.SkipMaskedMethod() {
|
||||||
optData = append(optData, data)
|
optData = append(optData, data)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -73,6 +73,29 @@ type rawOperation struct {
|
||||||
NoGenericOps *string
|
NoGenericOps *string
|
||||||
// If non-nil, this string will be attached to the machine ssa op name. E.g. "const"
|
// If non-nil, this string will be attached to the machine ssa op name. E.g. "const"
|
||||||
SSAVariant *string
|
SSAVariant *string
|
||||||
|
// If true, do not emit method declarations, generic ops, or intrinsics for masked variants
|
||||||
|
// DO emit the architecture-specific opcodes and optimizations.
|
||||||
|
HideMaskMethods *bool
|
||||||
|
}
|
||||||
|
|
||||||
|
func (o *Operation) IsMasked() bool {
|
||||||
|
if len(o.InVariant) == 0 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if len(o.InVariant) == 1 && o.InVariant[0].Class == "mask" {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
panic(fmt.Errorf("unknown inVariant"))
|
||||||
|
}
|
||||||
|
|
||||||
|
func (o *Operation) SkipMaskedMethod() bool {
|
||||||
|
if o.HideMaskMethods == nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if *o.HideMaskMethods && o.IsMasked() {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
func (o *Operation) DecodeUnified(v *unify.Value) error {
|
func (o *Operation) DecodeUnified(v *unify.Value) error {
|
||||||
|
|
@ -80,14 +103,7 @@ func (o *Operation) DecodeUnified(v *unify.Value) error {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
isMasked := false
|
isMasked := o.IsMasked()
|
||||||
if len(o.InVariant) == 0 {
|
|
||||||
// No variant
|
|
||||||
} else if len(o.InVariant) == 1 && o.InVariant[0].Class == "mask" {
|
|
||||||
isMasked = true
|
|
||||||
} else {
|
|
||||||
return fmt.Errorf("unknown inVariant")
|
|
||||||
}
|
|
||||||
|
|
||||||
// Compute full Go method name.
|
// Compute full Go method name.
|
||||||
o.Go = o.rawOperation.Go
|
o.Go = o.rawOperation.Go
|
||||||
|
|
@ -104,6 +120,7 @@ func (o *Operation) DecodeUnified(v *unify.Value) error {
|
||||||
o.Documentation = regexp.MustCompile(`\bNAME\b`).ReplaceAllString(o.Documentation, o.Go)
|
o.Documentation = regexp.MustCompile(`\bNAME\b`).ReplaceAllString(o.Documentation, o.Go)
|
||||||
if isMasked {
|
if isMasked {
|
||||||
o.Documentation += "\n//\n// This operation is applied selectively under a write mask."
|
o.Documentation += "\n//\n// This operation is applied selectively under a write mask."
|
||||||
|
// Suppress generic op and method declaration for exported methods, if a mask is present.
|
||||||
if unicode.IsUpper([]rune(o.Go)[0]) {
|
if unicode.IsUpper([]rune(o.Go)[0]) {
|
||||||
trueVal := "true"
|
trueVal := "true"
|
||||||
o.NoGenericOps = &trueVal
|
o.NoGenericOps = &trueVal
|
||||||
|
|
|
||||||
|
|
@ -27,18 +27,22 @@
|
||||||
constImm: 1
|
constImm: 1
|
||||||
documentation: !string |-
|
documentation: !string |-
|
||||||
// NAME returns the upper half of x.
|
// NAME returns the upper half of x.
|
||||||
|
- go: PermuteOrZero
|
||||||
|
commutative: false
|
||||||
|
documentation: !string |-
|
||||||
|
// NAME performs a full permutation of vector x using indices:
|
||||||
|
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
|
||||||
- go: Permute
|
- go: Permute
|
||||||
commutative: false
|
commutative: false
|
||||||
documentation: !string |-
|
documentation: !string |-
|
||||||
// NAME performs a full permutation of vector x using indices:
|
// NAME performs a full permutation of vector x using indices:
|
||||||
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
|
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
|
||||||
// Only the needed bits to represent x's index are used in indices' elements.
|
- go: ConcatPermute # ConcatPermute is only available on or after AVX512
|
||||||
- go: Permute2 # Permute2 is only available on or after AVX512
|
|
||||||
commutative: false
|
commutative: false
|
||||||
documentation: !string |-
|
documentation: !string |-
|
||||||
// NAME performs a full permutation of vector x, y using indices:
|
// NAME performs a full permutation of vector x, y using indices:
|
||||||
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
|
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
|
||||||
// where xy is x appending y.
|
// where xy is the concatenation of x (lower half) and y (upper half).
|
||||||
// Only the needed bits to represent xy's index are used in indices' elements.
|
// Only the needed bits to represent xy's index are used in indices' elements.
|
||||||
- go: Compress
|
- go: Compress
|
||||||
commutative: false
|
commutative: false
|
||||||
|
|
@ -74,31 +78,35 @@
|
||||||
documentation: !string |-
|
documentation: !string |-
|
||||||
// NAME copies element zero of its (128-bit) input to all elements of
|
// NAME copies element zero of its (128-bit) input to all elements of
|
||||||
// the 512-bit output vector.
|
// the 512-bit output vector.
|
||||||
|
- go: PermuteOrZeroGrouped
|
||||||
|
commutative: false
|
||||||
|
documentation: !string |- # Detailed documentation will rely on the specific ops.
|
||||||
|
// NAME performs a grouped permutation of vector x using indices:
|
||||||
- go: PermuteGrouped
|
- go: PermuteGrouped
|
||||||
commutative: false
|
commutative: false
|
||||||
documentation: !string |- # Detailed documentation will rely on the specific ops.
|
documentation: !string |- # Detailed documentation will rely on the specific ops.
|
||||||
// NAME performs a grouped permutation of vector x using indices:
|
// NAME performs a grouped permutation of vector x using indices:
|
||||||
- go: PermuteConstant
|
- go: permuteScalars
|
||||||
commutative: false
|
commutative: false
|
||||||
documentation: !string |- # Detailed documentation will rely on the specific ops.
|
documentation: !string |- # Detailed documentation will rely on the specific ops.
|
||||||
// NAME performs a permutation of vector x using constant indices:
|
// NAME performs a permutation of vector x using constant indices:
|
||||||
- go: PermuteConstantGrouped
|
- go: permuteScalarsGrouped
|
||||||
commutative: false
|
commutative: false
|
||||||
documentation: !string |- # Detailed documentation will rely on the specific ops.
|
documentation: !string |- # Detailed documentation will rely on the specific ops.
|
||||||
// NAME performs a grouped permutation of vector x using constant indices:
|
// NAME performs a grouped permutation of vector x using constant indices:
|
||||||
- go: PermuteConstantLo
|
- go: permuteScalarsLo
|
||||||
commutative: false
|
commutative: false
|
||||||
documentation: !string |- # Detailed documentation will rely on the specific ops.
|
documentation: !string |- # Detailed documentation will rely on the specific ops.
|
||||||
// NAME performs a permutation of vector x using constant indices:
|
// NAME performs a permutation of vector x using constant indices:
|
||||||
- go: PermuteConstantLoGrouped
|
- go: permuteScalarsLoGrouped
|
||||||
commutative: false
|
commutative: false
|
||||||
documentation: !string |- # Detailed documentation will rely on the specific ops.
|
documentation: !string |- # Detailed documentation will rely on the specific ops.
|
||||||
// NAME performs a grouped permutation of vector x using constant indices:
|
// NAME performs a grouped permutation of vector x using constant indices:
|
||||||
- go: PermuteConstantHi
|
- go: permuteScalarsHi
|
||||||
commutative: false
|
commutative: false
|
||||||
documentation: !string |- # Detailed documentation will rely on the specific ops.
|
documentation: !string |- # Detailed documentation will rely on the specific ops.
|
||||||
// NAME performs a permutation of vector x using constant indices:
|
// NAME performs a permutation of vector x using constant indices:
|
||||||
- go: PermuteConstantHiGrouped
|
- go: permuteScalarsHiGrouped
|
||||||
commutative: false
|
commutative: false
|
||||||
documentation: !string |- # Detailed documentation will rely on the specific ops.
|
documentation: !string |- # Detailed documentation will rely on the specific ops.
|
||||||
// NAME performs a grouped permutation of vector x using constant indices:
|
// NAME performs a grouped permutation of vector x using constant indices:
|
||||||
|
|
@ -218,8 +226,10 @@
|
||||||
- go: Select128FromPair
|
- go: Select128FromPair
|
||||||
commutative: false
|
commutative: false
|
||||||
documentation: !string |-
|
documentation: !string |-
|
||||||
// NAME selects the low and high 128-bit halves from the 128-bit halves
|
// NAME treats the 256-bit vectors x and y as a single vector of four
|
||||||
// of its two 256-bit inputs, numbering those halves 0, 1, 2, 3.
|
// 128-bit elements, and returns a 256-bit result formed by
|
||||||
|
// concatenating the two elements specified by lo and hi.
|
||||||
|
// For example, {4,5}.NAME(3,0,{6,7}) returns {7,4}.
|
||||||
|
|
||||||
- go: ConcatShiftBytesRight
|
- go: ConcatShiftBytesRight
|
||||||
commutative: false
|
commutative: false
|
||||||
|
|
|
||||||
|
|
@ -213,19 +213,75 @@
|
||||||
- *f64xN
|
- *f64xN
|
||||||
|
|
||||||
- go: Permute
|
- go: Permute
|
||||||
asm: "VPERM[BWDQ]|VPERMP[SD]"
|
asm: "VPERMQ|VPERMPD"
|
||||||
|
addDoc: !string |-
|
||||||
|
// The low 2 bits (values 0-3) of each element of indices is used
|
||||||
operandOrder: "21Type1"
|
operandOrder: "21Type1"
|
||||||
in:
|
in:
|
||||||
- &anyindices
|
- &anyindices
|
||||||
go: $t
|
go: $t
|
||||||
name: indices
|
name: indices
|
||||||
overwriteBase: uint
|
overwriteBase: uint
|
||||||
|
- &any4
|
||||||
|
go: $t
|
||||||
|
lanes: 4
|
||||||
|
out:
|
||||||
- &any
|
- &any
|
||||||
go: $t
|
go: $t
|
||||||
|
|
||||||
|
- go: Permute
|
||||||
|
asm: "VPERM[WDQ]|VPERMP[SD]"
|
||||||
|
addDoc: !string |-
|
||||||
|
// The low 3 bits (values 0-7) of each element of indices is used
|
||||||
|
operandOrder: "21Type1"
|
||||||
|
in:
|
||||||
|
- *anyindices
|
||||||
|
- &any8
|
||||||
|
go: $t
|
||||||
|
lanes: 8
|
||||||
out:
|
out:
|
||||||
- *any
|
- *any
|
||||||
|
|
||||||
- go: Permute2
|
- go: Permute
|
||||||
|
asm: "VPERM[BWD]|VPERMPS"
|
||||||
|
addDoc: !string |-
|
||||||
|
// The low 4 bits (values 0-15) of each element of indices is used
|
||||||
|
operandOrder: "21Type1"
|
||||||
|
in:
|
||||||
|
- *anyindices
|
||||||
|
- &any16
|
||||||
|
go: $t
|
||||||
|
lanes: 16
|
||||||
|
out:
|
||||||
|
- *any
|
||||||
|
|
||||||
|
- go: Permute
|
||||||
|
asm: "VPERM[BW]"
|
||||||
|
addDoc: !string |-
|
||||||
|
// The low 5 bits (values 0-31) of each element of indices is used
|
||||||
|
operandOrder: "21Type1"
|
||||||
|
in:
|
||||||
|
- *anyindices
|
||||||
|
- &any32
|
||||||
|
go: $t
|
||||||
|
lanes: 32
|
||||||
|
out:
|
||||||
|
- *any
|
||||||
|
|
||||||
|
- go: Permute
|
||||||
|
asm: "VPERMB"
|
||||||
|
addDoc: !string |-
|
||||||
|
// The low 6 bits (values 0-63) of each element of indices is used
|
||||||
|
operandOrder: "21Type1"
|
||||||
|
in:
|
||||||
|
- *anyindices
|
||||||
|
- &any64
|
||||||
|
go: $t
|
||||||
|
lanes: 64
|
||||||
|
out:
|
||||||
|
- *any
|
||||||
|
|
||||||
|
- go: ConcatPermute
|
||||||
asm: "VPERMI2[BWDQ]|VPERMI2P[SD]"
|
asm: "VPERMI2[BWDQ]|VPERMI2P[SD]"
|
||||||
# Because we are overwriting the receiver's type, we
|
# Because we are overwriting the receiver's type, we
|
||||||
# have to move the receiver to be a parameter so that
|
# have to move the receiver to be a parameter so that
|
||||||
|
|
@ -403,113 +459,137 @@
|
||||||
base: $b
|
base: $b
|
||||||
|
|
||||||
# VPSHUFB for 128-bit byte shuffles will be picked with higher priority than VPERMB, given its lower CPU feature requirement. (It's AVX)
|
# VPSHUFB for 128-bit byte shuffles will be picked with higher priority than VPERMB, given its lower CPU feature requirement. (It's AVX)
|
||||||
- go: Permute
|
- go: PermuteOrZero
|
||||||
asm: VPSHUFB
|
asm: VPSHUFB
|
||||||
addDoc: !string |-
|
addDoc: !string |-
|
||||||
// However when the top bit is set, the low bits will be disregard and the respective element in the result vector will be zeroed.
|
// The lower four bits of each byte-sized index in indices select an element from x,
|
||||||
|
// unless the index's sign bit is set in which case zero is used instead.
|
||||||
in:
|
in:
|
||||||
- &128any
|
- &128any
|
||||||
bits: 128
|
bits: 128
|
||||||
go: $t
|
go: $t
|
||||||
- bits: 128
|
- bits: 128
|
||||||
go: $t
|
|
||||||
name: indices
|
name: indices
|
||||||
|
base: int # always signed
|
||||||
out:
|
out:
|
||||||
- *128any
|
- *128any
|
||||||
- go: PermuteGrouped
|
|
||||||
|
- go: PermuteOrZeroGrouped
|
||||||
asm: VPSHUFB
|
asm: VPSHUFB
|
||||||
addDoc: !string |-
|
addDoc: !string |-
|
||||||
// result := {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...}
|
// result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...}
|
||||||
// Only the needed bits to represent the index of a group of x are used in indices' elements.
|
// The lower four bits of each byte-sized index in indices select an element from its corresponding group in x,
|
||||||
// However when the top bit is set, the low bits will be disregard and the respective element in the result vector will be zeroed.
|
// unless the index's sign bit is set in which case zero is used instead.
|
||||||
// Each group is of size 128-bit.
|
// Each group is of size 128-bit.
|
||||||
in:
|
in:
|
||||||
- &256Or512any
|
- &256Or512any
|
||||||
bits: "256|512"
|
bits: "256|512"
|
||||||
go: $t
|
go: $t
|
||||||
- bits: "256|512"
|
- bits: "256|512"
|
||||||
|
base: int
|
||||||
|
name: indices
|
||||||
|
out:
|
||||||
|
- *256Or512any
|
||||||
|
|
||||||
|
- go: permuteScalars
|
||||||
|
asm: VPSHUFD
|
||||||
|
addDoc: !string |-
|
||||||
|
// result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]]}
|
||||||
|
// Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
|
||||||
|
in:
|
||||||
|
- *128any
|
||||||
|
- class: immediate
|
||||||
|
immOffset: 0
|
||||||
|
name: indices
|
||||||
|
hideMaskMethods: true
|
||||||
|
out:
|
||||||
|
- *128any
|
||||||
|
|
||||||
|
- go: permuteScalarsGrouped
|
||||||
|
asm: VPSHUFD
|
||||||
|
addDoc: !string |-
|
||||||
|
// result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...}
|
||||||
|
// Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
|
||||||
|
// Each group is of size 128-bit.
|
||||||
|
in:
|
||||||
|
- *256Or512any
|
||||||
|
- class: immediate
|
||||||
|
immOffset: 0
|
||||||
|
name: indices
|
||||||
|
hideMaskMethods: true
|
||||||
|
out:
|
||||||
|
- *256Or512any
|
||||||
|
|
||||||
|
- go: permuteScalarsLo
|
||||||
|
asm: VPSHUFLW
|
||||||
|
addDoc: !string |-
|
||||||
|
// result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]], x[4], x[5], x[6], x[7]}
|
||||||
|
// Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
|
||||||
|
in:
|
||||||
|
- &128lanes8
|
||||||
|
bits: 128
|
||||||
go: $t
|
go: $t
|
||||||
name: indices
|
elemBits: 16
|
||||||
out:
|
|
||||||
- *256Or512any
|
|
||||||
|
|
||||||
- go: PermuteConstant
|
|
||||||
asm: VPSHUFD
|
|
||||||
addDoc: !string |-
|
|
||||||
// result := {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]]}
|
|
||||||
// Here indices are word-size unsigned index value packed together, e.g. indices[0:2] is the first index.
|
|
||||||
in:
|
|
||||||
- *128any
|
|
||||||
- class: immediate
|
- class: immediate
|
||||||
immOffset: 0
|
immOffset: 0
|
||||||
name: indices
|
name: indices
|
||||||
|
hideMaskMethods: true
|
||||||
out:
|
out:
|
||||||
- *128any
|
- *128lanes8
|
||||||
- go: PermuteConstantGrouped
|
|
||||||
asm: VPSHUFD
|
- go: permuteScalarsLoGrouped
|
||||||
|
asm: VPSHUFLW
|
||||||
addDoc: !string |-
|
addDoc: !string |-
|
||||||
// result := {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...}
|
//
|
||||||
// Here indices are word-size unsigned index value packed together, e.g. indices[0:2] is the first index.
|
// result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x[4], x[5], x[6], x[7],
|
||||||
|
// x_group1[indices[0:2]], ...}
|
||||||
|
//
|
||||||
|
// Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
|
||||||
// Each group is of size 128-bit.
|
// Each group is of size 128-bit.
|
||||||
in:
|
in:
|
||||||
- *256Or512any
|
- &256Or512lanes8
|
||||||
|
bits: "256|512"
|
||||||
|
go: $t
|
||||||
|
elemBits: 16
|
||||||
- class: immediate
|
- class: immediate
|
||||||
immOffset: 0
|
immOffset: 0
|
||||||
name: indices
|
name: indices
|
||||||
|
hideMaskMethods: true
|
||||||
out:
|
out:
|
||||||
- *256Or512any
|
- *256Or512lanes8
|
||||||
|
|
||||||
- go: PermuteConstantLo
|
- go: permuteScalarsHi
|
||||||
asm: VPSHUFHW
|
asm: VPSHUFHW
|
||||||
addDoc: !string |-
|
addDoc: !string |-
|
||||||
// result := {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]]}
|
// result = {x[0], x[1], x[2], x[3], x[indices[0:2]+4], x[indices[2:4]+4], x[indices[4:6]+4], x[indices[6:8]+4]}
|
||||||
// Here indices are word-size unsigned index value packed together, e.g. indices[0:2] is the first index.
|
// Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
|
||||||
in:
|
in:
|
||||||
- *128any
|
- *128lanes8
|
||||||
- class: immediate
|
- class: immediate
|
||||||
immOffset: 0
|
immOffset: 0
|
||||||
name: indices
|
name: indices
|
||||||
|
hideMaskMethods: true
|
||||||
out:
|
out:
|
||||||
- *128any
|
- *128lanes8
|
||||||
- go: PermuteConstantLoGrouped
|
|
||||||
|
- go: permuteScalarsHiGrouped
|
||||||
asm: VPSHUFHW
|
asm: VPSHUFHW
|
||||||
addDoc: !string |-
|
addDoc: !string |-
|
||||||
// result := {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...}
|
// result =
|
||||||
// Here indices are word-size unsigned index value packed together, e.g. indices[0:2] is the first index.
|
//
|
||||||
|
// {x_group0[0], x_group0[1], x_group0[2], x_group0[3], x_group0[indices[0:2]+4], x_group0[indices[2:4]+4], x_group0[indices[4:6]+4], x_group0[indices[6:8]+4],
|
||||||
|
// x_group1[0], x_group1[1], x_group1[2], x_group1[3], x_group1[indices[0:2]+4], ...}
|
||||||
|
//
|
||||||
|
// Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
|
||||||
// Each group is of size 128-bit.
|
// Each group is of size 128-bit.
|
||||||
in:
|
in:
|
||||||
- *256Or512any
|
- *256Or512lanes8
|
||||||
- class: immediate
|
- class: immediate
|
||||||
immOffset: 0
|
immOffset: 0
|
||||||
name: indices
|
name: indices
|
||||||
|
hideMaskMethods: true
|
||||||
out:
|
out:
|
||||||
- *256Or512any
|
- *256Or512lanes8
|
||||||
|
|
||||||
- go: PermuteConstantHi
|
|
||||||
asm: VPSHUFHW
|
|
||||||
addDoc: !string |-
|
|
||||||
// result := {x[indices[0:2]+4], x[indices[2:4]+4], x[indices[4:6]+4], x[indices[6:8]+4]}
|
|
||||||
// Here indices are word-size unsigned index value packed together, e.g. indices[0:2] is the first index.
|
|
||||||
in:
|
|
||||||
- *128any
|
|
||||||
- class: immediate
|
|
||||||
immOffset: 0
|
|
||||||
name: indices
|
|
||||||
out:
|
|
||||||
- *128any
|
|
||||||
- go: PermuteConstantHiGrouped
|
|
||||||
asm: VPSHUFHW
|
|
||||||
addDoc: !string |-
|
|
||||||
// result := {x_group0[indices[0:2]+4], x_group0[indices[2:4]+4], x_group0[indices[4:6]+4], x_group0[indices[6:8]+4], x_group1[indices[0:2]+4], ...}
|
|
||||||
// Here indices are word-size unsigned index value packed together, e.g. indices[0:2] is the first index.
|
|
||||||
// Each group is of size 128-bit.
|
|
||||||
in:
|
|
||||||
- *256Or512any
|
|
||||||
- class: immediate
|
|
||||||
immOffset: 0
|
|
||||||
name: indices
|
|
||||||
out:
|
|
||||||
- *256Or512any
|
|
||||||
|
|
||||||
- go: InterleaveHi
|
- go: InterleaveHi
|
||||||
asm: VPUNPCKH(QDQ|DQ|WD|WB)
|
asm: VPUNPCKH(QDQ|DQ|WD|WB)
|
||||||
|
|
|
||||||
|
|
@ -163,7 +163,20 @@ func TestPermute(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestPermute2(t *testing.T) {
|
func TestPermuteOrZero(t *testing.T) {
|
||||||
|
x := []uint8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
|
||||||
|
indices := []int8{7, 6, 5, 4, 3, 2, 1, 0, -1, 8, -1, 9, -1, 10, -1, 11}
|
||||||
|
want := []uint8{8, 7, 6, 5, 4, 3, 2, 1, 0, 9, 0, 10, 0, 11, 0, 12}
|
||||||
|
got := make([]uint8, len(x))
|
||||||
|
simd.LoadUint8x16Slice(x).PermuteOrZero(simd.LoadInt8x16Slice(indices)).StoreSlice(got)
|
||||||
|
for i := range 8 {
|
||||||
|
if want[i] != got[i] {
|
||||||
|
t.Errorf("want and got differ at index %d, want=%d, got=%d", i, want[i], got[i])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestConcatPermute(t *testing.T) {
|
||||||
if !simd.X86.AVX512() {
|
if !simd.X86.AVX512() {
|
||||||
t.Skip("Test requires X86.AVX512, not available on this hardware")
|
t.Skip("Test requires X86.AVX512, not available on this hardware")
|
||||||
return
|
return
|
||||||
|
|
@ -173,7 +186,7 @@ func TestPermute2(t *testing.T) {
|
||||||
indices := []uint64{7 + 8, 6, 5 + 8, 4, 3 + 8, 2, 1 + 8, 0}
|
indices := []uint64{7 + 8, 6, 5 + 8, 4, 3 + 8, 2, 1 + 8, 0}
|
||||||
want := []int64{-8, 7, -6, 5, -4, 3, -2, 1}
|
want := []int64{-8, 7, -6, 5, -4, 3, -2, 1}
|
||||||
got := make([]int64, 8)
|
got := make([]int64, 8)
|
||||||
simd.LoadInt64x8Slice(x).Permute2(simd.LoadInt64x8Slice(y), simd.LoadUint64x8Slice(indices)).StoreSlice(got)
|
simd.LoadInt64x8Slice(x).ConcatPermute(simd.LoadInt64x8Slice(y), simd.LoadUint64x8Slice(indices)).StoreSlice(got)
|
||||||
for i := range 8 {
|
for i := range 8 {
|
||||||
if want[i] != got[i] {
|
if want[i] != got[i] {
|
||||||
t.Errorf("want and got differ at index %d, want=%d, got=%d", i, want[i], got[i])
|
t.Errorf("want and got differ at index %d, want=%d, got=%d", i, want[i], got[i])
|
||||||
|
|
@ -1161,3 +1174,75 @@ func TestDotProductQuadruple(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestPermuteScalars(t *testing.T) {
|
||||||
|
x := []int32{11, 12, 13, 14}
|
||||||
|
want := []int32{12, 13, 14, 11}
|
||||||
|
got := make([]int32, 4)
|
||||||
|
simd.LoadInt32x4Slice(x).PermuteScalars(1, 2, 3, 0).StoreSlice(got)
|
||||||
|
for i := range 4 {
|
||||||
|
if want[i] != got[i] {
|
||||||
|
t.Errorf("want and got differ at index %d, want=%d, got=%d", i, want[i], got[i])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestPermuteScalarsGrouped(t *testing.T) {
|
||||||
|
x := []int32{11, 12, 13, 14, 21, 22, 23, 24}
|
||||||
|
want := []int32{12, 13, 14, 11, 22, 23, 24, 21}
|
||||||
|
got := make([]int32, 8)
|
||||||
|
simd.LoadInt32x8Slice(x).PermuteScalarsGrouped(1, 2, 3, 0).StoreSlice(got)
|
||||||
|
for i := range 8 {
|
||||||
|
if want[i] != got[i] {
|
||||||
|
t.Errorf("want and got differ at index %d, want=%d, got=%d", i, want[i], got[i])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestPermuteScalarsHi(t *testing.T) {
|
||||||
|
x := []int16{-1, -2, -3, -4, 11, 12, 13, 14}
|
||||||
|
want := []int16{-1, -2, -3, -4, 12, 13, 14, 11}
|
||||||
|
got := make([]int16, len(x))
|
||||||
|
simd.LoadInt16x8Slice(x).PermuteScalarsHi(1, 2, 3, 0).StoreSlice(got)
|
||||||
|
for i := range got {
|
||||||
|
if want[i] != got[i] {
|
||||||
|
t.Errorf("want and got differ at index %d, want=%d, got=%d", i, want[i], got[i])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestPermuteScalarsLo(t *testing.T) {
|
||||||
|
x := []int16{11, 12, 13, 14, 4, 5, 6, 7}
|
||||||
|
want := []int16{12, 13, 14, 11, 4, 5, 6, 7}
|
||||||
|
got := make([]int16, len(x))
|
||||||
|
simd.LoadInt16x8Slice(x).PermuteScalarsLo(1, 2, 3, 0).StoreSlice(got)
|
||||||
|
for i := range got {
|
||||||
|
if want[i] != got[i] {
|
||||||
|
t.Errorf("want and got differ at index %d, want=%d, got=%d", i, want[i], got[i])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestPermuteScalarsHiGrouped(t *testing.T) {
|
||||||
|
x := []int16{-1, -2, -3, -4, 11, 12, 13, 14, -11, -12, -13, -14, 111, 112, 113, 114}
|
||||||
|
want := []int16{-1, -2, -3, -4, 12, 13, 14, 11, -11, -12, -13, -14, 112, 113, 114, 111}
|
||||||
|
got := make([]int16, len(x))
|
||||||
|
simd.LoadInt16x16Slice(x).PermuteScalarsHiGrouped(1, 2, 3, 0).StoreSlice(got)
|
||||||
|
for i := range got {
|
||||||
|
if want[i] != got[i] {
|
||||||
|
t.Errorf("want and got differ at index %d, want=%d, got=%d", i, want[i], got[i])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestPermuteScalarsLoGrouped(t *testing.T) {
|
||||||
|
x := []int16{11, 12, 13, 14, 4, 5, 6, 7, 111, 112, 113, 114, 14, 15, 16, 17}
|
||||||
|
want := []int16{12, 13, 14, 11, 4, 5, 6, 7, 112, 113, 114, 111, 14, 15, 16, 17}
|
||||||
|
got := make([]int16, len(x))
|
||||||
|
simd.LoadInt16x16Slice(x).PermuteScalarsLoGrouped(1, 2, 3, 0).StoreSlice(got)
|
||||||
|
for i := range got {
|
||||||
|
if want[i] != got[i] {
|
||||||
|
t.Errorf("want and got differ at index %d, want=%d, got=%d", i, want[i], got[i])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -338,6 +338,220 @@ func (x Uint64x4) concatSelectedConstantGrouped(hilos uint8, y Uint64x4) Uint64x
|
||||||
// Asm: VSHUFPD, CPU Feature: AVX512
|
// Asm: VSHUFPD, CPU Feature: AVX512
|
||||||
func (x Uint64x8) concatSelectedConstantGrouped(hilos uint8, y Uint64x8) Uint64x8
|
func (x Uint64x8) concatSelectedConstantGrouped(hilos uint8, y Uint64x8) Uint64x8
|
||||||
|
|
||||||
|
/* permuteScalars */
|
||||||
|
|
||||||
|
// permuteScalars performs a permutation of vector x using constant indices:
|
||||||
|
// result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]]}
|
||||||
|
// Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
|
||||||
|
//
|
||||||
|
// indices results in better performance when it's a constant, a non-constant value will be translated into a jump table.
|
||||||
|
//
|
||||||
|
// Asm: VPSHUFD, CPU Feature: AVX
|
||||||
|
func (x Int32x4) permuteScalars(indices uint8) Int32x4
|
||||||
|
|
||||||
|
// permuteScalars performs a permutation of vector x using constant indices:
|
||||||
|
// result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]]}
|
||||||
|
// Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
|
||||||
|
//
|
||||||
|
// indices results in better performance when it's a constant, a non-constant value will be translated into a jump table.
|
||||||
|
//
|
||||||
|
// Asm: VPSHUFD, CPU Feature: AVX
|
||||||
|
func (x Uint32x4) permuteScalars(indices uint8) Uint32x4
|
||||||
|
|
||||||
|
/* permuteScalarsGrouped */
|
||||||
|
|
||||||
|
// permuteScalarsGrouped performs a grouped permutation of vector x using constant indices:
|
||||||
|
// result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...}
|
||||||
|
// Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
|
||||||
|
// Each group is of size 128-bit.
|
||||||
|
//
|
||||||
|
// indices results in better performance when it's a constant, a non-constant value will be translated into a jump table.
|
||||||
|
//
|
||||||
|
// Asm: VPSHUFD, CPU Feature: AVX2
|
||||||
|
func (x Int32x8) permuteScalarsGrouped(indices uint8) Int32x8
|
||||||
|
|
||||||
|
// permuteScalarsGrouped performs a grouped permutation of vector x using constant indices:
|
||||||
|
// result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...}
|
||||||
|
// Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
|
||||||
|
// Each group is of size 128-bit.
|
||||||
|
//
|
||||||
|
// indices results in better performance when it's a constant, a non-constant value will be translated into a jump table.
|
||||||
|
//
|
||||||
|
// Asm: VPSHUFD, CPU Feature: AVX512
|
||||||
|
func (x Int32x16) permuteScalarsGrouped(indices uint8) Int32x16
|
||||||
|
|
||||||
|
// permuteScalarsGrouped performs a grouped permutation of vector x using constant indices:
|
||||||
|
// result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...}
|
||||||
|
// Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
|
||||||
|
// Each group is of size 128-bit.
|
||||||
|
//
|
||||||
|
// indices results in better performance when it's a constant, a non-constant value will be translated into a jump table.
|
||||||
|
//
|
||||||
|
// Asm: VPSHUFD, CPU Feature: AVX2
|
||||||
|
func (x Uint32x8) permuteScalarsGrouped(indices uint8) Uint32x8
|
||||||
|
|
||||||
|
// permuteScalarsGrouped performs a grouped permutation of vector x using constant indices:
|
||||||
|
// result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...}
|
||||||
|
// Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
|
||||||
|
// Each group is of size 128-bit.
|
||||||
|
//
|
||||||
|
// indices results in better performance when it's a constant, a non-constant value will be translated into a jump table.
|
||||||
|
//
|
||||||
|
// Asm: VPSHUFD, CPU Feature: AVX512
|
||||||
|
func (x Uint32x16) permuteScalarsGrouped(indices uint8) Uint32x16
|
||||||
|
|
||||||
|
/* permuteScalarsHi */
|
||||||
|
|
||||||
|
// permuteScalarsHi performs a permutation of vector x using constant indices:
|
||||||
|
// result = {x[0], x[1], x[2], x[3], x[indices[0:2]+4], x[indices[2:4]+4], x[indices[4:6]+4], x[indices[6:8]+4]}
|
||||||
|
// Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
|
||||||
|
//
|
||||||
|
// indices results in better performance when it's a constant, a non-constant value will be translated into a jump table.
|
||||||
|
//
|
||||||
|
// Asm: VPSHUFHW, CPU Feature: AVX512
|
||||||
|
func (x Int16x8) permuteScalarsHi(indices uint8) Int16x8
|
||||||
|
|
||||||
|
// permuteScalarsHi performs a permutation of vector x using constant indices:
|
||||||
|
// result = {x[0], x[1], x[2], x[3], x[indices[0:2]+4], x[indices[2:4]+4], x[indices[4:6]+4], x[indices[6:8]+4]}
|
||||||
|
// Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
|
||||||
|
//
|
||||||
|
// indices results in better performance when it's a constant, a non-constant value will be translated into a jump table.
|
||||||
|
//
|
||||||
|
// Asm: VPSHUFHW, CPU Feature: AVX512
|
||||||
|
func (x Uint16x8) permuteScalarsHi(indices uint8) Uint16x8
|
||||||
|
|
||||||
|
/* permuteScalarsHiGrouped */
|
||||||
|
|
||||||
|
// permuteScalarsHiGrouped performs a grouped permutation of vector x using constant indices:
|
||||||
|
// result =
|
||||||
|
//
|
||||||
|
// {x_group0[0], x_group0[1], x_group0[2], x_group0[3], x_group0[indices[0:2]+4], x_group0[indices[2:4]+4], x_group0[indices[4:6]+4], x_group0[indices[6:8]+4],
|
||||||
|
// x_group1[0], x_group1[1], x_group1[2], x_group1[3], x_group1[indices[0:2]+4], ...}
|
||||||
|
//
|
||||||
|
// Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
|
||||||
|
// Each group is of size 128-bit.
|
||||||
|
//
|
||||||
|
// indices results in better performance when it's a constant, a non-constant value will be translated into a jump table.
|
||||||
|
//
|
||||||
|
// Asm: VPSHUFHW, CPU Feature: AVX2
|
||||||
|
func (x Int16x16) permuteScalarsHiGrouped(indices uint8) Int16x16
|
||||||
|
|
||||||
|
// permuteScalarsHiGrouped performs a grouped permutation of vector x using constant indices:
|
||||||
|
// result =
|
||||||
|
//
|
||||||
|
// {x_group0[0], x_group0[1], x_group0[2], x_group0[3], x_group0[indices[0:2]+4], x_group0[indices[2:4]+4], x_group0[indices[4:6]+4], x_group0[indices[6:8]+4],
|
||||||
|
// x_group1[0], x_group1[1], x_group1[2], x_group1[3], x_group1[indices[0:2]+4], ...}
|
||||||
|
//
|
||||||
|
// Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
|
||||||
|
// Each group is of size 128-bit.
|
||||||
|
//
|
||||||
|
// indices results in better performance when it's a constant, a non-constant value will be translated into a jump table.
|
||||||
|
//
|
||||||
|
// Asm: VPSHUFHW, CPU Feature: AVX512
|
||||||
|
func (x Int16x32) permuteScalarsHiGrouped(indices uint8) Int16x32
|
||||||
|
|
||||||
|
// permuteScalarsHiGrouped performs a grouped permutation of vector x using constant indices:
|
||||||
|
// result =
|
||||||
|
//
|
||||||
|
// {x_group0[0], x_group0[1], x_group0[2], x_group0[3], x_group0[indices[0:2]+4], x_group0[indices[2:4]+4], x_group0[indices[4:6]+4], x_group0[indices[6:8]+4],
|
||||||
|
// x_group1[0], x_group1[1], x_group1[2], x_group1[3], x_group1[indices[0:2]+4], ...}
|
||||||
|
//
|
||||||
|
// Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
|
||||||
|
// Each group is of size 128-bit.
|
||||||
|
//
|
||||||
|
// indices results in better performance when it's a constant, a non-constant value will be translated into a jump table.
|
||||||
|
//
|
||||||
|
// Asm: VPSHUFHW, CPU Feature: AVX2
|
||||||
|
func (x Uint16x16) permuteScalarsHiGrouped(indices uint8) Uint16x16
|
||||||
|
|
||||||
|
// permuteScalarsHiGrouped performs a grouped permutation of vector x using constant indices:
|
||||||
|
// result =
|
||||||
|
//
|
||||||
|
// {x_group0[0], x_group0[1], x_group0[2], x_group0[3], x_group0[indices[0:2]+4], x_group0[indices[2:4]+4], x_group0[indices[4:6]+4], x_group0[indices[6:8]+4],
|
||||||
|
// x_group1[0], x_group1[1], x_group1[2], x_group1[3], x_group1[indices[0:2]+4], ...}
|
||||||
|
//
|
||||||
|
// Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
|
||||||
|
// Each group is of size 128-bit.
|
||||||
|
//
|
||||||
|
// indices results in better performance when it's a constant, a non-constant value will be translated into a jump table.
|
||||||
|
//
|
||||||
|
// Asm: VPSHUFHW, CPU Feature: AVX512
|
||||||
|
func (x Uint16x32) permuteScalarsHiGrouped(indices uint8) Uint16x32
|
||||||
|
|
||||||
|
/* permuteScalarsLo */
|
||||||
|
|
||||||
|
// permuteScalarsLo performs a permutation of vector x using constant indices:
|
||||||
|
// result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]], x[4], x[5], x[6], x[7]}
|
||||||
|
// Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
|
||||||
|
//
|
||||||
|
// indices results in better performance when it's a constant, a non-constant value will be translated into a jump table.
|
||||||
|
//
|
||||||
|
// Asm: VPSHUFLW, CPU Feature: AVX512
|
||||||
|
func (x Int16x8) permuteScalarsLo(indices uint8) Int16x8
|
||||||
|
|
||||||
|
// permuteScalarsLo performs a permutation of vector x using constant indices:
|
||||||
|
// result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]], x[4], x[5], x[6], x[7]}
|
||||||
|
// Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
|
||||||
|
//
|
||||||
|
// indices results in better performance when it's a constant, a non-constant value will be translated into a jump table.
|
||||||
|
//
|
||||||
|
// Asm: VPSHUFLW, CPU Feature: AVX512
|
||||||
|
func (x Uint16x8) permuteScalarsLo(indices uint8) Uint16x8
|
||||||
|
|
||||||
|
/* permuteScalarsLoGrouped */
|
||||||
|
|
||||||
|
// permuteScalarsLoGrouped performs a grouped permutation of vector x using constant indices:
|
||||||
|
//
|
||||||
|
// result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x[4], x[5], x[6], x[7],
|
||||||
|
// x_group1[indices[0:2]], ...}
|
||||||
|
//
|
||||||
|
// Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
|
||||||
|
// Each group is of size 128-bit.
|
||||||
|
//
|
||||||
|
// indices results in better performance when it's a constant, a non-constant value will be translated into a jump table.
|
||||||
|
//
|
||||||
|
// Asm: VPSHUFLW, CPU Feature: AVX2
|
||||||
|
func (x Int16x16) permuteScalarsLoGrouped(indices uint8) Int16x16
|
||||||
|
|
||||||
|
// permuteScalarsLoGrouped performs a grouped permutation of vector x using constant indices:
|
||||||
|
//
|
||||||
|
// result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x[4], x[5], x[6], x[7],
|
||||||
|
// x_group1[indices[0:2]], ...}
|
||||||
|
//
|
||||||
|
// Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
|
||||||
|
// Each group is of size 128-bit.
|
||||||
|
//
|
||||||
|
// indices results in better performance when it's a constant, a non-constant value will be translated into a jump table.
|
||||||
|
//
|
||||||
|
// Asm: VPSHUFLW, CPU Feature: AVX512
|
||||||
|
func (x Int16x32) permuteScalarsLoGrouped(indices uint8) Int16x32
|
||||||
|
|
||||||
|
// permuteScalarsLoGrouped performs a grouped permutation of vector x using constant indices:
|
||||||
|
//
|
||||||
|
// result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x[4], x[5], x[6], x[7],
|
||||||
|
// x_group1[indices[0:2]], ...}
|
||||||
|
//
|
||||||
|
// Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
|
||||||
|
// Each group is of size 128-bit.
|
||||||
|
//
|
||||||
|
// indices results in better performance when it's a constant, a non-constant value will be translated into a jump table.
|
||||||
|
//
|
||||||
|
// Asm: VPSHUFLW, CPU Feature: AVX2
|
||||||
|
func (x Uint16x16) permuteScalarsLoGrouped(indices uint8) Uint16x16
|
||||||
|
|
||||||
|
// permuteScalarsLoGrouped performs a grouped permutation of vector x using constant indices:
|
||||||
|
//
|
||||||
|
// result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x[4], x[5], x[6], x[7],
|
||||||
|
// x_group1[indices[0:2]], ...}
|
||||||
|
//
|
||||||
|
// Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
|
||||||
|
// Each group is of size 128-bit.
|
||||||
|
//
|
||||||
|
// indices results in better performance when it's a constant, a non-constant value will be translated into a jump table.
|
||||||
|
//
|
||||||
|
// Asm: VPSHUFLW, CPU Feature: AVX512
|
||||||
|
func (x Uint16x32) permuteScalarsLoGrouped(indices uint8) Uint16x32
|
||||||
|
|
||||||
/* tern */
|
/* tern */
|
||||||
|
|
||||||
// tern performs a logical operation on three vectors based on the 8-bit truth table.
|
// tern performs a logical operation on three vectors based on the 8-bit truth table.
|
||||||
|
|
|
||||||
|
|
@ -989,3 +989,280 @@ func (x Int64x8) SelectFromPairGrouped(a, b uint8, y Int64x8) Int64x8 {
|
||||||
}
|
}
|
||||||
panic("missing case, switch should be exhaustive")
|
panic("missing case, switch should be exhaustive")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* PermuteScalars */
|
||||||
|
|
||||||
|
// PermuteScalars performs a permutation of vector x's elements using the supplied indices:
|
||||||
|
//
|
||||||
|
// result = {x[a], x[b], x[c], x[d]}
|
||||||
|
//
|
||||||
|
// Parameters a,b,c,d should have values between 0 and 3.
|
||||||
|
// If a through d are constants, then an instruction will be inlined, otherwise
|
||||||
|
// a jump table may be generated.
|
||||||
|
//
|
||||||
|
// Asm: VPSHUFD, CPU Feature: AVX
|
||||||
|
func (x Int32x4) PermuteScalars(a, b, c, d uint8) Int32x4 {
|
||||||
|
return x.permuteScalars(a&3 | (b&3)<<2 | (c&3)<<4 | d<<6)
|
||||||
|
}
|
||||||
|
|
||||||
|
// PermuteScalars performs a permutation of vector x's elements using the supplied indices:
|
||||||
|
//
|
||||||
|
// result = {x[a], x[b], x[c], x[d]}
|
||||||
|
//
|
||||||
|
// Parameters a,b,c,d should have values between 0 and 3.
|
||||||
|
// If a through d are constants, then an instruction will be inlined, otherwise
|
||||||
|
// a jump table may be generated.
|
||||||
|
//
|
||||||
|
// Asm: VPSHUFD, CPU Feature: AVX
|
||||||
|
func (x Uint32x4) PermuteScalars(a, b, c, d uint8) Uint32x4 {
|
||||||
|
return x.permuteScalars(a&3 | (b&3)<<2 | (c&3)<<4 | d<<6)
|
||||||
|
}
|
||||||
|
|
||||||
|
/* PermuteScalarsGrouped */
|
||||||
|
|
||||||
|
// PermuteScalarsGrouped performs a grouped permutation of vector x using the supplied indices:
|
||||||
|
//
|
||||||
|
// result = {x[a], x[b], x[c], x[d], x[a+4], x[b+4], x[c+4], x[d+4]}
|
||||||
|
//
|
||||||
|
// Parameters a,b,c,d should have values between 0 and 3.
|
||||||
|
// If a through d are constants, then an instruction will be inlined, otherwise
|
||||||
|
// a jump table may be generated.
|
||||||
|
//
|
||||||
|
// Asm: VPSHUFD, CPU Feature: AVX2
|
||||||
|
func (x Int32x8) PermuteScalarsGrouped(a, b, c, d uint8) Int32x8 {
|
||||||
|
return x.permuteScalarsGrouped(a&3 | (b&3)<<2 | (c&3)<<4 | d<<6)
|
||||||
|
}
|
||||||
|
|
||||||
|
// PermuteScalarsGrouped performs a grouped permutation of vector x using the supplied indices:
|
||||||
|
//
|
||||||
|
// result =
|
||||||
|
// { x[a], x[b], x[c], x[d], x[a+4], x[b+4], x[c+4], x[d+4],
|
||||||
|
// x[a+8], x[b+8], x[c+8], x[d+8], x[a+12], x[b+12], x[c+12], x[d+12]}
|
||||||
|
//
|
||||||
|
// Parameters a,b,c,d should have values between 0 and 3.
|
||||||
|
// If a through d are constants, then an instruction will be inlined, otherwise
|
||||||
|
// a jump table may be generated.
|
||||||
|
//
|
||||||
|
// Asm: VPSHUFD, CPU Feature: AVX512
|
||||||
|
func (x Int32x16) PermuteScalarsGrouped(a, b, c, d uint8) Int32x16 {
|
||||||
|
return x.permuteScalarsGrouped(a&3 | (b&3)<<2 | (c&3)<<4 | d<<6)
|
||||||
|
}
|
||||||
|
|
||||||
|
// PermuteScalarsGrouped performs a grouped permutation of vector x using the supplied indices:
|
||||||
|
//
|
||||||
|
// result = {x[a], x[b], x[c], x[d], x[a+4], x[b+4], x[c+4], x[d+4]}
|
||||||
|
//
|
||||||
|
// Parameters a,b,c,d should have values between 0 and 3.
|
||||||
|
// If a through d are constants, then an instruction will be inlined, otherwise
|
||||||
|
// a jump table is generated.
|
||||||
|
//
|
||||||
|
// Asm: VPSHUFD, CPU Feature: AVX2
|
||||||
|
func (x Uint32x8) PermuteScalarsGrouped(a, b, c, d uint8) Uint32x8 {
|
||||||
|
return x.permuteScalarsGrouped(a&3 | (b&3)<<2 | (c&3)<<4 | d<<6)
|
||||||
|
}
|
||||||
|
|
||||||
|
// PermuteScalarsGrouped performs a grouped permutation of vector x using the supplied indices:
|
||||||
|
//
|
||||||
|
// result =
|
||||||
|
// { x[a], x[b], x[c], x[d], x[a+4], x[b+4], x[c+4], x[d+4],
|
||||||
|
// x[a+8], x[b+8], x[c+8], x[d+8], x[a+12], x[b+12], x[c+12], x[d+12]}
|
||||||
|
//
|
||||||
|
// Parameters a,b,c,d should have values between 0 and 3.
|
||||||
|
// If a through d are constants, then an instruction will be inlined, otherwise
|
||||||
|
// a jump table is generated.
|
||||||
|
//
|
||||||
|
// Asm: VPSHUFD, CPU Feature: AVX512
|
||||||
|
func (x Uint32x16) PermuteScalarsGrouped(a, b, c, d uint8) Uint32x16 {
|
||||||
|
return x.permuteScalarsGrouped(a&3 | (b&3)<<2 | (c&3)<<4 | d<<6)
|
||||||
|
}
|
||||||
|
|
||||||
|
/* PermuteScalarsHi */
|
||||||
|
|
||||||
|
// PermuteScalarsHi performs a permutation of vector x using the supplied indices:
|
||||||
|
//
|
||||||
|
// result = {x[0], x[1], x[2], x[3], x[a+4], x[b+4], x[c+4], x[d+4]}
|
||||||
|
//
|
||||||
|
// Parameters a,b,c,d should have values between 0 and 3.
|
||||||
|
// If a through d are constants, then an instruction will be inlined, otherwise
|
||||||
|
// a jump table is generated.
|
||||||
|
//
|
||||||
|
// Asm: VPSHUFHW, CPU Feature: AVX512
|
||||||
|
func (x Int16x8) PermuteScalarsHi(a, b, c, d uint8) Int16x8 {
|
||||||
|
return x.permuteScalarsHi(a&3 | (b&3)<<2 | (c&3)<<4 | d<<6)
|
||||||
|
}
|
||||||
|
|
||||||
|
// PermuteScalarsHi performs a permutation of vector x using the supplied indices:
|
||||||
|
//
|
||||||
|
// result = {x[0], x[1], x[2], x[3], x[a+4], x[b+4], x[c+4], x[d+4]}
|
||||||
|
//
|
||||||
|
// Parameters a,b,c,d should have values between 0 and 3.
|
||||||
|
// If a through d are constants, then an instruction will be inlined, otherwise
|
||||||
|
// a jump table is generated.
|
||||||
|
//
|
||||||
|
// Asm: VPSHUFHW, CPU Feature: AVX512
|
||||||
|
func (x Uint16x8) PermuteScalarsHi(a, b, c, d uint8) Uint16x8 {
|
||||||
|
return x.permuteScalarsHi(a&3 | (b&3)<<2 | (c&3)<<4 | d<<6)
|
||||||
|
}
|
||||||
|
|
||||||
|
/* PermuteScalarsHiGrouped */
|
||||||
|
|
||||||
|
// PermuteScalarsHiGrouped performs a grouped permutation of vector x using the supplied indices:
|
||||||
|
//
|
||||||
|
// result =
|
||||||
|
// {x[0], x[1], x[2], x[3], x[a+4], x[b+4], x[c+4], x[d+4],
|
||||||
|
// x[8], x[9], x[10], x[11], x[a+12], x[b+12], x[c+12], x[d+12]}
|
||||||
|
//
|
||||||
|
// Parameters a,b,c,d should have values between 0 and 3.
|
||||||
|
// If a through d are constants, then an instruction will be inlined, otherwise
|
||||||
|
// a jump table is generated.
|
||||||
|
//
|
||||||
|
// Asm: VPSHUFHW, CPU Feature: AVX2
|
||||||
|
func (x Int16x16) PermuteScalarsHiGrouped(a, b, c, d uint8) Int16x16 {
|
||||||
|
return x.permuteScalarsHiGrouped(a&3 | (b&3)<<2 | (c&3)<<4 | d<<6)
|
||||||
|
}
|
||||||
|
|
||||||
|
// PermuteScalarsHiGrouped performs a grouped permutation of vector x using the supplied indices:
|
||||||
|
//
|
||||||
|
// result =
|
||||||
|
// {x[0], x[1], x[2], x[3], x[a+4], x[b+4], x[c+4], x[d+4],
|
||||||
|
// x[8], x[9], x[10], x[11], x[a+12], x[b+12], x[c+12], x[d+12],
|
||||||
|
// x[16], x[17], x[18], x[19], x[a+20], x[b+20], x[c+20], x[d+20],
|
||||||
|
// x[24], x[25], x[26], x[27], x[a+28], x[b+28], x[c+28], x[d+28]}
|
||||||
|
//
|
||||||
|
// Parameters a,b,c,d should have values between 0 and 3.
|
||||||
|
// If a through d are constants, then an instruction will be inlined, otherwise
|
||||||
|
// a jump table is generated.
|
||||||
|
//
|
||||||
|
// Asm: VPSHUFHW, CPU Feature: AVX512
|
||||||
|
func (x Int16x32) PermuteScalarsHiGrouped(a, b, c, d uint8) Int16x32 {
|
||||||
|
return x.permuteScalarsHiGrouped(a&3 | (b&3)<<2 | (c&3)<<4 | d<<6)
|
||||||
|
}
|
||||||
|
|
||||||
|
// PermuteScalarsHiGrouped performs a grouped permutation of vector x using the supplied indices:
|
||||||
|
//
|
||||||
|
// result =
|
||||||
|
// {x[0], x[1], x[2], x[3], x[a+4], x[b+4], x[c+4], x[d+4],
|
||||||
|
// x[8], x[9], x[10], x[11], x[a+12], x[b+12], x[c+12], x[d+12]}
|
||||||
|
//
|
||||||
|
// Each group is of size 128-bit.
|
||||||
|
//
|
||||||
|
// Parameters a,b,c,d should have values between 0 and 3.
|
||||||
|
// If a through d are constants, then an instruction will be inlined, otherwise
|
||||||
|
// a jump table is generated.
|
||||||
|
//
|
||||||
|
// Asm: VPSHUFHW, CPU Feature: AVX2
|
||||||
|
func (x Uint16x16) PermuteScalarsHiGrouped(a, b, c, d uint8) Uint16x16 {
|
||||||
|
return x.permuteScalarsHiGrouped(a&3 | (b&3)<<2 | (c&3)<<4 | d<<6)
|
||||||
|
}
|
||||||
|
|
||||||
|
// PermuteScalarsHiGrouped performs a grouped permutation of vector x using the supplied indices:
//
//	result =
//	{x[0], x[1], x[2], x[3], x[a+4], x[b+4], x[c+4], x[d+4],
//	 x[8], x[9], x[10], x[11], x[a+12], x[b+12], x[c+12], x[d+12],
//	 x[16], x[17], x[18], x[19], x[a+20], x[b+20], x[c+20], x[d+20],
//	 x[24], x[25], x[26], x[27], x[a+28], x[b+28], x[c+28], x[d+28]}
//
// Each group is of size 128-bit.
//
// Parameters a,b,c,d should have values between 0 and 3.
// If a through d are constants, then an instruction will be inlined, otherwise
// a jump table is generated.
//
// Asm: VPSHUFHW, CPU Feature: AVX512
func (x Uint16x32) PermuteScalarsHiGrouped(a, b, c, d uint8) Uint16x32 {
	return x.permuteScalarsHiGrouped(a&3 | (b&3)<<2 | (c&3)<<4 | d<<6)
}
|
||||||
|
|
||||||
|
/* PermuteScalarsLo */
|
||||||
|
|
||||||
|
// PermuteScalarsLo performs a permutation of vector x using the supplied indices:
//
//	result = {x[a], x[b], x[c], x[d], x[4], x[5], x[6], x[7]}
//
// Parameters a,b,c,d should have values between 0 and 3.
// If a through d are constants, then an instruction will be inlined, otherwise
// a jump table is generated.
//
// Asm: VPSHUFLW, CPU Feature: AVX512
func (x Int16x8) PermuteScalarsLo(a, b, c, d uint8) Int16x8 {
	// Pack the four 2-bit indices into one 8-bit immediate: a in bits 0-1,
	// b in 2-3, c in 4-5, d in 6-7 (d's high bits shift out of the uint8).
	return x.permuteScalarsLo(a&3 | (b&3)<<2 | (c&3)<<4 | d<<6)
}
|
||||||
|
|
||||||
|
// PermuteScalarsLo performs a permutation of vector x using the supplied indices:
//
//	result = {x[a], x[b], x[c], x[d], x[4], x[5], x[6], x[7]}
//
// Parameters a,b,c,d should have values between 0 and 3.
// If a through d are constants, then an instruction will be inlined, otherwise
// a jump table is generated.
//
// Asm: VPSHUFLW, CPU Feature: AVX512
func (x Uint16x8) PermuteScalarsLo(a, b, c, d uint8) Uint16x8 {
	// Pack the four 2-bit indices into one 8-bit immediate: a in bits 0-1,
	// b in 2-3, c in 4-5, d in 6-7 (d's high bits shift out of the uint8).
	return x.permuteScalarsLo(a&3 | (b&3)<<2 | (c&3)<<4 | d<<6)
}
|
||||||
|
|
||||||
|
/* PermuteScalarsLoGrouped */
|
||||||
|
|
||||||
|
// PermuteScalarsLoGrouped performs a grouped permutation of vector x using the supplied indices:
//
//	result =
//	{x[a], x[b], x[c], x[d], x[4], x[5], x[6], x[7],
//	 x[a+8], x[b+8], x[c+8], x[d+8], x[12], x[13], x[14], x[15]}
//
// Each group is of size 128-bit.
//
// Parameters a,b,c,d should have values between 0 and 3.
// If a through d are constants, then an instruction will be inlined, otherwise
// a jump table is generated.
//
// Asm: VPSHUFLW, CPU Feature: AVX2
func (x Int16x16) PermuteScalarsLoGrouped(a, b, c, d uint8) Int16x16 {
	return x.permuteScalarsLoGrouped(a&3 | (b&3)<<2 | (c&3)<<4 | d<<6)
}
|
||||||
|
|
||||||
|
// PermuteScalarsLoGrouped performs a grouped permutation of vector x using the supplied indices:
//
//	result =
//	{x[a], x[b], x[c], x[d], x[4], x[5], x[6], x[7],
//	 x[a+8], x[b+8], x[c+8], x[d+8], x[12], x[13], x[14], x[15],
//	 x[a+16], x[b+16], x[c+16], x[d+16], x[20], x[21], x[22], x[23],
//	 x[a+24], x[b+24], x[c+24], x[d+24], x[28], x[29], x[30], x[31]}
//
// Each group is of size 128-bit.
//
// Parameters a,b,c,d should have values between 0 and 3.
// If a through d are constants, then an instruction will be inlined, otherwise
// a jump table is generated.
//
// Asm: VPSHUFLW, CPU Feature: AVX512
func (x Int16x32) PermuteScalarsLoGrouped(a, b, c, d uint8) Int16x32 {
	return x.permuteScalarsLoGrouped(a&3 | (b&3)<<2 | (c&3)<<4 | d<<6)
}
|
||||||
|
|
||||||
|
// PermuteScalarsLoGrouped performs a grouped permutation of vector x using the supplied indices:
//
//	result =
//	{x[a], x[b], x[c], x[d], x[4], x[5], x[6], x[7],
//	 x[a+8], x[b+8], x[c+8], x[d+8], x[12], x[13], x[14], x[15]}
//
// Each group is of size 128-bit.
//
// Parameters a,b,c,d should have values between 0 and 3.
// If a through d are constants, then an instruction will be inlined, otherwise
// a jump table is generated.
//
// Asm: VPSHUFLW, CPU Feature: AVX2
func (x Uint16x16) PermuteScalarsLoGrouped(a, b, c, d uint8) Uint16x16 {
	return x.permuteScalarsLoGrouped(a&3 | (b&3)<<2 | (c&3)<<4 | d<<6)
}
|
||||||
|
|
||||||
|
// PermuteScalarsLoGrouped performs a grouped permutation of vector x using the supplied indices:
//
//	result =
//	{x[a], x[b], x[c], x[d], x[4], x[5], x[6], x[7],
//	 x[a+8], x[b+8], x[c+8], x[d+8], x[12], x[13], x[14], x[15],
//	 x[a+16], x[b+16], x[c+16], x[d+16], x[20], x[21], x[22], x[23],
//	 x[a+24], x[b+24], x[c+24], x[d+24], x[28], x[29], x[30], x[31]}
//
// Each group is of size 128-bit.
//
// Parameters a,b,c,d should have values between 0 and 3.
// If a through d are constants, then an instruction will be inlined, otherwise
// a jump table is generated.
//
// Asm: VPSHUFLW, CPU Feature: AVX512
func (x Uint16x32) PermuteScalarsLoGrouped(a, b, c, d uint8) Uint16x32 {
	// Pack the four 2-bit indices into one 8-bit immediate: a in bits 0-1,
	// b in 2-3, c in 4-5, d in 6-7 (d's high bits shift out of the uint8).
	return x.permuteScalarsLoGrouped(a&3 | (b&3)<<2 | (c&3)<<4 | d<<6)
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue