mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
[dev.simd] simd, cmd/compile: rename some methods
generated by simdgen CL 692556

these are the "easy" ones:
SaturatedOp -> OpSaturated
PairwiseOp -> OpPairs
OpWithPrecision -> OpScaled
DiffWithOpWithPrecision -> OpScaledResidue

Change-Id: I036bf89c0690bcf9922c376d62cef48392942af3
Reviewed-on: https://go-review.googlesource.com/c/go/+/692357
Reviewed-by: Junyang Shao <shaojunyang@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
parent
d375b95357
commit
6b9b59e144
9 changed files with 4809 additions and 4813 deletions
|
|
@ -80,6 +80,22 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
|||
ssa.OpAMD64VPADDQ128,
|
||||
ssa.OpAMD64VPADDQ256,
|
||||
ssa.OpAMD64VPADDQ512,
|
||||
ssa.OpAMD64VHADDPS128,
|
||||
ssa.OpAMD64VHADDPS256,
|
||||
ssa.OpAMD64VHADDPD128,
|
||||
ssa.OpAMD64VHADDPD256,
|
||||
ssa.OpAMD64VPHADDW128,
|
||||
ssa.OpAMD64VPHADDW256,
|
||||
ssa.OpAMD64VPHADDD128,
|
||||
ssa.OpAMD64VPHADDD256,
|
||||
ssa.OpAMD64VPHADDSW128,
|
||||
ssa.OpAMD64VPHADDSW256,
|
||||
ssa.OpAMD64VPADDSB128,
|
||||
ssa.OpAMD64VPADDSB256,
|
||||
ssa.OpAMD64VPADDSB512,
|
||||
ssa.OpAMD64VPADDSW128,
|
||||
ssa.OpAMD64VPADDSW256,
|
||||
ssa.OpAMD64VPADDSW512,
|
||||
ssa.OpAMD64VADDSUBPS128,
|
||||
ssa.OpAMD64VADDSUBPS256,
|
||||
ssa.OpAMD64VADDSUBPD128,
|
||||
|
|
@ -189,12 +205,15 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
|||
ssa.OpAMD64VMULPD128,
|
||||
ssa.OpAMD64VMULPD256,
|
||||
ssa.OpAMD64VMULPD512,
|
||||
ssa.OpAMD64VSCALEFPS128,
|
||||
ssa.OpAMD64VSCALEFPS256,
|
||||
ssa.OpAMD64VSCALEFPS512,
|
||||
ssa.OpAMD64VSCALEFPD128,
|
||||
ssa.OpAMD64VSCALEFPD256,
|
||||
ssa.OpAMD64VSCALEFPD512,
|
||||
ssa.OpAMD64VPMULLW128,
|
||||
ssa.OpAMD64VPMULLW256,
|
||||
ssa.OpAMD64VPMULLW512,
|
||||
ssa.OpAMD64VPMULLD128,
|
||||
ssa.OpAMD64VPMULLD256,
|
||||
ssa.OpAMD64VPMULLD512,
|
||||
ssa.OpAMD64VPMULLQ128,
|
||||
ssa.OpAMD64VPMULLQ256,
|
||||
ssa.OpAMD64VPMULLQ512,
|
||||
ssa.OpAMD64VPMULDQ128,
|
||||
ssa.OpAMD64VPMULDQ256,
|
||||
ssa.OpAMD64VPMULDQ512,
|
||||
|
|
@ -207,15 +226,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
|||
ssa.OpAMD64VPMULHUW128,
|
||||
ssa.OpAMD64VPMULHUW256,
|
||||
ssa.OpAMD64VPMULHUW512,
|
||||
ssa.OpAMD64VPMULLW128,
|
||||
ssa.OpAMD64VPMULLW256,
|
||||
ssa.OpAMD64VPMULLW512,
|
||||
ssa.OpAMD64VPMULLD128,
|
||||
ssa.OpAMD64VPMULLD256,
|
||||
ssa.OpAMD64VPMULLD512,
|
||||
ssa.OpAMD64VPMULLQ128,
|
||||
ssa.OpAMD64VPMULLQ256,
|
||||
ssa.OpAMD64VPMULLQ512,
|
||||
ssa.OpAMD64VPOR128,
|
||||
ssa.OpAMD64VPOR256,
|
||||
ssa.OpAMD64VPORD512,
|
||||
|
|
@ -223,22 +233,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
|||
ssa.OpAMD64VPMADDWD128,
|
||||
ssa.OpAMD64VPMADDWD256,
|
||||
ssa.OpAMD64VPMADDWD512,
|
||||
ssa.OpAMD64VHADDPS128,
|
||||
ssa.OpAMD64VHADDPS256,
|
||||
ssa.OpAMD64VHADDPD128,
|
||||
ssa.OpAMD64VHADDPD256,
|
||||
ssa.OpAMD64VPHADDW128,
|
||||
ssa.OpAMD64VPHADDW256,
|
||||
ssa.OpAMD64VPHADDD128,
|
||||
ssa.OpAMD64VPHADDD256,
|
||||
ssa.OpAMD64VHSUBPS128,
|
||||
ssa.OpAMD64VHSUBPS256,
|
||||
ssa.OpAMD64VHSUBPD128,
|
||||
ssa.OpAMD64VHSUBPD256,
|
||||
ssa.OpAMD64VPHSUBW128,
|
||||
ssa.OpAMD64VPHSUBW256,
|
||||
ssa.OpAMD64VPHSUBD128,
|
||||
ssa.OpAMD64VPHSUBD256,
|
||||
ssa.OpAMD64VPERMB128,
|
||||
ssa.OpAMD64VPERMB256,
|
||||
ssa.OpAMD64VPERMB512,
|
||||
|
|
@ -265,25 +259,15 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
|||
ssa.OpAMD64VPRORVQ128,
|
||||
ssa.OpAMD64VPRORVQ256,
|
||||
ssa.OpAMD64VPRORVQ512,
|
||||
ssa.OpAMD64VPADDSB128,
|
||||
ssa.OpAMD64VPADDSB256,
|
||||
ssa.OpAMD64VPADDSB512,
|
||||
ssa.OpAMD64VPADDSW128,
|
||||
ssa.OpAMD64VPADDSW256,
|
||||
ssa.OpAMD64VPADDSW512,
|
||||
ssa.OpAMD64VPHADDSW128,
|
||||
ssa.OpAMD64VPHADDSW256,
|
||||
ssa.OpAMD64VPHSUBSW128,
|
||||
ssa.OpAMD64VPHSUBSW256,
|
||||
ssa.OpAMD64VPSUBSB128,
|
||||
ssa.OpAMD64VPSUBSB256,
|
||||
ssa.OpAMD64VPSUBSB512,
|
||||
ssa.OpAMD64VPSUBSW128,
|
||||
ssa.OpAMD64VPSUBSW256,
|
||||
ssa.OpAMD64VPSUBSW512,
|
||||
ssa.OpAMD64VPMADDUBSW128,
|
||||
ssa.OpAMD64VPMADDUBSW256,
|
||||
ssa.OpAMD64VPMADDUBSW512,
|
||||
ssa.OpAMD64VSCALEFPS128,
|
||||
ssa.OpAMD64VSCALEFPS256,
|
||||
ssa.OpAMD64VSCALEFPS512,
|
||||
ssa.OpAMD64VSCALEFPD128,
|
||||
ssa.OpAMD64VSCALEFPD256,
|
||||
ssa.OpAMD64VSCALEFPD512,
|
||||
ssa.OpAMD64VPSLLVW128,
|
||||
ssa.OpAMD64VPSLLVW256,
|
||||
ssa.OpAMD64VPSLLVW512,
|
||||
|
|
@ -335,6 +319,22 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
|||
ssa.OpAMD64VPSUBQ128,
|
||||
ssa.OpAMD64VPSUBQ256,
|
||||
ssa.OpAMD64VPSUBQ512,
|
||||
ssa.OpAMD64VHSUBPS128,
|
||||
ssa.OpAMD64VHSUBPS256,
|
||||
ssa.OpAMD64VHSUBPD128,
|
||||
ssa.OpAMD64VHSUBPD256,
|
||||
ssa.OpAMD64VPHSUBW128,
|
||||
ssa.OpAMD64VPHSUBW256,
|
||||
ssa.OpAMD64VPHSUBD128,
|
||||
ssa.OpAMD64VPHSUBD256,
|
||||
ssa.OpAMD64VPHSUBSW128,
|
||||
ssa.OpAMD64VPHSUBSW256,
|
||||
ssa.OpAMD64VPSUBSB128,
|
||||
ssa.OpAMD64VPSUBSB256,
|
||||
ssa.OpAMD64VPSUBSB512,
|
||||
ssa.OpAMD64VPSUBSW128,
|
||||
ssa.OpAMD64VPSUBSW256,
|
||||
ssa.OpAMD64VPSUBSW512,
|
||||
ssa.OpAMD64VPXOR128,
|
||||
ssa.OpAMD64VPXOR256,
|
||||
ssa.OpAMD64VPXORD512,
|
||||
|
|
@ -369,6 +369,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
|||
ssa.OpAMD64VPADDQMasked128,
|
||||
ssa.OpAMD64VPADDQMasked256,
|
||||
ssa.OpAMD64VPADDQMasked512,
|
||||
ssa.OpAMD64VPADDSBMasked128,
|
||||
ssa.OpAMD64VPADDSBMasked256,
|
||||
ssa.OpAMD64VPADDSBMasked512,
|
||||
ssa.OpAMD64VPADDSWMasked128,
|
||||
ssa.OpAMD64VPADDSWMasked256,
|
||||
ssa.OpAMD64VPADDSWMasked512,
|
||||
ssa.OpAMD64VPANDDMasked128,
|
||||
ssa.OpAMD64VPANDDMasked256,
|
||||
ssa.OpAMD64VPANDDMasked512,
|
||||
|
|
@ -456,12 +462,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
|||
ssa.OpAMD64VPMINUQMasked128,
|
||||
ssa.OpAMD64VPMINUQMasked256,
|
||||
ssa.OpAMD64VPMINUQMasked512,
|
||||
ssa.OpAMD64VSCALEFPSMasked128,
|
||||
ssa.OpAMD64VSCALEFPSMasked256,
|
||||
ssa.OpAMD64VSCALEFPSMasked512,
|
||||
ssa.OpAMD64VSCALEFPDMasked128,
|
||||
ssa.OpAMD64VSCALEFPDMasked256,
|
||||
ssa.OpAMD64VSCALEFPDMasked512,
|
||||
ssa.OpAMD64VPMULDQMasked128,
|
||||
ssa.OpAMD64VPMULDQMasked256,
|
||||
ssa.OpAMD64VPMULDQMasked512,
|
||||
|
|
@ -474,6 +474,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
|||
ssa.OpAMD64VPMULHUWMasked128,
|
||||
ssa.OpAMD64VPMULHUWMasked256,
|
||||
ssa.OpAMD64VPMULHUWMasked512,
|
||||
ssa.OpAMD64VMULPSMasked128,
|
||||
ssa.OpAMD64VMULPSMasked256,
|
||||
ssa.OpAMD64VMULPSMasked512,
|
||||
ssa.OpAMD64VMULPDMasked128,
|
||||
ssa.OpAMD64VMULPDMasked256,
|
||||
ssa.OpAMD64VMULPDMasked512,
|
||||
ssa.OpAMD64VPMULLWMasked128,
|
||||
ssa.OpAMD64VPMULLWMasked256,
|
||||
ssa.OpAMD64VPMULLWMasked512,
|
||||
|
|
@ -483,12 +489,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
|||
ssa.OpAMD64VPMULLQMasked128,
|
||||
ssa.OpAMD64VPMULLQMasked256,
|
||||
ssa.OpAMD64VPMULLQMasked512,
|
||||
ssa.OpAMD64VMULPSMasked128,
|
||||
ssa.OpAMD64VMULPSMasked256,
|
||||
ssa.OpAMD64VMULPSMasked512,
|
||||
ssa.OpAMD64VMULPDMasked128,
|
||||
ssa.OpAMD64VMULPDMasked256,
|
||||
ssa.OpAMD64VMULPDMasked512,
|
||||
ssa.OpAMD64VPORDMasked128,
|
||||
ssa.OpAMD64VPORDMasked256,
|
||||
ssa.OpAMD64VPORDMasked512,
|
||||
|
|
@ -524,21 +524,15 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
|||
ssa.OpAMD64VPRORVQMasked128,
|
||||
ssa.OpAMD64VPRORVQMasked256,
|
||||
ssa.OpAMD64VPRORVQMasked512,
|
||||
ssa.OpAMD64VPADDSBMasked128,
|
||||
ssa.OpAMD64VPADDSBMasked256,
|
||||
ssa.OpAMD64VPADDSBMasked512,
|
||||
ssa.OpAMD64VPADDSWMasked128,
|
||||
ssa.OpAMD64VPADDSWMasked256,
|
||||
ssa.OpAMD64VPADDSWMasked512,
|
||||
ssa.OpAMD64VPSUBSBMasked128,
|
||||
ssa.OpAMD64VPSUBSBMasked256,
|
||||
ssa.OpAMD64VPSUBSBMasked512,
|
||||
ssa.OpAMD64VPSUBSWMasked128,
|
||||
ssa.OpAMD64VPSUBSWMasked256,
|
||||
ssa.OpAMD64VPSUBSWMasked512,
|
||||
ssa.OpAMD64VPMADDUBSWMasked128,
|
||||
ssa.OpAMD64VPMADDUBSWMasked256,
|
||||
ssa.OpAMD64VPMADDUBSWMasked512,
|
||||
ssa.OpAMD64VSCALEFPSMasked128,
|
||||
ssa.OpAMD64VSCALEFPSMasked256,
|
||||
ssa.OpAMD64VSCALEFPSMasked512,
|
||||
ssa.OpAMD64VSCALEFPDMasked128,
|
||||
ssa.OpAMD64VSCALEFPDMasked256,
|
||||
ssa.OpAMD64VSCALEFPDMasked512,
|
||||
ssa.OpAMD64VPSLLVWMasked128,
|
||||
ssa.OpAMD64VPSLLVWMasked256,
|
||||
ssa.OpAMD64VPSLLVWMasked512,
|
||||
|
|
@ -584,6 +578,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
|||
ssa.OpAMD64VPSUBQMasked128,
|
||||
ssa.OpAMD64VPSUBQMasked256,
|
||||
ssa.OpAMD64VPSUBQMasked512,
|
||||
ssa.OpAMD64VPSUBSBMasked128,
|
||||
ssa.OpAMD64VPSUBSBMasked256,
|
||||
ssa.OpAMD64VPSUBSBMasked512,
|
||||
ssa.OpAMD64VPSUBSWMasked128,
|
||||
ssa.OpAMD64VPSUBSWMasked256,
|
||||
ssa.OpAMD64VPSUBSWMasked512,
|
||||
ssa.OpAMD64VPXORDMasked128,
|
||||
ssa.OpAMD64VPXORDMasked256,
|
||||
ssa.OpAMD64VPXORDMasked512,
|
||||
|
|
@ -1085,6 +1085,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
|||
ssa.OpAMD64VPADDQMasked128,
|
||||
ssa.OpAMD64VPADDQMasked256,
|
||||
ssa.OpAMD64VPADDQMasked512,
|
||||
ssa.OpAMD64VPADDSBMasked128,
|
||||
ssa.OpAMD64VPADDSBMasked256,
|
||||
ssa.OpAMD64VPADDSBMasked512,
|
||||
ssa.OpAMD64VPADDSWMasked128,
|
||||
ssa.OpAMD64VPADDSWMasked256,
|
||||
ssa.OpAMD64VPADDSWMasked512,
|
||||
ssa.OpAMD64VPANDDMasked128,
|
||||
ssa.OpAMD64VPANDDMasked256,
|
||||
ssa.OpAMD64VPANDDMasked512,
|
||||
|
|
@ -1121,6 +1127,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
|||
ssa.OpAMD64VRNDSCALEPDMasked128,
|
||||
ssa.OpAMD64VRNDSCALEPDMasked256,
|
||||
ssa.OpAMD64VRNDSCALEPDMasked512,
|
||||
ssa.OpAMD64VREDUCEPSMasked128,
|
||||
ssa.OpAMD64VREDUCEPSMasked256,
|
||||
ssa.OpAMD64VREDUCEPSMasked512,
|
||||
ssa.OpAMD64VREDUCEPDMasked128,
|
||||
ssa.OpAMD64VREDUCEPDMasked256,
|
||||
ssa.OpAMD64VREDUCEPDMasked512,
|
||||
ssa.OpAMD64VCOMPRESSPSMasked128,
|
||||
ssa.OpAMD64VCOMPRESSPSMasked256,
|
||||
ssa.OpAMD64VCOMPRESSPSMasked512,
|
||||
|
|
@ -1145,12 +1157,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
|||
ssa.OpAMD64VCVTPS2UDQMasked128,
|
||||
ssa.OpAMD64VCVTPS2UDQMasked256,
|
||||
ssa.OpAMD64VCVTPS2UDQMasked512,
|
||||
ssa.OpAMD64VREDUCEPSMasked128,
|
||||
ssa.OpAMD64VREDUCEPSMasked256,
|
||||
ssa.OpAMD64VREDUCEPSMasked512,
|
||||
ssa.OpAMD64VREDUCEPDMasked128,
|
||||
ssa.OpAMD64VREDUCEPDMasked256,
|
||||
ssa.OpAMD64VREDUCEPDMasked512,
|
||||
ssa.OpAMD64VDIVPSMasked128,
|
||||
ssa.OpAMD64VDIVPSMasked256,
|
||||
ssa.OpAMD64VDIVPSMasked512,
|
||||
|
|
@ -1244,12 +1250,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
|||
ssa.OpAMD64VPMINUQMasked128,
|
||||
ssa.OpAMD64VPMINUQMasked256,
|
||||
ssa.OpAMD64VPMINUQMasked512,
|
||||
ssa.OpAMD64VSCALEFPSMasked128,
|
||||
ssa.OpAMD64VSCALEFPSMasked256,
|
||||
ssa.OpAMD64VSCALEFPSMasked512,
|
||||
ssa.OpAMD64VSCALEFPDMasked128,
|
||||
ssa.OpAMD64VSCALEFPDMasked256,
|
||||
ssa.OpAMD64VSCALEFPDMasked512,
|
||||
ssa.OpAMD64VPMULDQMasked128,
|
||||
ssa.OpAMD64VPMULDQMasked256,
|
||||
ssa.OpAMD64VPMULDQMasked512,
|
||||
|
|
@ -1262,6 +1262,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
|||
ssa.OpAMD64VPMULHUWMasked128,
|
||||
ssa.OpAMD64VPMULHUWMasked256,
|
||||
ssa.OpAMD64VPMULHUWMasked512,
|
||||
ssa.OpAMD64VMULPSMasked128,
|
||||
ssa.OpAMD64VMULPSMasked256,
|
||||
ssa.OpAMD64VMULPSMasked512,
|
||||
ssa.OpAMD64VMULPDMasked128,
|
||||
ssa.OpAMD64VMULPDMasked256,
|
||||
ssa.OpAMD64VMULPDMasked512,
|
||||
ssa.OpAMD64VPMULLWMasked128,
|
||||
ssa.OpAMD64VPMULLWMasked256,
|
||||
ssa.OpAMD64VPMULLWMasked512,
|
||||
|
|
@ -1271,12 +1277,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
|||
ssa.OpAMD64VPMULLQMasked128,
|
||||
ssa.OpAMD64VPMULLQMasked256,
|
||||
ssa.OpAMD64VPMULLQMasked512,
|
||||
ssa.OpAMD64VMULPSMasked128,
|
||||
ssa.OpAMD64VMULPSMasked256,
|
||||
ssa.OpAMD64VMULPSMasked512,
|
||||
ssa.OpAMD64VMULPDMasked128,
|
||||
ssa.OpAMD64VMULPDMasked256,
|
||||
ssa.OpAMD64VMULPDMasked512,
|
||||
ssa.OpAMD64VPORDMasked128,
|
||||
ssa.OpAMD64VPORDMasked256,
|
||||
ssa.OpAMD64VPORDMasked512,
|
||||
|
|
@ -1357,24 +1357,18 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
|||
ssa.OpAMD64VPDPWSSDSMasked128,
|
||||
ssa.OpAMD64VPDPWSSDSMasked256,
|
||||
ssa.OpAMD64VPDPWSSDSMasked512,
|
||||
ssa.OpAMD64VPADDSBMasked128,
|
||||
ssa.OpAMD64VPADDSBMasked256,
|
||||
ssa.OpAMD64VPADDSBMasked512,
|
||||
ssa.OpAMD64VPADDSWMasked128,
|
||||
ssa.OpAMD64VPADDSWMasked256,
|
||||
ssa.OpAMD64VPADDSWMasked512,
|
||||
ssa.OpAMD64VPSUBSBMasked128,
|
||||
ssa.OpAMD64VPSUBSBMasked256,
|
||||
ssa.OpAMD64VPSUBSBMasked512,
|
||||
ssa.OpAMD64VPSUBSWMasked128,
|
||||
ssa.OpAMD64VPSUBSWMasked256,
|
||||
ssa.OpAMD64VPSUBSWMasked512,
|
||||
ssa.OpAMD64VPMADDUBSWMasked128,
|
||||
ssa.OpAMD64VPMADDUBSWMasked256,
|
||||
ssa.OpAMD64VPMADDUBSWMasked512,
|
||||
ssa.OpAMD64VPDPBUSDSMasked128,
|
||||
ssa.OpAMD64VPDPBUSDSMasked256,
|
||||
ssa.OpAMD64VPDPBUSDSMasked512,
|
||||
ssa.OpAMD64VSCALEFPSMasked128,
|
||||
ssa.OpAMD64VSCALEFPSMasked256,
|
||||
ssa.OpAMD64VSCALEFPSMasked512,
|
||||
ssa.OpAMD64VSCALEFPDMasked128,
|
||||
ssa.OpAMD64VSCALEFPDMasked256,
|
||||
ssa.OpAMD64VSCALEFPDMasked512,
|
||||
ssa.OpAMD64VPSHLDWMasked128,
|
||||
ssa.OpAMD64VPSHLDWMasked256,
|
||||
ssa.OpAMD64VPSHLDWMasked512,
|
||||
|
|
@ -1489,6 +1483,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
|||
ssa.OpAMD64VPSUBQMasked128,
|
||||
ssa.OpAMD64VPSUBQMasked256,
|
||||
ssa.OpAMD64VPSUBQMasked512,
|
||||
ssa.OpAMD64VPSUBSBMasked128,
|
||||
ssa.OpAMD64VPSUBSBMasked256,
|
||||
ssa.OpAMD64VPSUBSBMasked512,
|
||||
ssa.OpAMD64VPSUBSWMasked128,
|
||||
ssa.OpAMD64VPSUBSWMasked256,
|
||||
ssa.OpAMD64VPSUBSWMasked512,
|
||||
ssa.OpAMD64VPDPBUSDMasked128,
|
||||
ssa.OpAMD64VPDPBUSDMasked256,
|
||||
ssa.OpAMD64VPDPBUSDMasked512,
|
||||
|
|
|
|||
|
|
@ -90,6 +90,44 @@
|
|||
(AddMaskedUint64x2 x y mask) => (VPADDQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(AddMaskedUint64x4 x y mask) => (VPADDQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(AddMaskedUint64x8 x y mask) => (VPADDQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
|
||||
(AddPairsFloat32x4 ...) => (VHADDPS128 ...)
|
||||
(AddPairsFloat32x8 ...) => (VHADDPS256 ...)
|
||||
(AddPairsFloat64x2 ...) => (VHADDPD128 ...)
|
||||
(AddPairsFloat64x4 ...) => (VHADDPD256 ...)
|
||||
(AddPairsInt16x8 ...) => (VPHADDW128 ...)
|
||||
(AddPairsInt16x16 ...) => (VPHADDW256 ...)
|
||||
(AddPairsInt32x4 ...) => (VPHADDD128 ...)
|
||||
(AddPairsInt32x8 ...) => (VPHADDD256 ...)
|
||||
(AddPairsUint16x8 ...) => (VPHADDW128 ...)
|
||||
(AddPairsUint16x16 ...) => (VPHADDW256 ...)
|
||||
(AddPairsUint32x4 ...) => (VPHADDD128 ...)
|
||||
(AddPairsUint32x8 ...) => (VPHADDD256 ...)
|
||||
(AddPairsSaturatedInt16x8 ...) => (VPHADDSW128 ...)
|
||||
(AddPairsSaturatedInt16x16 ...) => (VPHADDSW256 ...)
|
||||
(AddSaturatedInt8x16 ...) => (VPADDSB128 ...)
|
||||
(AddSaturatedInt8x32 ...) => (VPADDSB256 ...)
|
||||
(AddSaturatedInt8x64 ...) => (VPADDSB512 ...)
|
||||
(AddSaturatedInt16x8 ...) => (VPADDSW128 ...)
|
||||
(AddSaturatedInt16x16 ...) => (VPADDSW256 ...)
|
||||
(AddSaturatedInt16x32 ...) => (VPADDSW512 ...)
|
||||
(AddSaturatedUint8x16 ...) => (VPADDSB128 ...)
|
||||
(AddSaturatedUint8x32 ...) => (VPADDSB256 ...)
|
||||
(AddSaturatedUint8x64 ...) => (VPADDSB512 ...)
|
||||
(AddSaturatedUint16x8 ...) => (VPADDSW128 ...)
|
||||
(AddSaturatedUint16x16 ...) => (VPADDSW256 ...)
|
||||
(AddSaturatedUint16x32 ...) => (VPADDSW512 ...)
|
||||
(AddSaturatedMaskedInt8x16 x y mask) => (VPADDSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(AddSaturatedMaskedInt8x32 x y mask) => (VPADDSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
|
||||
(AddSaturatedMaskedInt8x64 x y mask) => (VPADDSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
|
||||
(AddSaturatedMaskedInt16x8 x y mask) => (VPADDSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(AddSaturatedMaskedInt16x16 x y mask) => (VPADDSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(AddSaturatedMaskedInt16x32 x y mask) => (VPADDSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
|
||||
(AddSaturatedMaskedUint8x16 x y mask) => (VPADDSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(AddSaturatedMaskedUint8x32 x y mask) => (VPADDSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
|
||||
(AddSaturatedMaskedUint8x64 x y mask) => (VPADDSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
|
||||
(AddSaturatedMaskedUint16x8 x y mask) => (VPADDSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(AddSaturatedMaskedUint16x16 x y mask) => (VPADDSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(AddSaturatedMaskedUint16x32 x y mask) => (VPADDSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
|
||||
(AddSubFloat32x4 ...) => (VADDSUBPS128 ...)
|
||||
(AddSubFloat32x8 ...) => (VADDSUBPS256 ...)
|
||||
(AddSubFloat64x2 ...) => (VADDSUBPD128 ...)
|
||||
|
|
@ -206,18 +244,30 @@
|
|||
(CeilFloat32x8 x) => (VROUNDPS256 [2] x)
|
||||
(CeilFloat64x2 x) => (VROUNDPD128 [2] x)
|
||||
(CeilFloat64x4 x) => (VROUNDPD256 [2] x)
|
||||
(CeilWithPrecisionFloat32x4 [a] x) => (VRNDSCALEPS128 [a+2] x)
|
||||
(CeilWithPrecisionFloat32x8 [a] x) => (VRNDSCALEPS256 [a+2] x)
|
||||
(CeilWithPrecisionFloat32x16 [a] x) => (VRNDSCALEPS512 [a+2] x)
|
||||
(CeilWithPrecisionFloat64x2 [a] x) => (VRNDSCALEPD128 [a+2] x)
|
||||
(CeilWithPrecisionFloat64x4 [a] x) => (VRNDSCALEPD256 [a+2] x)
|
||||
(CeilWithPrecisionFloat64x8 [a] x) => (VRNDSCALEPD512 [a+2] x)
|
||||
(CeilWithPrecisionMaskedFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+2] x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(CeilWithPrecisionMaskedFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+2] x (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(CeilWithPrecisionMaskedFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+2] x (VPMOVVec32x16ToM <types.TypeMask> mask))
|
||||
(CeilWithPrecisionMaskedFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+2] x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(CeilWithPrecisionMaskedFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+2] x (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(CeilWithPrecisionMaskedFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+2] x (VPMOVVec64x8ToM <types.TypeMask> mask))
|
||||
(CeilScaledFloat32x4 [a] x) => (VRNDSCALEPS128 [a+2] x)
|
||||
(CeilScaledFloat32x8 [a] x) => (VRNDSCALEPS256 [a+2] x)
|
||||
(CeilScaledFloat32x16 [a] x) => (VRNDSCALEPS512 [a+2] x)
|
||||
(CeilScaledFloat64x2 [a] x) => (VRNDSCALEPD128 [a+2] x)
|
||||
(CeilScaledFloat64x4 [a] x) => (VRNDSCALEPD256 [a+2] x)
|
||||
(CeilScaledFloat64x8 [a] x) => (VRNDSCALEPD512 [a+2] x)
|
||||
(CeilScaledMaskedFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+2] x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(CeilScaledMaskedFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+2] x (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(CeilScaledMaskedFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+2] x (VPMOVVec32x16ToM <types.TypeMask> mask))
|
||||
(CeilScaledMaskedFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+2] x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(CeilScaledMaskedFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+2] x (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(CeilScaledMaskedFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+2] x (VPMOVVec64x8ToM <types.TypeMask> mask))
|
||||
(CeilScaledResidueFloat32x4 [a] x) => (VREDUCEPS128 [a+2] x)
|
||||
(CeilScaledResidueFloat32x8 [a] x) => (VREDUCEPS256 [a+2] x)
|
||||
(CeilScaledResidueFloat32x16 [a] x) => (VREDUCEPS512 [a+2] x)
|
||||
(CeilScaledResidueFloat64x2 [a] x) => (VREDUCEPD128 [a+2] x)
|
||||
(CeilScaledResidueFloat64x4 [a] x) => (VREDUCEPD256 [a+2] x)
|
||||
(CeilScaledResidueFloat64x8 [a] x) => (VREDUCEPD512 [a+2] x)
|
||||
(CeilScaledResidueMaskedFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+2] x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(CeilScaledResidueMaskedFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+2] x (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(CeilScaledResidueMaskedFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+2] x (VPMOVVec32x16ToM <types.TypeMask> mask))
|
||||
(CeilScaledResidueMaskedFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+2] x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(CeilScaledResidueMaskedFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+2] x (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(CeilScaledResidueMaskedFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+2] x (VPMOVVec64x8ToM <types.TypeMask> mask))
|
||||
(CompressFloat32x4 x mask) => (VCOMPRESSPSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(CompressFloat32x8 x mask) => (VCOMPRESSPSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(CompressFloat32x16 x mask) => (VCOMPRESSPSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
|
||||
|
|
@ -260,54 +310,6 @@
|
|||
(ConvertToUint32MaskedFloat32x4 x mask) => (VCVTPS2UDQMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(ConvertToUint32MaskedFloat32x8 x mask) => (VCVTPS2UDQMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(ConvertToUint32MaskedFloat32x16 x mask) => (VCVTPS2UDQMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
|
||||
(DiffWithCeilWithPrecisionFloat32x4 [a] x) => (VREDUCEPS128 [a+2] x)
|
||||
(DiffWithCeilWithPrecisionFloat32x8 [a] x) => (VREDUCEPS256 [a+2] x)
|
||||
(DiffWithCeilWithPrecisionFloat32x16 [a] x) => (VREDUCEPS512 [a+2] x)
|
||||
(DiffWithCeilWithPrecisionFloat64x2 [a] x) => (VREDUCEPD128 [a+2] x)
|
||||
(DiffWithCeilWithPrecisionFloat64x4 [a] x) => (VREDUCEPD256 [a+2] x)
|
||||
(DiffWithCeilWithPrecisionFloat64x8 [a] x) => (VREDUCEPD512 [a+2] x)
|
||||
(DiffWithCeilWithPrecisionMaskedFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+2] x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(DiffWithCeilWithPrecisionMaskedFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+2] x (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(DiffWithCeilWithPrecisionMaskedFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+2] x (VPMOVVec32x16ToM <types.TypeMask> mask))
|
||||
(DiffWithCeilWithPrecisionMaskedFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+2] x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(DiffWithCeilWithPrecisionMaskedFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+2] x (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(DiffWithCeilWithPrecisionMaskedFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+2] x (VPMOVVec64x8ToM <types.TypeMask> mask))
|
||||
(DiffWithFloorWithPrecisionFloat32x4 [a] x) => (VREDUCEPS128 [a+1] x)
|
||||
(DiffWithFloorWithPrecisionFloat32x8 [a] x) => (VREDUCEPS256 [a+1] x)
|
||||
(DiffWithFloorWithPrecisionFloat32x16 [a] x) => (VREDUCEPS512 [a+1] x)
|
||||
(DiffWithFloorWithPrecisionFloat64x2 [a] x) => (VREDUCEPD128 [a+1] x)
|
||||
(DiffWithFloorWithPrecisionFloat64x4 [a] x) => (VREDUCEPD256 [a+1] x)
|
||||
(DiffWithFloorWithPrecisionFloat64x8 [a] x) => (VREDUCEPD512 [a+1] x)
|
||||
(DiffWithFloorWithPrecisionMaskedFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+1] x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(DiffWithFloorWithPrecisionMaskedFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+1] x (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(DiffWithFloorWithPrecisionMaskedFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+1] x (VPMOVVec32x16ToM <types.TypeMask> mask))
|
||||
(DiffWithFloorWithPrecisionMaskedFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+1] x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(DiffWithFloorWithPrecisionMaskedFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+1] x (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(DiffWithFloorWithPrecisionMaskedFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+1] x (VPMOVVec64x8ToM <types.TypeMask> mask))
|
||||
(DiffWithRoundWithPrecisionFloat32x4 [a] x) => (VREDUCEPS128 [a+0] x)
|
||||
(DiffWithRoundWithPrecisionFloat32x8 [a] x) => (VREDUCEPS256 [a+0] x)
|
||||
(DiffWithRoundWithPrecisionFloat32x16 [a] x) => (VREDUCEPS512 [a+0] x)
|
||||
(DiffWithRoundWithPrecisionFloat64x2 [a] x) => (VREDUCEPD128 [a+0] x)
|
||||
(DiffWithRoundWithPrecisionFloat64x4 [a] x) => (VREDUCEPD256 [a+0] x)
|
||||
(DiffWithRoundWithPrecisionFloat64x8 [a] x) => (VREDUCEPD512 [a+0] x)
|
||||
(DiffWithRoundWithPrecisionMaskedFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+0] x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(DiffWithRoundWithPrecisionMaskedFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+0] x (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(DiffWithRoundWithPrecisionMaskedFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+0] x (VPMOVVec32x16ToM <types.TypeMask> mask))
|
||||
(DiffWithRoundWithPrecisionMaskedFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+0] x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(DiffWithRoundWithPrecisionMaskedFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+0] x (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(DiffWithRoundWithPrecisionMaskedFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+0] x (VPMOVVec64x8ToM <types.TypeMask> mask))
|
||||
(DiffWithTruncWithPrecisionFloat32x4 [a] x) => (VREDUCEPS128 [a+3] x)
|
||||
(DiffWithTruncWithPrecisionFloat32x8 [a] x) => (VREDUCEPS256 [a+3] x)
|
||||
(DiffWithTruncWithPrecisionFloat32x16 [a] x) => (VREDUCEPS512 [a+3] x)
|
||||
(DiffWithTruncWithPrecisionFloat64x2 [a] x) => (VREDUCEPD128 [a+3] x)
|
||||
(DiffWithTruncWithPrecisionFloat64x4 [a] x) => (VREDUCEPD256 [a+3] x)
|
||||
(DiffWithTruncWithPrecisionFloat64x8 [a] x) => (VREDUCEPD512 [a+3] x)
|
||||
(DiffWithTruncWithPrecisionMaskedFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+3] x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(DiffWithTruncWithPrecisionMaskedFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+3] x (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(DiffWithTruncWithPrecisionMaskedFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+3] x (VPMOVVec32x16ToM <types.TypeMask> mask))
|
||||
(DiffWithTruncWithPrecisionMaskedFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+3] x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(DiffWithTruncWithPrecisionMaskedFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+3] x (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(DiffWithTruncWithPrecisionMaskedFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+3] x (VPMOVVec64x8ToM <types.TypeMask> mask))
|
||||
(DivFloat32x4 ...) => (VDIVPS128 ...)
|
||||
(DivFloat32x8 ...) => (VDIVPS256 ...)
|
||||
(DivFloat32x16 ...) => (VDIVPS512 ...)
|
||||
|
|
@ -387,18 +389,30 @@
|
|||
(FloorFloat32x8 x) => (VROUNDPS256 [1] x)
|
||||
(FloorFloat64x2 x) => (VROUNDPD128 [1] x)
|
||||
(FloorFloat64x4 x) => (VROUNDPD256 [1] x)
|
||||
(FloorWithPrecisionFloat32x4 [a] x) => (VRNDSCALEPS128 [a+1] x)
|
||||
(FloorWithPrecisionFloat32x8 [a] x) => (VRNDSCALEPS256 [a+1] x)
|
||||
(FloorWithPrecisionFloat32x16 [a] x) => (VRNDSCALEPS512 [a+1] x)
|
||||
(FloorWithPrecisionFloat64x2 [a] x) => (VRNDSCALEPD128 [a+1] x)
|
||||
(FloorWithPrecisionFloat64x4 [a] x) => (VRNDSCALEPD256 [a+1] x)
|
||||
(FloorWithPrecisionFloat64x8 [a] x) => (VRNDSCALEPD512 [a+1] x)
|
||||
(FloorWithPrecisionMaskedFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+1] x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(FloorWithPrecisionMaskedFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+1] x (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(FloorWithPrecisionMaskedFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+1] x (VPMOVVec32x16ToM <types.TypeMask> mask))
|
||||
(FloorWithPrecisionMaskedFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+1] x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(FloorWithPrecisionMaskedFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+1] x (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(FloorWithPrecisionMaskedFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+1] x (VPMOVVec64x8ToM <types.TypeMask> mask))
|
||||
(FloorScaledFloat32x4 [a] x) => (VRNDSCALEPS128 [a+1] x)
|
||||
(FloorScaledFloat32x8 [a] x) => (VRNDSCALEPS256 [a+1] x)
|
||||
(FloorScaledFloat32x16 [a] x) => (VRNDSCALEPS512 [a+1] x)
|
||||
(FloorScaledFloat64x2 [a] x) => (VRNDSCALEPD128 [a+1] x)
|
||||
(FloorScaledFloat64x4 [a] x) => (VRNDSCALEPD256 [a+1] x)
|
||||
(FloorScaledFloat64x8 [a] x) => (VRNDSCALEPD512 [a+1] x)
|
||||
(FloorScaledMaskedFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+1] x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(FloorScaledMaskedFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+1] x (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(FloorScaledMaskedFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+1] x (VPMOVVec32x16ToM <types.TypeMask> mask))
|
||||
(FloorScaledMaskedFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+1] x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(FloorScaledMaskedFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+1] x (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(FloorScaledMaskedFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+1] x (VPMOVVec64x8ToM <types.TypeMask> mask))
|
||||
(FloorScaledResidueFloat32x4 [a] x) => (VREDUCEPS128 [a+1] x)
|
||||
(FloorScaledResidueFloat32x8 [a] x) => (VREDUCEPS256 [a+1] x)
|
||||
(FloorScaledResidueFloat32x16 [a] x) => (VREDUCEPS512 [a+1] x)
|
||||
(FloorScaledResidueFloat64x2 [a] x) => (VREDUCEPD128 [a+1] x)
|
||||
(FloorScaledResidueFloat64x4 [a] x) => (VREDUCEPD256 [a+1] x)
|
||||
(FloorScaledResidueFloat64x8 [a] x) => (VREDUCEPD512 [a+1] x)
|
||||
(FloorScaledResidueMaskedFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+1] x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(FloorScaledResidueMaskedFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+1] x (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(FloorScaledResidueMaskedFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+1] x (VPMOVVec32x16ToM <types.TypeMask> mask))
|
||||
(FloorScaledResidueMaskedFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+1] x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(FloorScaledResidueMaskedFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+1] x (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(FloorScaledResidueMaskedFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+1] x (VPMOVVec64x8ToM <types.TypeMask> mask))
|
||||
(FusedMultiplyAddFloat32x4 ...) => (VFMADD213PS128 ...)
|
||||
(FusedMultiplyAddFloat32x8 ...) => (VFMADD213PS256 ...)
|
||||
(FusedMultiplyAddFloat32x16 ...) => (VFMADD213PS512 ...)
|
||||
|
|
@ -849,18 +863,15 @@
|
|||
(MulFloat64x2 ...) => (VMULPD128 ...)
|
||||
(MulFloat64x4 ...) => (VMULPD256 ...)
|
||||
(MulFloat64x8 ...) => (VMULPD512 ...)
|
||||
(MulByPowOf2Float32x4 ...) => (VSCALEFPS128 ...)
|
||||
(MulByPowOf2Float32x8 ...) => (VSCALEFPS256 ...)
|
||||
(MulByPowOf2Float32x16 ...) => (VSCALEFPS512 ...)
|
||||
(MulByPowOf2Float64x2 ...) => (VSCALEFPD128 ...)
|
||||
(MulByPowOf2Float64x4 ...) => (VSCALEFPD256 ...)
|
||||
(MulByPowOf2Float64x8 ...) => (VSCALEFPD512 ...)
|
||||
(MulByPowOf2MaskedFloat32x4 x y mask) => (VSCALEFPSMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(MulByPowOf2MaskedFloat32x8 x y mask) => (VSCALEFPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(MulByPowOf2MaskedFloat32x16 x y mask) => (VSCALEFPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
|
||||
(MulByPowOf2MaskedFloat64x2 x y mask) => (VSCALEFPDMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(MulByPowOf2MaskedFloat64x4 x y mask) => (VSCALEFPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(MulByPowOf2MaskedFloat64x8 x y mask) => (VSCALEFPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
|
||||
(MulInt16x8 ...) => (VPMULLW128 ...)
|
||||
(MulInt16x16 ...) => (VPMULLW256 ...)
|
||||
(MulInt16x32 ...) => (VPMULLW512 ...)
|
||||
(MulInt32x4 ...) => (VPMULLD128 ...)
|
||||
(MulInt32x8 ...) => (VPMULLD256 ...)
|
||||
(MulInt32x16 ...) => (VPMULLD512 ...)
|
||||
(MulInt64x2 ...) => (VPMULLQ128 ...)
|
||||
(MulInt64x4 ...) => (VPMULLQ256 ...)
|
||||
(MulInt64x8 ...) => (VPMULLQ512 ...)
|
||||
(MulEvenWidenInt32x4 ...) => (VPMULDQ128 ...)
|
||||
(MulEvenWidenInt32x8 ...) => (VPMULDQ256 ...)
|
||||
(MulEvenWidenInt64x2 ...) => (VPMULDQ128 ...)
|
||||
|
|
@ -889,30 +900,21 @@
|
|||
(MulHighMaskedUint16x8 x y mask) => (VPMULHUWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(MulHighMaskedUint16x16 x y mask) => (VPMULHUWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(MulHighMaskedUint16x32 x y mask) => (VPMULHUWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
|
||||
(MulLowInt16x8 ...) => (VPMULLW128 ...)
|
||||
(MulLowInt16x16 ...) => (VPMULLW256 ...)
|
||||
(MulLowInt16x32 ...) => (VPMULLW512 ...)
|
||||
(MulLowInt32x4 ...) => (VPMULLD128 ...)
|
||||
(MulLowInt32x8 ...) => (VPMULLD256 ...)
|
||||
(MulLowInt32x16 ...) => (VPMULLD512 ...)
|
||||
(MulLowInt64x2 ...) => (VPMULLQ128 ...)
|
||||
(MulLowInt64x4 ...) => (VPMULLQ256 ...)
|
||||
(MulLowInt64x8 ...) => (VPMULLQ512 ...)
|
||||
(MulLowMaskedInt16x8 x y mask) => (VPMULLWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(MulLowMaskedInt16x16 x y mask) => (VPMULLWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(MulLowMaskedInt16x32 x y mask) => (VPMULLWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
|
||||
(MulLowMaskedInt32x4 x y mask) => (VPMULLDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(MulLowMaskedInt32x8 x y mask) => (VPMULLDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(MulLowMaskedInt32x16 x y mask) => (VPMULLDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
|
||||
(MulLowMaskedInt64x2 x y mask) => (VPMULLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(MulLowMaskedInt64x4 x y mask) => (VPMULLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(MulLowMaskedInt64x8 x y mask) => (VPMULLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
|
||||
(MulMaskedFloat32x4 x y mask) => (VMULPSMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(MulMaskedFloat32x8 x y mask) => (VMULPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(MulMaskedFloat32x16 x y mask) => (VMULPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
|
||||
(MulMaskedFloat64x2 x y mask) => (VMULPDMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(MulMaskedFloat64x4 x y mask) => (VMULPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(MulMaskedFloat64x8 x y mask) => (VMULPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
|
||||
(MulMaskedInt16x8 x y mask) => (VPMULLWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(MulMaskedInt16x16 x y mask) => (VPMULLWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(MulMaskedInt16x32 x y mask) => (VPMULLWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
|
||||
(MulMaskedInt32x4 x y mask) => (VPMULLDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(MulMaskedInt32x8 x y mask) => (VPMULLDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(MulMaskedInt32x16 x y mask) => (VPMULLDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
|
||||
(MulMaskedInt64x2 x y mask) => (VPMULLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(MulMaskedInt64x4 x y mask) => (VPMULLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(MulMaskedInt64x8 x y mask) => (VPMULLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
|
||||
(NotEqualFloat32x4 x y) => (VCMPPS128 [4] x y)
|
||||
(NotEqualFloat32x8 x y) => (VCMPPS256 [4] x y)
|
||||
(NotEqualFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [4] x y))
|
||||
|
|
@ -1015,30 +1017,6 @@
|
|||
(PairDotProdMaskedInt16x8 x y mask) => (VPMADDWDMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(PairDotProdMaskedInt16x16 x y mask) => (VPMADDWDMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(PairDotProdMaskedInt16x32 x y mask) => (VPMADDWDMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
|
||||
(PairwiseAddFloat32x4 ...) => (VHADDPS128 ...)
|
||||
(PairwiseAddFloat32x8 ...) => (VHADDPS256 ...)
|
||||
(PairwiseAddFloat64x2 ...) => (VHADDPD128 ...)
|
||||
(PairwiseAddFloat64x4 ...) => (VHADDPD256 ...)
|
||||
(PairwiseAddInt16x8 ...) => (VPHADDW128 ...)
|
||||
(PairwiseAddInt16x16 ...) => (VPHADDW256 ...)
|
||||
(PairwiseAddInt32x4 ...) => (VPHADDD128 ...)
|
||||
(PairwiseAddInt32x8 ...) => (VPHADDD256 ...)
|
||||
(PairwiseAddUint16x8 ...) => (VPHADDW128 ...)
|
||||
(PairwiseAddUint16x16 ...) => (VPHADDW256 ...)
|
||||
(PairwiseAddUint32x4 ...) => (VPHADDD128 ...)
|
||||
(PairwiseAddUint32x8 ...) => (VPHADDD256 ...)
|
||||
(PairwiseSubFloat32x4 ...) => (VHSUBPS128 ...)
|
||||
(PairwiseSubFloat32x8 ...) => (VHSUBPS256 ...)
|
||||
(PairwiseSubFloat64x2 ...) => (VHSUBPD128 ...)
|
||||
(PairwiseSubFloat64x4 ...) => (VHSUBPD256 ...)
|
||||
(PairwiseSubInt16x8 ...) => (VPHSUBW128 ...)
|
||||
(PairwiseSubInt16x16 ...) => (VPHSUBW256 ...)
|
||||
(PairwiseSubInt32x4 ...) => (VPHSUBD128 ...)
|
||||
(PairwiseSubInt32x8 ...) => (VPHSUBD256 ...)
|
||||
(PairwiseSubUint16x8 ...) => (VPHSUBW128 ...)
|
||||
(PairwiseSubUint16x16 ...) => (VPHSUBW256 ...)
|
||||
(PairwiseSubUint32x4 ...) => (VPHSUBD128 ...)
|
||||
(PairwiseSubUint32x8 ...) => (VPHSUBD256 ...)
|
||||
(PermuteFloat32x8 ...) => (VPERMPS256 ...)
|
||||
(PermuteFloat32x16 ...) => (VPERMPS512 ...)
|
||||
(PermuteFloat64x4 ...) => (VPERMPD256 ...)
|
||||
|
|
@ -1295,76 +1273,36 @@
|
|||
(RoundFloat32x8 x) => (VROUNDPS256 [0] x)
|
||||
(RoundFloat64x2 x) => (VROUNDPD128 [0] x)
|
||||
(RoundFloat64x4 x) => (VROUNDPD256 [0] x)
|
||||
(RoundWithPrecisionFloat32x4 [a] x) => (VRNDSCALEPS128 [a+0] x)
|
||||
(RoundWithPrecisionFloat32x8 [a] x) => (VRNDSCALEPS256 [a+0] x)
|
||||
(RoundWithPrecisionFloat32x16 [a] x) => (VRNDSCALEPS512 [a+0] x)
|
||||
(RoundWithPrecisionFloat64x2 [a] x) => (VRNDSCALEPD128 [a+0] x)
|
||||
(RoundWithPrecisionFloat64x4 [a] x) => (VRNDSCALEPD256 [a+0] x)
|
||||
(RoundWithPrecisionFloat64x8 [a] x) => (VRNDSCALEPD512 [a+0] x)
|
||||
(RoundWithPrecisionMaskedFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+0] x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(RoundWithPrecisionMaskedFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+0] x (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(RoundWithPrecisionMaskedFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+0] x (VPMOVVec32x16ToM <types.TypeMask> mask))
|
||||
(RoundWithPrecisionMaskedFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+0] x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(RoundWithPrecisionMaskedFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+0] x (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(RoundWithPrecisionMaskedFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+0] x (VPMOVVec64x8ToM <types.TypeMask> mask))
|
||||
(SaturatedAddInt8x16 ...) => (VPADDSB128 ...)
|
||||
(SaturatedAddInt8x32 ...) => (VPADDSB256 ...)
|
||||
(SaturatedAddInt8x64 ...) => (VPADDSB512 ...)
|
||||
(SaturatedAddInt16x8 ...) => (VPADDSW128 ...)
|
||||
(SaturatedAddInt16x16 ...) => (VPADDSW256 ...)
|
||||
(SaturatedAddInt16x32 ...) => (VPADDSW512 ...)
|
||||
(SaturatedAddUint8x16 ...) => (VPADDSB128 ...)
|
||||
(SaturatedAddUint8x32 ...) => (VPADDSB256 ...)
|
||||
(SaturatedAddUint8x64 ...) => (VPADDSB512 ...)
|
||||
(SaturatedAddUint16x8 ...) => (VPADDSW128 ...)
|
||||
(SaturatedAddUint16x16 ...) => (VPADDSW256 ...)
|
||||
(SaturatedAddUint16x32 ...) => (VPADDSW512 ...)
|
||||
(RoundScaledFloat32x4 [a] x) => (VRNDSCALEPS128 [a+0] x)
|
||||
(RoundScaledFloat32x8 [a] x) => (VRNDSCALEPS256 [a+0] x)
|
||||
(RoundScaledFloat32x16 [a] x) => (VRNDSCALEPS512 [a+0] x)
|
||||
(RoundScaledFloat64x2 [a] x) => (VRNDSCALEPD128 [a+0] x)
|
||||
(RoundScaledFloat64x4 [a] x) => (VRNDSCALEPD256 [a+0] x)
|
||||
(RoundScaledFloat64x8 [a] x) => (VRNDSCALEPD512 [a+0] x)
|
||||
(RoundScaledMaskedFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+0] x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(RoundScaledMaskedFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+0] x (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(RoundScaledMaskedFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+0] x (VPMOVVec32x16ToM <types.TypeMask> mask))
|
||||
(RoundScaledMaskedFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+0] x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(RoundScaledMaskedFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+0] x (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(RoundScaledMaskedFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+0] x (VPMOVVec64x8ToM <types.TypeMask> mask))
|
||||
(RoundScaledResidueFloat32x4 [a] x) => (VREDUCEPS128 [a+0] x)
|
||||
(RoundScaledResidueFloat32x8 [a] x) => (VREDUCEPS256 [a+0] x)
|
||||
(RoundScaledResidueFloat32x16 [a] x) => (VREDUCEPS512 [a+0] x)
|
||||
(RoundScaledResidueFloat64x2 [a] x) => (VREDUCEPD128 [a+0] x)
|
||||
(RoundScaledResidueFloat64x4 [a] x) => (VREDUCEPD256 [a+0] x)
|
||||
(RoundScaledResidueFloat64x8 [a] x) => (VREDUCEPD512 [a+0] x)
|
||||
(RoundScaledResidueMaskedFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+0] x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(RoundScaledResidueMaskedFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+0] x (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(RoundScaledResidueMaskedFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+0] x (VPMOVVec32x16ToM <types.TypeMask> mask))
|
||||
(RoundScaledResidueMaskedFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+0] x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(RoundScaledResidueMaskedFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+0] x (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(RoundScaledResidueMaskedFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+0] x (VPMOVVec64x8ToM <types.TypeMask> mask))
|
||||
(SaturatedAddDotProdInt32x4 ...) => (VPDPWSSDS128 ...)
|
||||
(SaturatedAddDotProdInt32x8 ...) => (VPDPWSSDS256 ...)
|
||||
(SaturatedAddDotProdInt32x16 ...) => (VPDPWSSDS512 ...)
|
||||
(SaturatedAddDotProdMaskedInt32x4 x y z mask) => (VPDPWSSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(SaturatedAddDotProdMaskedInt32x8 x y z mask) => (VPDPWSSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(SaturatedAddDotProdMaskedInt32x16 x y z mask) => (VPDPWSSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
|
||||
(SaturatedAddMaskedInt8x16 x y mask) => (VPADDSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(SaturatedAddMaskedInt8x32 x y mask) => (VPADDSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
|
||||
(SaturatedAddMaskedInt8x64 x y mask) => (VPADDSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
|
||||
(SaturatedAddMaskedInt16x8 x y mask) => (VPADDSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(SaturatedAddMaskedInt16x16 x y mask) => (VPADDSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(SaturatedAddMaskedInt16x32 x y mask) => (VPADDSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
|
||||
(SaturatedAddMaskedUint8x16 x y mask) => (VPADDSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(SaturatedAddMaskedUint8x32 x y mask) => (VPADDSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
|
||||
(SaturatedAddMaskedUint8x64 x y mask) => (VPADDSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
|
||||
(SaturatedAddMaskedUint16x8 x y mask) => (VPADDSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(SaturatedAddMaskedUint16x16 x y mask) => (VPADDSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(SaturatedAddMaskedUint16x32 x y mask) => (VPADDSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
|
||||
(SaturatedPairwiseAddInt16x8 ...) => (VPHADDSW128 ...)
|
||||
(SaturatedPairwiseAddInt16x16 ...) => (VPHADDSW256 ...)
|
||||
(SaturatedPairwiseSubInt16x8 ...) => (VPHSUBSW128 ...)
|
||||
(SaturatedPairwiseSubInt16x16 ...) => (VPHSUBSW256 ...)
|
||||
(SaturatedSubInt8x16 ...) => (VPSUBSB128 ...)
|
||||
(SaturatedSubInt8x32 ...) => (VPSUBSB256 ...)
|
||||
(SaturatedSubInt8x64 ...) => (VPSUBSB512 ...)
|
||||
(SaturatedSubInt16x8 ...) => (VPSUBSW128 ...)
|
||||
(SaturatedSubInt16x16 ...) => (VPSUBSW256 ...)
|
||||
(SaturatedSubInt16x32 ...) => (VPSUBSW512 ...)
|
||||
(SaturatedSubUint8x16 ...) => (VPSUBSB128 ...)
|
||||
(SaturatedSubUint8x32 ...) => (VPSUBSB256 ...)
|
||||
(SaturatedSubUint8x64 ...) => (VPSUBSB512 ...)
|
||||
(SaturatedSubUint16x8 ...) => (VPSUBSW128 ...)
|
||||
(SaturatedSubUint16x16 ...) => (VPSUBSW256 ...)
|
||||
(SaturatedSubUint16x32 ...) => (VPSUBSW512 ...)
|
||||
(SaturatedSubMaskedInt8x16 x y mask) => (VPSUBSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(SaturatedSubMaskedInt8x32 x y mask) => (VPSUBSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
|
||||
(SaturatedSubMaskedInt8x64 x y mask) => (VPSUBSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
|
||||
(SaturatedSubMaskedInt16x8 x y mask) => (VPSUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(SaturatedSubMaskedInt16x16 x y mask) => (VPSUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(SaturatedSubMaskedInt16x32 x y mask) => (VPSUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
|
||||
(SaturatedSubMaskedUint8x16 x y mask) => (VPSUBSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(SaturatedSubMaskedUint8x32 x y mask) => (VPSUBSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
|
||||
(SaturatedSubMaskedUint8x64 x y mask) => (VPSUBSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
|
||||
(SaturatedSubMaskedUint16x8 x y mask) => (VPSUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(SaturatedSubMaskedUint16x16 x y mask) => (VPSUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(SaturatedSubMaskedUint16x32 x y mask) => (VPSUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
|
||||
(SaturatedUnsignedSignedPairDotProdUint8x16 ...) => (VPMADDUBSW128 ...)
|
||||
(SaturatedUnsignedSignedPairDotProdUint8x32 ...) => (VPMADDUBSW256 ...)
|
||||
(SaturatedUnsignedSignedPairDotProdUint8x64 ...) => (VPMADDUBSW512 ...)
|
||||
|
|
@ -1377,6 +1315,18 @@
|
|||
(SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4 x y z mask) => (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8 x y z mask) => (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x16 x y z mask) => (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
|
||||
(ScaleFloat32x4 ...) => (VSCALEFPS128 ...)
|
||||
(ScaleFloat32x8 ...) => (VSCALEFPS256 ...)
|
||||
(ScaleFloat32x16 ...) => (VSCALEFPS512 ...)
|
||||
(ScaleFloat64x2 ...) => (VSCALEFPD128 ...)
|
||||
(ScaleFloat64x4 ...) => (VSCALEFPD256 ...)
|
||||
(ScaleFloat64x8 ...) => (VSCALEFPD512 ...)
|
||||
(ScaleMaskedFloat32x4 x y mask) => (VSCALEFPSMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(ScaleMaskedFloat32x8 x y mask) => (VSCALEFPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(ScaleMaskedFloat32x16 x y mask) => (VSCALEFPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
|
||||
(ScaleMaskedFloat64x2 x y mask) => (VSCALEFPDMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(ScaleMaskedFloat64x4 x y mask) => (VSCALEFPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(ScaleMaskedFloat64x8 x y mask) => (VSCALEFPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
|
||||
(Set128Float32x8 ...) => (VINSERTF128256 ...)
|
||||
(Set128Float64x4 ...) => (VINSERTF128256 ...)
|
||||
(Set128Int8x32 ...) => (VINSERTI128256 ...)
|
||||
|
|
@ -1761,22 +1711,72 @@
|
|||
(SubMaskedUint64x2 x y mask) => (VPSUBQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(SubMaskedUint64x4 x y mask) => (VPSUBQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(SubMaskedUint64x8 x y mask) => (VPSUBQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
|
||||
(SubPairsFloat32x4 ...) => (VHSUBPS128 ...)
|
||||
(SubPairsFloat32x8 ...) => (VHSUBPS256 ...)
|
||||
(SubPairsFloat64x2 ...) => (VHSUBPD128 ...)
|
||||
(SubPairsFloat64x4 ...) => (VHSUBPD256 ...)
|
||||
(SubPairsInt16x8 ...) => (VPHSUBW128 ...)
|
||||
(SubPairsInt16x16 ...) => (VPHSUBW256 ...)
|
||||
(SubPairsInt32x4 ...) => (VPHSUBD128 ...)
|
||||
(SubPairsInt32x8 ...) => (VPHSUBD256 ...)
|
||||
(SubPairsUint16x8 ...) => (VPHSUBW128 ...)
|
||||
(SubPairsUint16x16 ...) => (VPHSUBW256 ...)
|
||||
(SubPairsUint32x4 ...) => (VPHSUBD128 ...)
|
||||
(SubPairsUint32x8 ...) => (VPHSUBD256 ...)
|
||||
(SubPairsSaturatedInt16x8 ...) => (VPHSUBSW128 ...)
|
||||
(SubPairsSaturatedInt16x16 ...) => (VPHSUBSW256 ...)
|
||||
(SubSaturatedInt8x16 ...) => (VPSUBSB128 ...)
|
||||
(SubSaturatedInt8x32 ...) => (VPSUBSB256 ...)
|
||||
(SubSaturatedInt8x64 ...) => (VPSUBSB512 ...)
|
||||
(SubSaturatedInt16x8 ...) => (VPSUBSW128 ...)
|
||||
(SubSaturatedInt16x16 ...) => (VPSUBSW256 ...)
|
||||
(SubSaturatedInt16x32 ...) => (VPSUBSW512 ...)
|
||||
(SubSaturatedUint8x16 ...) => (VPSUBSB128 ...)
|
||||
(SubSaturatedUint8x32 ...) => (VPSUBSB256 ...)
|
||||
(SubSaturatedUint8x64 ...) => (VPSUBSB512 ...)
|
||||
(SubSaturatedUint16x8 ...) => (VPSUBSW128 ...)
|
||||
(SubSaturatedUint16x16 ...) => (VPSUBSW256 ...)
|
||||
(SubSaturatedUint16x32 ...) => (VPSUBSW512 ...)
|
||||
(SubSaturatedMaskedInt8x16 x y mask) => (VPSUBSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(SubSaturatedMaskedInt8x32 x y mask) => (VPSUBSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
|
||||
(SubSaturatedMaskedInt8x64 x y mask) => (VPSUBSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
|
||||
(SubSaturatedMaskedInt16x8 x y mask) => (VPSUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(SubSaturatedMaskedInt16x16 x y mask) => (VPSUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(SubSaturatedMaskedInt16x32 x y mask) => (VPSUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
|
||||
(SubSaturatedMaskedUint8x16 x y mask) => (VPSUBSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(SubSaturatedMaskedUint8x32 x y mask) => (VPSUBSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
|
||||
(SubSaturatedMaskedUint8x64 x y mask) => (VPSUBSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
|
||||
(SubSaturatedMaskedUint16x8 x y mask) => (VPSUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(SubSaturatedMaskedUint16x16 x y mask) => (VPSUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(SubSaturatedMaskedUint16x32 x y mask) => (VPSUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
|
||||
(TruncFloat32x4 x) => (VROUNDPS128 [3] x)
|
||||
(TruncFloat32x8 x) => (VROUNDPS256 [3] x)
|
||||
(TruncFloat64x2 x) => (VROUNDPD128 [3] x)
|
||||
(TruncFloat64x4 x) => (VROUNDPD256 [3] x)
|
||||
(TruncWithPrecisionFloat32x4 [a] x) => (VRNDSCALEPS128 [a+3] x)
|
||||
(TruncWithPrecisionFloat32x8 [a] x) => (VRNDSCALEPS256 [a+3] x)
|
||||
(TruncWithPrecisionFloat32x16 [a] x) => (VRNDSCALEPS512 [a+3] x)
|
||||
(TruncWithPrecisionFloat64x2 [a] x) => (VRNDSCALEPD128 [a+3] x)
|
||||
(TruncWithPrecisionFloat64x4 [a] x) => (VRNDSCALEPD256 [a+3] x)
|
||||
(TruncWithPrecisionFloat64x8 [a] x) => (VRNDSCALEPD512 [a+3] x)
|
||||
(TruncWithPrecisionMaskedFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+3] x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(TruncWithPrecisionMaskedFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+3] x (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(TruncWithPrecisionMaskedFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+3] x (VPMOVVec32x16ToM <types.TypeMask> mask))
|
||||
(TruncWithPrecisionMaskedFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+3] x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(TruncWithPrecisionMaskedFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+3] x (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(TruncWithPrecisionMaskedFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+3] x (VPMOVVec64x8ToM <types.TypeMask> mask))
|
||||
(TruncScaledFloat32x4 [a] x) => (VRNDSCALEPS128 [a+3] x)
|
||||
(TruncScaledFloat32x8 [a] x) => (VRNDSCALEPS256 [a+3] x)
|
||||
(TruncScaledFloat32x16 [a] x) => (VRNDSCALEPS512 [a+3] x)
|
||||
(TruncScaledFloat64x2 [a] x) => (VRNDSCALEPD128 [a+3] x)
|
||||
(TruncScaledFloat64x4 [a] x) => (VRNDSCALEPD256 [a+3] x)
|
||||
(TruncScaledFloat64x8 [a] x) => (VRNDSCALEPD512 [a+3] x)
|
||||
(TruncScaledMaskedFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+3] x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(TruncScaledMaskedFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+3] x (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(TruncScaledMaskedFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+3] x (VPMOVVec32x16ToM <types.TypeMask> mask))
|
||||
(TruncScaledMaskedFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+3] x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(TruncScaledMaskedFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+3] x (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(TruncScaledMaskedFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+3] x (VPMOVVec64x8ToM <types.TypeMask> mask))
|
||||
(TruncScaledResidueFloat32x4 [a] x) => (VREDUCEPS128 [a+3] x)
|
||||
(TruncScaledResidueFloat32x8 [a] x) => (VREDUCEPS256 [a+3] x)
|
||||
(TruncScaledResidueFloat32x16 [a] x) => (VREDUCEPS512 [a+3] x)
|
||||
(TruncScaledResidueFloat64x2 [a] x) => (VREDUCEPD128 [a+3] x)
|
||||
(TruncScaledResidueFloat64x4 [a] x) => (VREDUCEPD256 [a+3] x)
|
||||
(TruncScaledResidueFloat64x8 [a] x) => (VREDUCEPD512 [a+3] x)
|
||||
(TruncScaledResidueMaskedFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+3] x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(TruncScaledResidueMaskedFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+3] x (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(TruncScaledResidueMaskedFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+3] x (VPMOVVec32x16ToM <types.TypeMask> mask))
|
||||
(TruncScaledResidueMaskedFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+3] x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(TruncScaledResidueMaskedFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+3] x (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(TruncScaledResidueMaskedFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+3] x (VPMOVVec64x8ToM <types.TypeMask> mask))
|
||||
(UnsignedSignedQuadDotProdAccumulateInt32x4 ...) => (VPDPBUSD128 ...)
|
||||
(UnsignedSignedQuadDotProdAccumulateInt32x8 ...) => (VPDPBUSD256 ...)
|
||||
(UnsignedSignedQuadDotProdAccumulateInt32x16 ...) => (VPDPBUSD512 ...)
|
||||
|
|
|
|||
|
|
@ -81,6 +81,44 @@ func simdGenericOps() []opData {
|
|||
{name: "AddMaskedUint64x2", argLength: 3, commutative: true},
|
||||
{name: "AddMaskedUint64x4", argLength: 3, commutative: true},
|
||||
{name: "AddMaskedUint64x8", argLength: 3, commutative: true},
|
||||
{name: "AddPairsFloat32x4", argLength: 2, commutative: false},
|
||||
{name: "AddPairsFloat32x8", argLength: 2, commutative: false},
|
||||
{name: "AddPairsFloat64x2", argLength: 2, commutative: false},
|
||||
{name: "AddPairsFloat64x4", argLength: 2, commutative: false},
|
||||
{name: "AddPairsInt16x8", argLength: 2, commutative: false},
|
||||
{name: "AddPairsInt16x16", argLength: 2, commutative: false},
|
||||
{name: "AddPairsInt32x4", argLength: 2, commutative: false},
|
||||
{name: "AddPairsInt32x8", argLength: 2, commutative: false},
|
||||
{name: "AddPairsSaturatedInt16x8", argLength: 2, commutative: false},
|
||||
{name: "AddPairsSaturatedInt16x16", argLength: 2, commutative: false},
|
||||
{name: "AddPairsUint16x8", argLength: 2, commutative: false},
|
||||
{name: "AddPairsUint16x16", argLength: 2, commutative: false},
|
||||
{name: "AddPairsUint32x4", argLength: 2, commutative: false},
|
||||
{name: "AddPairsUint32x8", argLength: 2, commutative: false},
|
||||
{name: "AddSaturatedInt8x16", argLength: 2, commutative: true},
|
||||
{name: "AddSaturatedInt8x32", argLength: 2, commutative: true},
|
||||
{name: "AddSaturatedInt8x64", argLength: 2, commutative: true},
|
||||
{name: "AddSaturatedInt16x8", argLength: 2, commutative: true},
|
||||
{name: "AddSaturatedInt16x16", argLength: 2, commutative: true},
|
||||
{name: "AddSaturatedInt16x32", argLength: 2, commutative: true},
|
||||
{name: "AddSaturatedMaskedInt8x16", argLength: 3, commutative: true},
|
||||
{name: "AddSaturatedMaskedInt8x32", argLength: 3, commutative: true},
|
||||
{name: "AddSaturatedMaskedInt8x64", argLength: 3, commutative: true},
|
||||
{name: "AddSaturatedMaskedInt16x8", argLength: 3, commutative: true},
|
||||
{name: "AddSaturatedMaskedInt16x16", argLength: 3, commutative: true},
|
||||
{name: "AddSaturatedMaskedInt16x32", argLength: 3, commutative: true},
|
||||
{name: "AddSaturatedMaskedUint8x16", argLength: 3, commutative: true},
|
||||
{name: "AddSaturatedMaskedUint8x32", argLength: 3, commutative: true},
|
||||
{name: "AddSaturatedMaskedUint8x64", argLength: 3, commutative: true},
|
||||
{name: "AddSaturatedMaskedUint16x8", argLength: 3, commutative: true},
|
||||
{name: "AddSaturatedMaskedUint16x16", argLength: 3, commutative: true},
|
||||
{name: "AddSaturatedMaskedUint16x32", argLength: 3, commutative: true},
|
||||
{name: "AddSaturatedUint8x16", argLength: 2, commutative: true},
|
||||
{name: "AddSaturatedUint8x32", argLength: 2, commutative: true},
|
||||
{name: "AddSaturatedUint8x64", argLength: 2, commutative: true},
|
||||
{name: "AddSaturatedUint16x8", argLength: 2, commutative: true},
|
||||
{name: "AddSaturatedUint16x16", argLength: 2, commutative: true},
|
||||
{name: "AddSaturatedUint16x32", argLength: 2, commutative: true},
|
||||
{name: "AddSubFloat32x4", argLength: 2, commutative: false},
|
||||
{name: "AddSubFloat32x8", argLength: 2, commutative: false},
|
||||
{name: "AddSubFloat64x2", argLength: 2, commutative: false},
|
||||
|
|
@ -744,18 +782,6 @@ func simdGenericOps() []opData {
|
|||
{name: "MinUint64x2", argLength: 2, commutative: true},
|
||||
{name: "MinUint64x4", argLength: 2, commutative: true},
|
||||
{name: "MinUint64x8", argLength: 2, commutative: true},
|
||||
{name: "MulByPowOf2Float32x4", argLength: 2, commutative: false},
|
||||
{name: "MulByPowOf2Float32x8", argLength: 2, commutative: false},
|
||||
{name: "MulByPowOf2Float32x16", argLength: 2, commutative: false},
|
||||
{name: "MulByPowOf2Float64x2", argLength: 2, commutative: false},
|
||||
{name: "MulByPowOf2Float64x4", argLength: 2, commutative: false},
|
||||
{name: "MulByPowOf2Float64x8", argLength: 2, commutative: false},
|
||||
{name: "MulByPowOf2MaskedFloat32x4", argLength: 3, commutative: false},
|
||||
{name: "MulByPowOf2MaskedFloat32x8", argLength: 3, commutative: false},
|
||||
{name: "MulByPowOf2MaskedFloat32x16", argLength: 3, commutative: false},
|
||||
{name: "MulByPowOf2MaskedFloat64x2", argLength: 3, commutative: false},
|
||||
{name: "MulByPowOf2MaskedFloat64x4", argLength: 3, commutative: false},
|
||||
{name: "MulByPowOf2MaskedFloat64x8", argLength: 3, commutative: false},
|
||||
{name: "MulEvenWidenInt32x4", argLength: 2, commutative: true},
|
||||
{name: "MulEvenWidenInt32x8", argLength: 2, commutative: true},
|
||||
{name: "MulEvenWidenInt64x2", argLength: 2, commutative: true},
|
||||
|
|
@ -790,30 +816,30 @@ func simdGenericOps() []opData {
|
|||
{name: "MulHighUint16x8", argLength: 2, commutative: true},
|
||||
{name: "MulHighUint16x16", argLength: 2, commutative: true},
|
||||
{name: "MulHighUint16x32", argLength: 2, commutative: true},
|
||||
{name: "MulLowInt16x8", argLength: 2, commutative: true},
|
||||
{name: "MulLowInt16x16", argLength: 2, commutative: true},
|
||||
{name: "MulLowInt16x32", argLength: 2, commutative: true},
|
||||
{name: "MulLowInt32x4", argLength: 2, commutative: true},
|
||||
{name: "MulLowInt32x8", argLength: 2, commutative: true},
|
||||
{name: "MulLowInt32x16", argLength: 2, commutative: true},
|
||||
{name: "MulLowInt64x2", argLength: 2, commutative: true},
|
||||
{name: "MulLowInt64x4", argLength: 2, commutative: true},
|
||||
{name: "MulLowInt64x8", argLength: 2, commutative: true},
|
||||
{name: "MulLowMaskedInt16x8", argLength: 3, commutative: true},
|
||||
{name: "MulLowMaskedInt16x16", argLength: 3, commutative: true},
|
||||
{name: "MulLowMaskedInt16x32", argLength: 3, commutative: true},
|
||||
{name: "MulLowMaskedInt32x4", argLength: 3, commutative: true},
|
||||
{name: "MulLowMaskedInt32x8", argLength: 3, commutative: true},
|
||||
{name: "MulLowMaskedInt32x16", argLength: 3, commutative: true},
|
||||
{name: "MulLowMaskedInt64x2", argLength: 3, commutative: true},
|
||||
{name: "MulLowMaskedInt64x4", argLength: 3, commutative: true},
|
||||
{name: "MulLowMaskedInt64x8", argLength: 3, commutative: true},
|
||||
{name: "MulInt16x8", argLength: 2, commutative: true},
|
||||
{name: "MulInt16x16", argLength: 2, commutative: true},
|
||||
{name: "MulInt16x32", argLength: 2, commutative: true},
|
||||
{name: "MulInt32x4", argLength: 2, commutative: true},
|
||||
{name: "MulInt32x8", argLength: 2, commutative: true},
|
||||
{name: "MulInt32x16", argLength: 2, commutative: true},
|
||||
{name: "MulInt64x2", argLength: 2, commutative: true},
|
||||
{name: "MulInt64x4", argLength: 2, commutative: true},
|
||||
{name: "MulInt64x8", argLength: 2, commutative: true},
|
||||
{name: "MulMaskedFloat32x4", argLength: 3, commutative: true},
|
||||
{name: "MulMaskedFloat32x8", argLength: 3, commutative: true},
|
||||
{name: "MulMaskedFloat32x16", argLength: 3, commutative: true},
|
||||
{name: "MulMaskedFloat64x2", argLength: 3, commutative: true},
|
||||
{name: "MulMaskedFloat64x4", argLength: 3, commutative: true},
|
||||
{name: "MulMaskedFloat64x8", argLength: 3, commutative: true},
|
||||
{name: "MulMaskedInt16x8", argLength: 3, commutative: true},
|
||||
{name: "MulMaskedInt16x16", argLength: 3, commutative: true},
|
||||
{name: "MulMaskedInt16x32", argLength: 3, commutative: true},
|
||||
{name: "MulMaskedInt32x4", argLength: 3, commutative: true},
|
||||
{name: "MulMaskedInt32x8", argLength: 3, commutative: true},
|
||||
{name: "MulMaskedInt32x16", argLength: 3, commutative: true},
|
||||
{name: "MulMaskedInt64x2", argLength: 3, commutative: true},
|
||||
{name: "MulMaskedInt64x4", argLength: 3, commutative: true},
|
||||
{name: "MulMaskedInt64x8", argLength: 3, commutative: true},
|
||||
{name: "NotEqualFloat32x4", argLength: 2, commutative: true},
|
||||
{name: "NotEqualFloat32x8", argLength: 2, commutative: true},
|
||||
{name: "NotEqualFloat32x16", argLength: 2, commutative: true},
|
||||
|
|
@ -916,30 +942,6 @@ func simdGenericOps() []opData {
|
|||
{name: "PairDotProdMaskedInt16x8", argLength: 3, commutative: false},
|
||||
{name: "PairDotProdMaskedInt16x16", argLength: 3, commutative: false},
|
||||
{name: "PairDotProdMaskedInt16x32", argLength: 3, commutative: false},
|
||||
{name: "PairwiseAddFloat32x4", argLength: 2, commutative: false},
|
||||
{name: "PairwiseAddFloat32x8", argLength: 2, commutative: false},
|
||||
{name: "PairwiseAddFloat64x2", argLength: 2, commutative: false},
|
||||
{name: "PairwiseAddFloat64x4", argLength: 2, commutative: false},
|
||||
{name: "PairwiseAddInt16x8", argLength: 2, commutative: false},
|
||||
{name: "PairwiseAddInt16x16", argLength: 2, commutative: false},
|
||||
{name: "PairwiseAddInt32x4", argLength: 2, commutative: false},
|
||||
{name: "PairwiseAddInt32x8", argLength: 2, commutative: false},
|
||||
{name: "PairwiseAddUint16x8", argLength: 2, commutative: false},
|
||||
{name: "PairwiseAddUint16x16", argLength: 2, commutative: false},
|
||||
{name: "PairwiseAddUint32x4", argLength: 2, commutative: false},
|
||||
{name: "PairwiseAddUint32x8", argLength: 2, commutative: false},
|
||||
{name: "PairwiseSubFloat32x4", argLength: 2, commutative: false},
|
||||
{name: "PairwiseSubFloat32x8", argLength: 2, commutative: false},
|
||||
{name: "PairwiseSubFloat64x2", argLength: 2, commutative: false},
|
||||
{name: "PairwiseSubFloat64x4", argLength: 2, commutative: false},
|
||||
{name: "PairwiseSubInt16x8", argLength: 2, commutative: false},
|
||||
{name: "PairwiseSubInt16x16", argLength: 2, commutative: false},
|
||||
{name: "PairwiseSubInt32x4", argLength: 2, commutative: false},
|
||||
{name: "PairwiseSubInt32x8", argLength: 2, commutative: false},
|
||||
{name: "PairwiseSubUint16x8", argLength: 2, commutative: false},
|
||||
{name: "PairwiseSubUint16x16", argLength: 2, commutative: false},
|
||||
{name: "PairwiseSubUint32x4", argLength: 2, commutative: false},
|
||||
{name: "PairwiseSubUint32x8", argLength: 2, commutative: false},
|
||||
{name: "Permute2Float32x4", argLength: 3, commutative: false},
|
||||
{name: "Permute2Float32x8", argLength: 3, commutative: false},
|
||||
{name: "Permute2Float32x16", argLength: 3, commutative: false},
|
||||
|
|
@ -1154,58 +1156,6 @@ func simdGenericOps() []opData {
|
|||
{name: "SaturatedAddDotProdMaskedInt32x4", argLength: 4, commutative: false},
|
||||
{name: "SaturatedAddDotProdMaskedInt32x8", argLength: 4, commutative: false},
|
||||
{name: "SaturatedAddDotProdMaskedInt32x16", argLength: 4, commutative: false},
|
||||
{name: "SaturatedAddInt8x16", argLength: 2, commutative: true},
|
||||
{name: "SaturatedAddInt8x32", argLength: 2, commutative: true},
|
||||
{name: "SaturatedAddInt8x64", argLength: 2, commutative: true},
|
||||
{name: "SaturatedAddInt16x8", argLength: 2, commutative: true},
|
||||
{name: "SaturatedAddInt16x16", argLength: 2, commutative: true},
|
||||
{name: "SaturatedAddInt16x32", argLength: 2, commutative: true},
|
||||
{name: "SaturatedAddMaskedInt8x16", argLength: 3, commutative: true},
|
||||
{name: "SaturatedAddMaskedInt8x32", argLength: 3, commutative: true},
|
||||
{name: "SaturatedAddMaskedInt8x64", argLength: 3, commutative: true},
|
||||
{name: "SaturatedAddMaskedInt16x8", argLength: 3, commutative: true},
|
||||
{name: "SaturatedAddMaskedInt16x16", argLength: 3, commutative: true},
|
||||
{name: "SaturatedAddMaskedInt16x32", argLength: 3, commutative: true},
|
||||
{name: "SaturatedAddMaskedUint8x16", argLength: 3, commutative: true},
|
||||
{name: "SaturatedAddMaskedUint8x32", argLength: 3, commutative: true},
|
||||
{name: "SaturatedAddMaskedUint8x64", argLength: 3, commutative: true},
|
||||
{name: "SaturatedAddMaskedUint16x8", argLength: 3, commutative: true},
|
||||
{name: "SaturatedAddMaskedUint16x16", argLength: 3, commutative: true},
|
||||
{name: "SaturatedAddMaskedUint16x32", argLength: 3, commutative: true},
|
||||
{name: "SaturatedAddUint8x16", argLength: 2, commutative: true},
|
||||
{name: "SaturatedAddUint8x32", argLength: 2, commutative: true},
|
||||
{name: "SaturatedAddUint8x64", argLength: 2, commutative: true},
|
||||
{name: "SaturatedAddUint16x8", argLength: 2, commutative: true},
|
||||
{name: "SaturatedAddUint16x16", argLength: 2, commutative: true},
|
||||
{name: "SaturatedAddUint16x32", argLength: 2, commutative: true},
|
||||
{name: "SaturatedPairwiseAddInt16x8", argLength: 2, commutative: false},
|
||||
{name: "SaturatedPairwiseAddInt16x16", argLength: 2, commutative: false},
|
||||
{name: "SaturatedPairwiseSubInt16x8", argLength: 2, commutative: false},
|
||||
{name: "SaturatedPairwiseSubInt16x16", argLength: 2, commutative: false},
|
||||
{name: "SaturatedSubInt8x16", argLength: 2, commutative: false},
|
||||
{name: "SaturatedSubInt8x32", argLength: 2, commutative: false},
|
||||
{name: "SaturatedSubInt8x64", argLength: 2, commutative: false},
|
||||
{name: "SaturatedSubInt16x8", argLength: 2, commutative: false},
|
||||
{name: "SaturatedSubInt16x16", argLength: 2, commutative: false},
|
||||
{name: "SaturatedSubInt16x32", argLength: 2, commutative: false},
|
||||
{name: "SaturatedSubMaskedInt8x16", argLength: 3, commutative: false},
|
||||
{name: "SaturatedSubMaskedInt8x32", argLength: 3, commutative: false},
|
||||
{name: "SaturatedSubMaskedInt8x64", argLength: 3, commutative: false},
|
||||
{name: "SaturatedSubMaskedInt16x8", argLength: 3, commutative: false},
|
||||
{name: "SaturatedSubMaskedInt16x16", argLength: 3, commutative: false},
|
||||
{name: "SaturatedSubMaskedInt16x32", argLength: 3, commutative: false},
|
||||
{name: "SaturatedSubMaskedUint8x16", argLength: 3, commutative: false},
|
||||
{name: "SaturatedSubMaskedUint8x32", argLength: 3, commutative: false},
|
||||
{name: "SaturatedSubMaskedUint8x64", argLength: 3, commutative: false},
|
||||
{name: "SaturatedSubMaskedUint16x8", argLength: 3, commutative: false},
|
||||
{name: "SaturatedSubMaskedUint16x16", argLength: 3, commutative: false},
|
||||
{name: "SaturatedSubMaskedUint16x32", argLength: 3, commutative: false},
|
||||
{name: "SaturatedSubUint8x16", argLength: 2, commutative: false},
|
||||
{name: "SaturatedSubUint8x32", argLength: 2, commutative: false},
|
||||
{name: "SaturatedSubUint8x64", argLength: 2, commutative: false},
|
||||
{name: "SaturatedSubUint16x8", argLength: 2, commutative: false},
|
||||
{name: "SaturatedSubUint16x16", argLength: 2, commutative: false},
|
||||
{name: "SaturatedSubUint16x32", argLength: 2, commutative: false},
|
||||
{name: "SaturatedUnsignedSignedPairDotProdMaskedUint8x16", argLength: 3, commutative: false},
|
||||
{name: "SaturatedUnsignedSignedPairDotProdMaskedUint8x32", argLength: 3, commutative: false},
|
||||
{name: "SaturatedUnsignedSignedPairDotProdMaskedUint8x64", argLength: 3, commutative: false},
|
||||
|
|
@ -1218,6 +1168,18 @@ func simdGenericOps() []opData {
|
|||
{name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4", argLength: 4, commutative: false},
|
||||
{name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8", argLength: 4, commutative: false},
|
||||
{name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x16", argLength: 4, commutative: false},
|
||||
{name: "ScaleFloat32x4", argLength: 2, commutative: false},
|
||||
{name: "ScaleFloat32x8", argLength: 2, commutative: false},
|
||||
{name: "ScaleFloat32x16", argLength: 2, commutative: false},
|
||||
{name: "ScaleFloat64x2", argLength: 2, commutative: false},
|
||||
{name: "ScaleFloat64x4", argLength: 2, commutative: false},
|
||||
{name: "ScaleFloat64x8", argLength: 2, commutative: false},
|
||||
{name: "ScaleMaskedFloat32x4", argLength: 3, commutative: false},
|
||||
{name: "ScaleMaskedFloat32x8", argLength: 3, commutative: false},
|
||||
{name: "ScaleMaskedFloat32x16", argLength: 3, commutative: false},
|
||||
{name: "ScaleMaskedFloat64x2", argLength: 3, commutative: false},
|
||||
{name: "ScaleMaskedFloat64x4", argLength: 3, commutative: false},
|
||||
{name: "ScaleMaskedFloat64x8", argLength: 3, commutative: false},
|
||||
{name: "ShiftAllLeftInt16x8", argLength: 2, commutative: false},
|
||||
{name: "ShiftAllLeftInt16x16", argLength: 2, commutative: false},
|
||||
{name: "ShiftAllLeftInt16x32", argLength: 2, commutative: false},
|
||||
|
|
@ -1500,6 +1462,44 @@ func simdGenericOps() []opData {
|
|||
{name: "SubMaskedUint64x2", argLength: 3, commutative: false},
|
||||
{name: "SubMaskedUint64x4", argLength: 3, commutative: false},
|
||||
{name: "SubMaskedUint64x8", argLength: 3, commutative: false},
|
||||
{name: "SubPairsFloat32x4", argLength: 2, commutative: false},
|
||||
{name: "SubPairsFloat32x8", argLength: 2, commutative: false},
|
||||
{name: "SubPairsFloat64x2", argLength: 2, commutative: false},
|
||||
{name: "SubPairsFloat64x4", argLength: 2, commutative: false},
|
||||
{name: "SubPairsInt16x8", argLength: 2, commutative: false},
|
||||
{name: "SubPairsInt16x16", argLength: 2, commutative: false},
|
||||
{name: "SubPairsInt32x4", argLength: 2, commutative: false},
|
||||
{name: "SubPairsInt32x8", argLength: 2, commutative: false},
|
||||
{name: "SubPairsSaturatedInt16x8", argLength: 2, commutative: false},
|
||||
{name: "SubPairsSaturatedInt16x16", argLength: 2, commutative: false},
|
||||
{name: "SubPairsUint16x8", argLength: 2, commutative: false},
|
||||
{name: "SubPairsUint16x16", argLength: 2, commutative: false},
|
||||
{name: "SubPairsUint32x4", argLength: 2, commutative: false},
|
||||
{name: "SubPairsUint32x8", argLength: 2, commutative: false},
|
||||
{name: "SubSaturatedInt8x16", argLength: 2, commutative: false},
|
||||
{name: "SubSaturatedInt8x32", argLength: 2, commutative: false},
|
||||
{name: "SubSaturatedInt8x64", argLength: 2, commutative: false},
|
||||
{name: "SubSaturatedInt16x8", argLength: 2, commutative: false},
|
||||
{name: "SubSaturatedInt16x16", argLength: 2, commutative: false},
|
||||
{name: "SubSaturatedInt16x32", argLength: 2, commutative: false},
|
||||
{name: "SubSaturatedMaskedInt8x16", argLength: 3, commutative: false},
|
||||
{name: "SubSaturatedMaskedInt8x32", argLength: 3, commutative: false},
|
||||
{name: "SubSaturatedMaskedInt8x64", argLength: 3, commutative: false},
|
||||
{name: "SubSaturatedMaskedInt16x8", argLength: 3, commutative: false},
|
||||
{name: "SubSaturatedMaskedInt16x16", argLength: 3, commutative: false},
|
||||
{name: "SubSaturatedMaskedInt16x32", argLength: 3, commutative: false},
|
||||
{name: "SubSaturatedMaskedUint8x16", argLength: 3, commutative: false},
|
||||
{name: "SubSaturatedMaskedUint8x32", argLength: 3, commutative: false},
|
||||
{name: "SubSaturatedMaskedUint8x64", argLength: 3, commutative: false},
|
||||
{name: "SubSaturatedMaskedUint16x8", argLength: 3, commutative: false},
|
||||
{name: "SubSaturatedMaskedUint16x16", argLength: 3, commutative: false},
|
||||
{name: "SubSaturatedMaskedUint16x32", argLength: 3, commutative: false},
|
||||
{name: "SubSaturatedUint8x16", argLength: 2, commutative: false},
|
||||
{name: "SubSaturatedUint8x32", argLength: 2, commutative: false},
|
||||
{name: "SubSaturatedUint8x64", argLength: 2, commutative: false},
|
||||
{name: "SubSaturatedUint16x8", argLength: 2, commutative: false},
|
||||
{name: "SubSaturatedUint16x16", argLength: 2, commutative: false},
|
||||
{name: "SubSaturatedUint16x32", argLength: 2, commutative: false},
|
||||
{name: "SubUint8x16", argLength: 2, commutative: false},
|
||||
{name: "SubUint8x32", argLength: 2, commutative: false},
|
||||
{name: "SubUint8x64", argLength: 2, commutative: false},
|
||||
|
|
@ -1558,78 +1558,54 @@ func simdGenericOps() []opData {
|
|||
{name: "XorUint64x2", argLength: 2, commutative: true},
|
||||
{name: "XorUint64x4", argLength: 2, commutative: true},
|
||||
{name: "XorUint64x8", argLength: 2, commutative: true},
|
||||
{name: "CeilWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "CeilWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "CeilWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "CeilWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "CeilWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "CeilWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "CeilWithPrecisionMaskedFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "CeilWithPrecisionMaskedFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "CeilWithPrecisionMaskedFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "CeilWithPrecisionMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "CeilWithPrecisionMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "CeilWithPrecisionMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "DiffWithCeilWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "DiffWithCeilWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "DiffWithCeilWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "DiffWithCeilWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "DiffWithCeilWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "DiffWithCeilWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "DiffWithCeilWithPrecisionMaskedFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "DiffWithCeilWithPrecisionMaskedFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "DiffWithCeilWithPrecisionMaskedFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "DiffWithCeilWithPrecisionMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "DiffWithCeilWithPrecisionMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "DiffWithCeilWithPrecisionMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "DiffWithFloorWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "DiffWithFloorWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "DiffWithFloorWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "DiffWithFloorWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "DiffWithFloorWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "DiffWithFloorWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "DiffWithFloorWithPrecisionMaskedFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "DiffWithFloorWithPrecisionMaskedFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "DiffWithFloorWithPrecisionMaskedFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "DiffWithFloorWithPrecisionMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "DiffWithFloorWithPrecisionMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "DiffWithFloorWithPrecisionMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "DiffWithRoundWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "DiffWithRoundWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "DiffWithRoundWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "DiffWithRoundWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "DiffWithRoundWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "DiffWithRoundWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "DiffWithRoundWithPrecisionMaskedFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "DiffWithRoundWithPrecisionMaskedFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "DiffWithRoundWithPrecisionMaskedFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "DiffWithRoundWithPrecisionMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "DiffWithRoundWithPrecisionMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "DiffWithRoundWithPrecisionMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "DiffWithTruncWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "DiffWithTruncWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "DiffWithTruncWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "DiffWithTruncWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "DiffWithTruncWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "DiffWithTruncWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "DiffWithTruncWithPrecisionMaskedFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "DiffWithTruncWithPrecisionMaskedFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "DiffWithTruncWithPrecisionMaskedFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "DiffWithTruncWithPrecisionMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "DiffWithTruncWithPrecisionMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "DiffWithTruncWithPrecisionMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "FloorWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "FloorWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "FloorWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "FloorWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "FloorWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "FloorWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "FloorWithPrecisionMaskedFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "FloorWithPrecisionMaskedFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "FloorWithPrecisionMaskedFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "FloorWithPrecisionMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "FloorWithPrecisionMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "FloorWithPrecisionMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "CeilScaledFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "CeilScaledFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "CeilScaledFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "CeilScaledFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "CeilScaledFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "CeilScaledFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "CeilScaledMaskedFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "CeilScaledMaskedFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "CeilScaledMaskedFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "CeilScaledMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "CeilScaledMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "CeilScaledMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "CeilScaledResidueFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "CeilScaledResidueFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "CeilScaledResidueFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "CeilScaledResidueFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "CeilScaledResidueFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "CeilScaledResidueFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "CeilScaledResidueMaskedFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "CeilScaledResidueMaskedFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "CeilScaledResidueMaskedFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "CeilScaledResidueMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "CeilScaledResidueMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "CeilScaledResidueMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "FloorScaledFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "FloorScaledFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "FloorScaledFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "FloorScaledFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "FloorScaledFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "FloorScaledFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "FloorScaledMaskedFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "FloorScaledMaskedFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "FloorScaledMaskedFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "FloorScaledMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "FloorScaledMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "FloorScaledMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "FloorScaledResidueFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "FloorScaledResidueFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "FloorScaledResidueFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "FloorScaledResidueFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "FloorScaledResidueFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "FloorScaledResidueFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "FloorScaledResidueMaskedFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "FloorScaledResidueMaskedFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "FloorScaledResidueMaskedFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "FloorScaledResidueMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "FloorScaledResidueMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "FloorScaledResidueMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "GaloisFieldAffineTransformInverseMaskedUint8x16", argLength: 3, commutative: false, aux: "Int8"},
|
||||
{name: "GaloisFieldAffineTransformInverseMaskedUint8x32", argLength: 3, commutative: false, aux: "Int8"},
|
||||
{name: "GaloisFieldAffineTransformInverseMaskedUint8x64", argLength: 3, commutative: false, aux: "Int8"},
|
||||
|
|
@ -1708,18 +1684,30 @@ func simdGenericOps() []opData {
|
|||
{name: "RotateAllRightUint64x2", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "RotateAllRightUint64x4", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "RotateAllRightUint64x8", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "RoundWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "RoundWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "RoundWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "RoundWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "RoundWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "RoundWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "RoundWithPrecisionMaskedFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "RoundWithPrecisionMaskedFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "RoundWithPrecisionMaskedFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "RoundWithPrecisionMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "RoundWithPrecisionMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "RoundWithPrecisionMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "RoundScaledFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "RoundScaledFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "RoundScaledFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "RoundScaledFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "RoundScaledFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "RoundScaledFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "RoundScaledMaskedFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "RoundScaledMaskedFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "RoundScaledMaskedFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "RoundScaledMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "RoundScaledMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "RoundScaledMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "RoundScaledResidueFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "RoundScaledResidueFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "RoundScaledResidueFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "RoundScaledResidueFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "RoundScaledResidueFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "RoundScaledResidueFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "RoundScaledResidueMaskedFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "RoundScaledResidueMaskedFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "RoundScaledResidueMaskedFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "RoundScaledResidueMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "RoundScaledResidueMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "RoundScaledResidueMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "Set128Float32x8", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "Set128Float64x4", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "Set128Int8x32", argLength: 2, commutative: false, aux: "Int8"},
|
||||
|
|
@ -1810,17 +1798,29 @@ func simdGenericOps() []opData {
|
|||
{name: "ShiftAllRightConcatUint64x2", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "ShiftAllRightConcatUint64x4", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "ShiftAllRightConcatUint64x8", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "TruncWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "TruncWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "TruncWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "TruncWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "TruncWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "TruncWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "TruncWithPrecisionMaskedFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "TruncWithPrecisionMaskedFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "TruncWithPrecisionMaskedFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "TruncWithPrecisionMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "TruncWithPrecisionMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "TruncWithPrecisionMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "TruncScaledFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "TruncScaledFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "TruncScaledFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "TruncScaledFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "TruncScaledFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "TruncScaledFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "TruncScaledMaskedFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "TruncScaledMaskedFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "TruncScaledMaskedFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "TruncScaledMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "TruncScaledMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "TruncScaledMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "TruncScaledResidueFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "TruncScaledResidueFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "TruncScaledResidueFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "TruncScaledResidueFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "TruncScaledResidueFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "TruncScaledResidueFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
|
||||
{name: "TruncScaledResidueMaskedFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "TruncScaledResidueMaskedFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "TruncScaledResidueMaskedFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "TruncScaledResidueMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "TruncScaledResidueMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
|
||||
{name: "TruncScaledResidueMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
|
||||
}
|
||||
}
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
|
@ -101,6 +101,44 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
|||
addF(simdPackage, "Uint64x2.AddMasked", opLen3(ssa.OpAddMaskedUint64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x4.AddMasked", opLen3(ssa.OpAddMaskedUint64x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x8.AddMasked", opLen3(ssa.OpAddMaskedUint64x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.AddPairs", opLen2(ssa.OpAddPairsFloat32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.AddPairs", opLen2(ssa.OpAddPairsFloat32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.AddPairs", opLen2(ssa.OpAddPairsFloat64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.AddPairs", opLen2(ssa.OpAddPairsFloat64x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int16x8.AddPairs", opLen2(ssa.OpAddPairsInt16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int16x16.AddPairs", opLen2(ssa.OpAddPairsInt16x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.AddPairs", opLen2(ssa.OpAddPairsInt32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int32x8.AddPairs", opLen2(ssa.OpAddPairsInt32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x8.AddPairs", opLen2(ssa.OpAddPairsUint16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x16.AddPairs", opLen2(ssa.OpAddPairsUint16x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x4.AddPairs", opLen2(ssa.OpAddPairsUint32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x8.AddPairs", opLen2(ssa.OpAddPairsUint32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int16x8.AddPairsSaturated", opLen2(ssa.OpAddPairsSaturatedInt16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int16x16.AddPairsSaturated", opLen2(ssa.OpAddPairsSaturatedInt16x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int8x16.AddSaturated", opLen2(ssa.OpAddSaturatedInt8x16, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int8x32.AddSaturated", opLen2(ssa.OpAddSaturatedInt8x32, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int8x64.AddSaturated", opLen2(ssa.OpAddSaturatedInt8x64, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int16x8.AddSaturated", opLen2(ssa.OpAddSaturatedInt16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int16x16.AddSaturated", opLen2(ssa.OpAddSaturatedInt16x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int16x32.AddSaturated", opLen2(ssa.OpAddSaturatedInt16x32, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x16.AddSaturated", opLen2(ssa.OpAddSaturatedUint8x16, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x32.AddSaturated", opLen2(ssa.OpAddSaturatedUint8x32, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x64.AddSaturated", opLen2(ssa.OpAddSaturatedUint8x64, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x8.AddSaturated", opLen2(ssa.OpAddSaturatedUint16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x16.AddSaturated", opLen2(ssa.OpAddSaturatedUint16x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x32.AddSaturated", opLen2(ssa.OpAddSaturatedUint16x32, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int8x16.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedInt8x16, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int8x32.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedInt8x32, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int8x64.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedInt8x64, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int16x8.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedInt16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int16x16.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedInt16x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int16x32.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedInt16x32, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x16.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedUint8x16, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x32.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedUint8x32, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x64.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedUint8x64, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x8.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedUint16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x16.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedUint16x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x32.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedUint16x32, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.AddSub", opLen2(ssa.OpAddSubFloat32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.AddSub", opLen2(ssa.OpAddSubFloat32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.AddSub", opLen2(ssa.OpAddSubFloat64x2, types.TypeVec128), sys.AMD64)
|
||||
|
|
@ -217,18 +255,30 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
|||
addF(simdPackage, "Float32x8.Ceil", opLen1(ssa.OpCeilFloat32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.Ceil", opLen1(ssa.OpCeilFloat64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.Ceil", opLen1(ssa.OpCeilFloat64x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.CeilWithPrecision", opLen1Imm8(ssa.OpCeilWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.CeilWithPrecision", opLen1Imm8(ssa.OpCeilWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x16.CeilWithPrecision", opLen1Imm8(ssa.OpCeilWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.CeilWithPrecision", opLen1Imm8(ssa.OpCeilWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.CeilWithPrecision", opLen1Imm8(ssa.OpCeilWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x8.CeilWithPrecision", opLen1Imm8(ssa.OpCeilWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.CeilWithPrecisionMasked", opLen2Imm8(ssa.OpCeilWithPrecisionMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.CeilWithPrecisionMasked", opLen2Imm8(ssa.OpCeilWithPrecisionMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x16.CeilWithPrecisionMasked", opLen2Imm8(ssa.OpCeilWithPrecisionMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.CeilWithPrecisionMasked", opLen2Imm8(ssa.OpCeilWithPrecisionMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.CeilWithPrecisionMasked", opLen2Imm8(ssa.OpCeilWithPrecisionMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x8.CeilWithPrecisionMasked", opLen2Imm8(ssa.OpCeilWithPrecisionMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.CeilScaled", opLen1Imm8(ssa.OpCeilScaledFloat32x4, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.CeilScaled", opLen1Imm8(ssa.OpCeilScaledFloat32x8, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x16.CeilScaled", opLen1Imm8(ssa.OpCeilScaledFloat32x16, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.CeilScaled", opLen1Imm8(ssa.OpCeilScaledFloat64x2, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.CeilScaled", opLen1Imm8(ssa.OpCeilScaledFloat64x4, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x8.CeilScaled", opLen1Imm8(ssa.OpCeilScaledFloat64x8, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.CeilScaledMasked", opLen2Imm8(ssa.OpCeilScaledMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.CeilScaledMasked", opLen2Imm8(ssa.OpCeilScaledMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x16.CeilScaledMasked", opLen2Imm8(ssa.OpCeilScaledMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.CeilScaledMasked", opLen2Imm8(ssa.OpCeilScaledMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.CeilScaledMasked", opLen2Imm8(ssa.OpCeilScaledMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x8.CeilScaledMasked", opLen2Imm8(ssa.OpCeilScaledMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.CeilScaledResidue", opLen1Imm8(ssa.OpCeilScaledResidueFloat32x4, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.CeilScaledResidue", opLen1Imm8(ssa.OpCeilScaledResidueFloat32x8, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x16.CeilScaledResidue", opLen1Imm8(ssa.OpCeilScaledResidueFloat32x16, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.CeilScaledResidue", opLen1Imm8(ssa.OpCeilScaledResidueFloat64x2, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.CeilScaledResidue", opLen1Imm8(ssa.OpCeilScaledResidueFloat64x4, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x8.CeilScaledResidue", opLen1Imm8(ssa.OpCeilScaledResidueFloat64x8, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.CeilScaledResidueMasked", opLen2Imm8(ssa.OpCeilScaledResidueMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.CeilScaledResidueMasked", opLen2Imm8(ssa.OpCeilScaledResidueMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x16.CeilScaledResidueMasked", opLen2Imm8(ssa.OpCeilScaledResidueMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.CeilScaledResidueMasked", opLen2Imm8(ssa.OpCeilScaledResidueMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.CeilScaledResidueMasked", opLen2Imm8(ssa.OpCeilScaledResidueMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x8.CeilScaledResidueMasked", opLen2Imm8(ssa.OpCeilScaledResidueMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.Compress", opLen2(ssa.OpCompressFloat32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.Compress", opLen2(ssa.OpCompressFloat32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float32x16.Compress", opLen2(ssa.OpCompressFloat32x16, types.TypeVec512), sys.AMD64)
|
||||
|
|
@ -271,54 +321,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
|||
addF(simdPackage, "Float32x4.ConvertToUint32Masked", opLen2(ssa.OpConvertToUint32MaskedFloat32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.ConvertToUint32Masked", opLen2(ssa.OpConvertToUint32MaskedFloat32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float32x16.ConvertToUint32Masked", opLen2(ssa.OpConvertToUint32MaskedFloat32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x16.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x8.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.DiffWithCeilWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithCeilWithPrecisionMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.DiffWithCeilWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithCeilWithPrecisionMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x16.DiffWithCeilWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithCeilWithPrecisionMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.DiffWithCeilWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithCeilWithPrecisionMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.DiffWithCeilWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithCeilWithPrecisionMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x8.DiffWithCeilWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithCeilWithPrecisionMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.DiffWithFloorWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.DiffWithFloorWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x16.DiffWithFloorWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.DiffWithFloorWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.DiffWithFloorWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x8.DiffWithFloorWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.DiffWithFloorWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithFloorWithPrecisionMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.DiffWithFloorWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithFloorWithPrecisionMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x16.DiffWithFloorWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithFloorWithPrecisionMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.DiffWithFloorWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithFloorWithPrecisionMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.DiffWithFloorWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithFloorWithPrecisionMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x8.DiffWithFloorWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithFloorWithPrecisionMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.DiffWithRoundWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.DiffWithRoundWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x16.DiffWithRoundWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.DiffWithRoundWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.DiffWithRoundWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x8.DiffWithRoundWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.DiffWithRoundWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithRoundWithPrecisionMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.DiffWithRoundWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithRoundWithPrecisionMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x16.DiffWithRoundWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithRoundWithPrecisionMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.DiffWithRoundWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithRoundWithPrecisionMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.DiffWithRoundWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithRoundWithPrecisionMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x8.DiffWithRoundWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithRoundWithPrecisionMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.DiffWithTruncWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.DiffWithTruncWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x16.DiffWithTruncWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.DiffWithTruncWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.DiffWithTruncWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x8.DiffWithTruncWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.DiffWithTruncWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithTruncWithPrecisionMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.DiffWithTruncWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithTruncWithPrecisionMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x16.DiffWithTruncWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithTruncWithPrecisionMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.DiffWithTruncWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithTruncWithPrecisionMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.DiffWithTruncWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithTruncWithPrecisionMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x8.DiffWithTruncWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithTruncWithPrecisionMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.Div", opLen2(ssa.OpDivFloat32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.Div", opLen2(ssa.OpDivFloat32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float32x16.Div", opLen2(ssa.OpDivFloat32x16, types.TypeVec512), sys.AMD64)
|
||||
|
|
@ -398,18 +400,30 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
|||
addF(simdPackage, "Float32x8.Floor", opLen1(ssa.OpFloorFloat32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.Floor", opLen1(ssa.OpFloorFloat64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.Floor", opLen1(ssa.OpFloorFloat64x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.FloorWithPrecision", opLen1Imm8(ssa.OpFloorWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.FloorWithPrecision", opLen1Imm8(ssa.OpFloorWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x16.FloorWithPrecision", opLen1Imm8(ssa.OpFloorWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.FloorWithPrecision", opLen1Imm8(ssa.OpFloorWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.FloorWithPrecision", opLen1Imm8(ssa.OpFloorWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x8.FloorWithPrecision", opLen1Imm8(ssa.OpFloorWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.FloorWithPrecisionMasked", opLen2Imm8(ssa.OpFloorWithPrecisionMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.FloorWithPrecisionMasked", opLen2Imm8(ssa.OpFloorWithPrecisionMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x16.FloorWithPrecisionMasked", opLen2Imm8(ssa.OpFloorWithPrecisionMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.FloorWithPrecisionMasked", opLen2Imm8(ssa.OpFloorWithPrecisionMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.FloorWithPrecisionMasked", opLen2Imm8(ssa.OpFloorWithPrecisionMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x8.FloorWithPrecisionMasked", opLen2Imm8(ssa.OpFloorWithPrecisionMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.FloorScaled", opLen1Imm8(ssa.OpFloorScaledFloat32x4, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.FloorScaled", opLen1Imm8(ssa.OpFloorScaledFloat32x8, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x16.FloorScaled", opLen1Imm8(ssa.OpFloorScaledFloat32x16, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.FloorScaled", opLen1Imm8(ssa.OpFloorScaledFloat64x2, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.FloorScaled", opLen1Imm8(ssa.OpFloorScaledFloat64x4, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x8.FloorScaled", opLen1Imm8(ssa.OpFloorScaledFloat64x8, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.FloorScaledMasked", opLen2Imm8(ssa.OpFloorScaledMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.FloorScaledMasked", opLen2Imm8(ssa.OpFloorScaledMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x16.FloorScaledMasked", opLen2Imm8(ssa.OpFloorScaledMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.FloorScaledMasked", opLen2Imm8(ssa.OpFloorScaledMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.FloorScaledMasked", opLen2Imm8(ssa.OpFloorScaledMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x8.FloorScaledMasked", opLen2Imm8(ssa.OpFloorScaledMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.FloorScaledResidue", opLen1Imm8(ssa.OpFloorScaledResidueFloat32x4, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.FloorScaledResidue", opLen1Imm8(ssa.OpFloorScaledResidueFloat32x8, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x16.FloorScaledResidue", opLen1Imm8(ssa.OpFloorScaledResidueFloat32x16, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.FloorScaledResidue", opLen1Imm8(ssa.OpFloorScaledResidueFloat64x2, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.FloorScaledResidue", opLen1Imm8(ssa.OpFloorScaledResidueFloat64x4, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x8.FloorScaledResidue", opLen1Imm8(ssa.OpFloorScaledResidueFloat64x8, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.FloorScaledResidueMasked", opLen2Imm8(ssa.OpFloorScaledResidueMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.FloorScaledResidueMasked", opLen2Imm8(ssa.OpFloorScaledResidueMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x16.FloorScaledResidueMasked", opLen2Imm8(ssa.OpFloorScaledResidueMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.FloorScaledResidueMasked", opLen2Imm8(ssa.OpFloorScaledResidueMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.FloorScaledResidueMasked", opLen2Imm8(ssa.OpFloorScaledResidueMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x8.FloorScaledResidueMasked", opLen2Imm8(ssa.OpFloorScaledResidueMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.FusedMultiplyAdd", opLen3(ssa.OpFusedMultiplyAddFloat32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.FusedMultiplyAdd", opLen3(ssa.OpFusedMultiplyAddFloat32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float32x16.FusedMultiplyAdd", opLen3(ssa.OpFusedMultiplyAddFloat32x16, types.TypeVec512), sys.AMD64)
|
||||
|
|
@ -860,18 +874,15 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
|||
addF(simdPackage, "Float64x2.Mul", opLen2(ssa.OpMulFloat64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.Mul", opLen2(ssa.OpMulFloat64x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float64x8.Mul", opLen2(ssa.OpMulFloat64x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.MulByPowOf2", opLen2(ssa.OpMulByPowOf2Float32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.MulByPowOf2", opLen2(ssa.OpMulByPowOf2Float32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float32x16.MulByPowOf2", opLen2(ssa.OpMulByPowOf2Float32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.MulByPowOf2", opLen2(ssa.OpMulByPowOf2Float64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.MulByPowOf2", opLen2(ssa.OpMulByPowOf2Float64x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float64x8.MulByPowOf2", opLen2(ssa.OpMulByPowOf2Float64x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.MulByPowOf2Masked", opLen3(ssa.OpMulByPowOf2MaskedFloat32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.MulByPowOf2Masked", opLen3(ssa.OpMulByPowOf2MaskedFloat32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float32x16.MulByPowOf2Masked", opLen3(ssa.OpMulByPowOf2MaskedFloat32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.MulByPowOf2Masked", opLen3(ssa.OpMulByPowOf2MaskedFloat64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.MulByPowOf2Masked", opLen3(ssa.OpMulByPowOf2MaskedFloat64x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float64x8.MulByPowOf2Masked", opLen3(ssa.OpMulByPowOf2MaskedFloat64x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int16x8.Mul", opLen2(ssa.OpMulInt16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int16x16.Mul", opLen2(ssa.OpMulInt16x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int16x32.Mul", opLen2(ssa.OpMulInt16x32, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.Mul", opLen2(ssa.OpMulInt32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int32x8.Mul", opLen2(ssa.OpMulInt32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int32x16.Mul", opLen2(ssa.OpMulInt32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int64x2.Mul", opLen2(ssa.OpMulInt64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int64x4.Mul", opLen2(ssa.OpMulInt64x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int64x8.Mul", opLen2(ssa.OpMulInt64x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.MulEvenWiden", opLen2(ssa.OpMulEvenWidenInt32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int32x8.MulEvenWiden", opLen2(ssa.OpMulEvenWidenInt32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int64x2.MulEvenWiden", opLen2(ssa.OpMulEvenWidenInt64x2, types.TypeVec128), sys.AMD64)
|
||||
|
|
@ -900,30 +911,21 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
|||
addF(simdPackage, "Uint16x8.MulHighMasked", opLen3(ssa.OpMulHighMaskedUint16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x16.MulHighMasked", opLen3(ssa.OpMulHighMaskedUint16x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x32.MulHighMasked", opLen3(ssa.OpMulHighMaskedUint16x32, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int16x8.MulLow", opLen2(ssa.OpMulLowInt16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int16x16.MulLow", opLen2(ssa.OpMulLowInt16x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int16x32.MulLow", opLen2(ssa.OpMulLowInt16x32, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.MulLow", opLen2(ssa.OpMulLowInt32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int32x8.MulLow", opLen2(ssa.OpMulLowInt32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int32x16.MulLow", opLen2(ssa.OpMulLowInt32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int64x2.MulLow", opLen2(ssa.OpMulLowInt64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int64x4.MulLow", opLen2(ssa.OpMulLowInt64x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int64x8.MulLow", opLen2(ssa.OpMulLowInt64x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int16x8.MulLowMasked", opLen3(ssa.OpMulLowMaskedInt16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int16x16.MulLowMasked", opLen3(ssa.OpMulLowMaskedInt16x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int16x32.MulLowMasked", opLen3(ssa.OpMulLowMaskedInt16x32, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.MulLowMasked", opLen3(ssa.OpMulLowMaskedInt32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int32x8.MulLowMasked", opLen3(ssa.OpMulLowMaskedInt32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int32x16.MulLowMasked", opLen3(ssa.OpMulLowMaskedInt32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int64x2.MulLowMasked", opLen3(ssa.OpMulLowMaskedInt64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int64x4.MulLowMasked", opLen3(ssa.OpMulLowMaskedInt64x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int64x8.MulLowMasked", opLen3(ssa.OpMulLowMaskedInt64x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.MulMasked", opLen3(ssa.OpMulMaskedFloat32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.MulMasked", opLen3(ssa.OpMulMaskedFloat32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float32x16.MulMasked", opLen3(ssa.OpMulMaskedFloat32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.MulMasked", opLen3(ssa.OpMulMaskedFloat64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.MulMasked", opLen3(ssa.OpMulMaskedFloat64x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float64x8.MulMasked", opLen3(ssa.OpMulMaskedFloat64x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int16x8.MulMasked", opLen3(ssa.OpMulMaskedInt16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int16x16.MulMasked", opLen3(ssa.OpMulMaskedInt16x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int16x32.MulMasked", opLen3(ssa.OpMulMaskedInt16x32, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.MulMasked", opLen3(ssa.OpMulMaskedInt32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int32x8.MulMasked", opLen3(ssa.OpMulMaskedInt32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int32x16.MulMasked", opLen3(ssa.OpMulMaskedInt32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int64x2.MulMasked", opLen3(ssa.OpMulMaskedInt64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int64x4.MulMasked", opLen3(ssa.OpMulMaskedInt64x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int64x8.MulMasked", opLen3(ssa.OpMulMaskedInt64x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.NotEqual", opLen2(ssa.OpNotEqualFloat32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.NotEqual", opLen2(ssa.OpNotEqualFloat32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float32x16.NotEqual", opLen2(ssa.OpNotEqualFloat32x16, types.TypeVec512), sys.AMD64)
|
||||
|
|
@ -1026,30 +1028,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
|||
addF(simdPackage, "Int16x8.PairDotProdMasked", opLen3(ssa.OpPairDotProdMaskedInt16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int16x16.PairDotProdMasked", opLen3(ssa.OpPairDotProdMaskedInt16x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int16x32.PairDotProdMasked", opLen3(ssa.OpPairDotProdMaskedInt16x32, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.PairwiseAdd", opLen2(ssa.OpPairwiseAddFloat32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.PairwiseAdd", opLen2(ssa.OpPairwiseAddFloat32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.PairwiseAdd", opLen2(ssa.OpPairwiseAddFloat64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.PairwiseAdd", opLen2(ssa.OpPairwiseAddFloat64x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int16x8.PairwiseAdd", opLen2(ssa.OpPairwiseAddInt16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int16x16.PairwiseAdd", opLen2(ssa.OpPairwiseAddInt16x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.PairwiseAdd", opLen2(ssa.OpPairwiseAddInt32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int32x8.PairwiseAdd", opLen2(ssa.OpPairwiseAddInt32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x8.PairwiseAdd", opLen2(ssa.OpPairwiseAddUint16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x16.PairwiseAdd", opLen2(ssa.OpPairwiseAddUint16x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x4.PairwiseAdd", opLen2(ssa.OpPairwiseAddUint32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x8.PairwiseAdd", opLen2(ssa.OpPairwiseAddUint32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.PairwiseSub", opLen2(ssa.OpPairwiseSubFloat32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.PairwiseSub", opLen2(ssa.OpPairwiseSubFloat32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.PairwiseSub", opLen2(ssa.OpPairwiseSubFloat64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.PairwiseSub", opLen2(ssa.OpPairwiseSubFloat64x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int16x8.PairwiseSub", opLen2(ssa.OpPairwiseSubInt16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int16x16.PairwiseSub", opLen2(ssa.OpPairwiseSubInt16x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.PairwiseSub", opLen2(ssa.OpPairwiseSubInt32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int32x8.PairwiseSub", opLen2(ssa.OpPairwiseSubInt32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x8.PairwiseSub", opLen2(ssa.OpPairwiseSubUint16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x16.PairwiseSub", opLen2(ssa.OpPairwiseSubUint16x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x4.PairwiseSub", opLen2(ssa.OpPairwiseSubUint32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x8.PairwiseSub", opLen2(ssa.OpPairwiseSubUint32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int8x16.Permute", opLen2_21(ssa.OpPermuteInt8x16, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x16.Permute", opLen2_21(ssa.OpPermuteUint8x16, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int8x32.Permute", opLen2_21(ssa.OpPermuteInt8x32, types.TypeVec256), sys.AMD64)
|
||||
|
|
@ -1306,76 +1284,36 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
|||
addF(simdPackage, "Float32x8.Round", opLen1(ssa.OpRoundFloat32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.Round", opLen1(ssa.OpRoundFloat64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.Round", opLen1(ssa.OpRoundFloat64x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.RoundWithPrecision", opLen1Imm8(ssa.OpRoundWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.RoundWithPrecision", opLen1Imm8(ssa.OpRoundWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x16.RoundWithPrecision", opLen1Imm8(ssa.OpRoundWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.RoundWithPrecision", opLen1Imm8(ssa.OpRoundWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.RoundWithPrecision", opLen1Imm8(ssa.OpRoundWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x8.RoundWithPrecision", opLen1Imm8(ssa.OpRoundWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.RoundWithPrecisionMasked", opLen2Imm8(ssa.OpRoundWithPrecisionMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.RoundWithPrecisionMasked", opLen2Imm8(ssa.OpRoundWithPrecisionMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x16.RoundWithPrecisionMasked", opLen2Imm8(ssa.OpRoundWithPrecisionMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.RoundWithPrecisionMasked", opLen2Imm8(ssa.OpRoundWithPrecisionMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.RoundWithPrecisionMasked", opLen2Imm8(ssa.OpRoundWithPrecisionMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x8.RoundWithPrecisionMasked", opLen2Imm8(ssa.OpRoundWithPrecisionMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Int8x16.SaturatedAdd", opLen2(ssa.OpSaturatedAddInt8x16, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int8x32.SaturatedAdd", opLen2(ssa.OpSaturatedAddInt8x32, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int8x64.SaturatedAdd", opLen2(ssa.OpSaturatedAddInt8x64, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int16x8.SaturatedAdd", opLen2(ssa.OpSaturatedAddInt16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int16x16.SaturatedAdd", opLen2(ssa.OpSaturatedAddInt16x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int16x32.SaturatedAdd", opLen2(ssa.OpSaturatedAddInt16x32, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x16.SaturatedAdd", opLen2(ssa.OpSaturatedAddUint8x16, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x32.SaturatedAdd", opLen2(ssa.OpSaturatedAddUint8x32, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x64.SaturatedAdd", opLen2(ssa.OpSaturatedAddUint8x64, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x8.SaturatedAdd", opLen2(ssa.OpSaturatedAddUint16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x16.SaturatedAdd", opLen2(ssa.OpSaturatedAddUint16x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x32.SaturatedAdd", opLen2(ssa.OpSaturatedAddUint16x32, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.RoundScaled", opLen1Imm8(ssa.OpRoundScaledFloat32x4, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.RoundScaled", opLen1Imm8(ssa.OpRoundScaledFloat32x8, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x16.RoundScaled", opLen1Imm8(ssa.OpRoundScaledFloat32x16, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.RoundScaled", opLen1Imm8(ssa.OpRoundScaledFloat64x2, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.RoundScaled", opLen1Imm8(ssa.OpRoundScaledFloat64x4, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x8.RoundScaled", opLen1Imm8(ssa.OpRoundScaledFloat64x8, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.RoundScaledMasked", opLen2Imm8(ssa.OpRoundScaledMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.RoundScaledMasked", opLen2Imm8(ssa.OpRoundScaledMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x16.RoundScaledMasked", opLen2Imm8(ssa.OpRoundScaledMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.RoundScaledMasked", opLen2Imm8(ssa.OpRoundScaledMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.RoundScaledMasked", opLen2Imm8(ssa.OpRoundScaledMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x8.RoundScaledMasked", opLen2Imm8(ssa.OpRoundScaledMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.RoundScaledResidue", opLen1Imm8(ssa.OpRoundScaledResidueFloat32x4, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.RoundScaledResidue", opLen1Imm8(ssa.OpRoundScaledResidueFloat32x8, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x16.RoundScaledResidue", opLen1Imm8(ssa.OpRoundScaledResidueFloat32x16, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.RoundScaledResidue", opLen1Imm8(ssa.OpRoundScaledResidueFloat64x2, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.RoundScaledResidue", opLen1Imm8(ssa.OpRoundScaledResidueFloat64x4, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x8.RoundScaledResidue", opLen1Imm8(ssa.OpRoundScaledResidueFloat64x8, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.RoundScaledResidueMasked", opLen2Imm8(ssa.OpRoundScaledResidueMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.RoundScaledResidueMasked", opLen2Imm8(ssa.OpRoundScaledResidueMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x16.RoundScaledResidueMasked", opLen2Imm8(ssa.OpRoundScaledResidueMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.RoundScaledResidueMasked", opLen2Imm8(ssa.OpRoundScaledResidueMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.RoundScaledResidueMasked", opLen2Imm8(ssa.OpRoundScaledResidueMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x8.RoundScaledResidueMasked", opLen2Imm8(ssa.OpRoundScaledResidueMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.SaturatedAddDotProd", opLen3(ssa.OpSaturatedAddDotProdInt32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int32x8.SaturatedAddDotProd", opLen3(ssa.OpSaturatedAddDotProdInt32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int32x16.SaturatedAddDotProd", opLen3(ssa.OpSaturatedAddDotProdInt32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.SaturatedAddDotProdMasked", opLen4(ssa.OpSaturatedAddDotProdMaskedInt32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int32x8.SaturatedAddDotProdMasked", opLen4(ssa.OpSaturatedAddDotProdMaskedInt32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int32x16.SaturatedAddDotProdMasked", opLen4(ssa.OpSaturatedAddDotProdMaskedInt32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int8x16.SaturatedAddMasked", opLen3(ssa.OpSaturatedAddMaskedInt8x16, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int8x32.SaturatedAddMasked", opLen3(ssa.OpSaturatedAddMaskedInt8x32, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int8x64.SaturatedAddMasked", opLen3(ssa.OpSaturatedAddMaskedInt8x64, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int16x8.SaturatedAddMasked", opLen3(ssa.OpSaturatedAddMaskedInt16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int16x16.SaturatedAddMasked", opLen3(ssa.OpSaturatedAddMaskedInt16x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int16x32.SaturatedAddMasked", opLen3(ssa.OpSaturatedAddMaskedInt16x32, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x16.SaturatedAddMasked", opLen3(ssa.OpSaturatedAddMaskedUint8x16, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x32.SaturatedAddMasked", opLen3(ssa.OpSaturatedAddMaskedUint8x32, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x64.SaturatedAddMasked", opLen3(ssa.OpSaturatedAddMaskedUint8x64, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x8.SaturatedAddMasked", opLen3(ssa.OpSaturatedAddMaskedUint16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x16.SaturatedAddMasked", opLen3(ssa.OpSaturatedAddMaskedUint16x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x32.SaturatedAddMasked", opLen3(ssa.OpSaturatedAddMaskedUint16x32, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int16x8.SaturatedPairwiseAdd", opLen2(ssa.OpSaturatedPairwiseAddInt16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int16x16.SaturatedPairwiseAdd", opLen2(ssa.OpSaturatedPairwiseAddInt16x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int16x8.SaturatedPairwiseSub", opLen2(ssa.OpSaturatedPairwiseSubInt16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int16x16.SaturatedPairwiseSub", opLen2(ssa.OpSaturatedPairwiseSubInt16x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int8x16.SaturatedSub", opLen2(ssa.OpSaturatedSubInt8x16, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int8x32.SaturatedSub", opLen2(ssa.OpSaturatedSubInt8x32, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int8x64.SaturatedSub", opLen2(ssa.OpSaturatedSubInt8x64, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int16x8.SaturatedSub", opLen2(ssa.OpSaturatedSubInt16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int16x16.SaturatedSub", opLen2(ssa.OpSaturatedSubInt16x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int16x32.SaturatedSub", opLen2(ssa.OpSaturatedSubInt16x32, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x16.SaturatedSub", opLen2(ssa.OpSaturatedSubUint8x16, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x32.SaturatedSub", opLen2(ssa.OpSaturatedSubUint8x32, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x64.SaturatedSub", opLen2(ssa.OpSaturatedSubUint8x64, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x8.SaturatedSub", opLen2(ssa.OpSaturatedSubUint16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x16.SaturatedSub", opLen2(ssa.OpSaturatedSubUint16x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x32.SaturatedSub", opLen2(ssa.OpSaturatedSubUint16x32, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int8x16.SaturatedSubMasked", opLen3(ssa.OpSaturatedSubMaskedInt8x16, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int8x32.SaturatedSubMasked", opLen3(ssa.OpSaturatedSubMaskedInt8x32, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int8x64.SaturatedSubMasked", opLen3(ssa.OpSaturatedSubMaskedInt8x64, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int16x8.SaturatedSubMasked", opLen3(ssa.OpSaturatedSubMaskedInt16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int16x16.SaturatedSubMasked", opLen3(ssa.OpSaturatedSubMaskedInt16x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int16x32.SaturatedSubMasked", opLen3(ssa.OpSaturatedSubMaskedInt16x32, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x16.SaturatedSubMasked", opLen3(ssa.OpSaturatedSubMaskedUint8x16, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x32.SaturatedSubMasked", opLen3(ssa.OpSaturatedSubMaskedUint8x32, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x64.SaturatedSubMasked", opLen3(ssa.OpSaturatedSubMaskedUint8x64, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x8.SaturatedSubMasked", opLen3(ssa.OpSaturatedSubMaskedUint16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x16.SaturatedSubMasked", opLen3(ssa.OpSaturatedSubMaskedUint16x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x32.SaturatedSubMasked", opLen3(ssa.OpSaturatedSubMaskedUint16x32, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x16.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint8x16, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x32.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint8x32, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x64.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint8x64, types.TypeVec512), sys.AMD64)
|
||||
|
|
@ -1388,6 +1326,18 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
|||
addF(simdPackage, "Int8x16.SaturatedUnsignedSignedQuadDotProdAccumulateMasked", opLen4_31(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int8x32.SaturatedUnsignedSignedQuadDotProdAccumulateMasked", opLen4_31(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int8x64.SaturatedUnsignedSignedQuadDotProdAccumulateMasked", opLen4_31(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.Scale", opLen2(ssa.OpScaleFloat32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.Scale", opLen2(ssa.OpScaleFloat32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float32x16.Scale", opLen2(ssa.OpScaleFloat32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.Scale", opLen2(ssa.OpScaleFloat64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.Scale", opLen2(ssa.OpScaleFloat64x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float64x8.Scale", opLen2(ssa.OpScaleFloat64x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float32x16.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat64x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float64x8.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat64x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.Set128", opLen2Imm8(ssa.OpSet128Float32x8, types.TypeVec256, 0), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.Set128", opLen2Imm8(ssa.OpSet128Float64x4, types.TypeVec256, 0), sys.AMD64)
|
||||
addF(simdPackage, "Int8x32.Set128", opLen2Imm8(ssa.OpSet128Int8x32, types.TypeVec256, 0), sys.AMD64)
|
||||
|
|
@ -1772,22 +1722,72 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
|||
addF(simdPackage, "Uint64x2.SubMasked", opLen3(ssa.OpSubMaskedUint64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x4.SubMasked", opLen3(ssa.OpSubMaskedUint64x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x8.SubMasked", opLen3(ssa.OpSubMaskedUint64x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.SubPairs", opLen2(ssa.OpSubPairsFloat32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.SubPairs", opLen2(ssa.OpSubPairsFloat32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.SubPairs", opLen2(ssa.OpSubPairsFloat64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.SubPairs", opLen2(ssa.OpSubPairsFloat64x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int16x8.SubPairs", opLen2(ssa.OpSubPairsInt16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int16x16.SubPairs", opLen2(ssa.OpSubPairsInt16x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.SubPairs", opLen2(ssa.OpSubPairsInt32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int32x8.SubPairs", opLen2(ssa.OpSubPairsInt32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x8.SubPairs", opLen2(ssa.OpSubPairsUint16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x16.SubPairs", opLen2(ssa.OpSubPairsUint16x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x4.SubPairs", opLen2(ssa.OpSubPairsUint32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x8.SubPairs", opLen2(ssa.OpSubPairsUint32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int16x8.SubPairsSaturated", opLen2(ssa.OpSubPairsSaturatedInt16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int16x16.SubPairsSaturated", opLen2(ssa.OpSubPairsSaturatedInt16x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int8x16.SubSaturated", opLen2(ssa.OpSubSaturatedInt8x16, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int8x32.SubSaturated", opLen2(ssa.OpSubSaturatedInt8x32, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int8x64.SubSaturated", opLen2(ssa.OpSubSaturatedInt8x64, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int16x8.SubSaturated", opLen2(ssa.OpSubSaturatedInt16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int16x16.SubSaturated", opLen2(ssa.OpSubSaturatedInt16x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int16x32.SubSaturated", opLen2(ssa.OpSubSaturatedInt16x32, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x16.SubSaturated", opLen2(ssa.OpSubSaturatedUint8x16, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x32.SubSaturated", opLen2(ssa.OpSubSaturatedUint8x32, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x64.SubSaturated", opLen2(ssa.OpSubSaturatedUint8x64, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x8.SubSaturated", opLen2(ssa.OpSubSaturatedUint16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x16.SubSaturated", opLen2(ssa.OpSubSaturatedUint16x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x32.SubSaturated", opLen2(ssa.OpSubSaturatedUint16x32, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int8x16.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedInt8x16, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int8x32.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedInt8x32, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int8x64.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedInt8x64, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int16x8.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedInt16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int16x16.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedInt16x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int16x32.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedInt16x32, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x16.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedUint8x16, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x32.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedUint8x32, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x64.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedUint8x64, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x8.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedUint16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x16.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedUint16x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x32.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedUint16x32, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.Trunc", opLen1(ssa.OpTruncFloat32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.Trunc", opLen1(ssa.OpTruncFloat32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.Trunc", opLen1(ssa.OpTruncFloat64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.Trunc", opLen1(ssa.OpTruncFloat64x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.TruncWithPrecision", opLen1Imm8(ssa.OpTruncWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.TruncWithPrecision", opLen1Imm8(ssa.OpTruncWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x16.TruncWithPrecision", opLen1Imm8(ssa.OpTruncWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.TruncWithPrecision", opLen1Imm8(ssa.OpTruncWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.TruncWithPrecision", opLen1Imm8(ssa.OpTruncWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x8.TruncWithPrecision", opLen1Imm8(ssa.OpTruncWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.TruncWithPrecisionMasked", opLen2Imm8(ssa.OpTruncWithPrecisionMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.TruncWithPrecisionMasked", opLen2Imm8(ssa.OpTruncWithPrecisionMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x16.TruncWithPrecisionMasked", opLen2Imm8(ssa.OpTruncWithPrecisionMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.TruncWithPrecisionMasked", opLen2Imm8(ssa.OpTruncWithPrecisionMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.TruncWithPrecisionMasked", opLen2Imm8(ssa.OpTruncWithPrecisionMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x8.TruncWithPrecisionMasked", opLen2Imm8(ssa.OpTruncWithPrecisionMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.TruncScaled", opLen1Imm8(ssa.OpTruncScaledFloat32x4, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.TruncScaled", opLen1Imm8(ssa.OpTruncScaledFloat32x8, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x16.TruncScaled", opLen1Imm8(ssa.OpTruncScaledFloat32x16, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.TruncScaled", opLen1Imm8(ssa.OpTruncScaledFloat64x2, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.TruncScaled", opLen1Imm8(ssa.OpTruncScaledFloat64x4, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x8.TruncScaled", opLen1Imm8(ssa.OpTruncScaledFloat64x8, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.TruncScaledMasked", opLen2Imm8(ssa.OpTruncScaledMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.TruncScaledMasked", opLen2Imm8(ssa.OpTruncScaledMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x16.TruncScaledMasked", opLen2Imm8(ssa.OpTruncScaledMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.TruncScaledMasked", opLen2Imm8(ssa.OpTruncScaledMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.TruncScaledMasked", opLen2Imm8(ssa.OpTruncScaledMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x8.TruncScaledMasked", opLen2Imm8(ssa.OpTruncScaledMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.TruncScaledResidue", opLen1Imm8(ssa.OpTruncScaledResidueFloat32x4, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.TruncScaledResidue", opLen1Imm8(ssa.OpTruncScaledResidueFloat32x8, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x16.TruncScaledResidue", opLen1Imm8(ssa.OpTruncScaledResidueFloat32x16, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.TruncScaledResidue", opLen1Imm8(ssa.OpTruncScaledResidueFloat64x2, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.TruncScaledResidue", opLen1Imm8(ssa.OpTruncScaledResidueFloat64x4, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x8.TruncScaledResidue", opLen1Imm8(ssa.OpTruncScaledResidueFloat64x8, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.TruncScaledResidueMasked", opLen2Imm8(ssa.OpTruncScaledResidueMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.TruncScaledResidueMasked", opLen2Imm8(ssa.OpTruncScaledResidueMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float32x16.TruncScaledResidueMasked", opLen2Imm8(ssa.OpTruncScaledResidueMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.TruncScaledResidueMasked", opLen2Imm8(ssa.OpTruncScaledResidueMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.TruncScaledResidueMasked", opLen2Imm8(ssa.OpTruncScaledResidueMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
|
||||
addF(simdPackage, "Float64x8.TruncScaledResidueMasked", opLen2Imm8(ssa.OpTruncScaledResidueMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
|
||||
addF(simdPackage, "Int8x16.UnsignedSignedQuadDotProdAccumulate", opLen3_31(ssa.OpUnsignedSignedQuadDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int8x32.UnsignedSignedQuadDotProdAccumulate", opLen3_31(ssa.OpUnsignedSignedQuadDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int8x64.UnsignedSignedQuadDotProdAccumulate", opLen3_31(ssa.OpUnsignedSignedQuadDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
|
||||
|
|
|
|||
|
|
@ -309,42 +309,42 @@ func TestMul(t *testing.T) {
|
|||
testFloat64x2Binary(t, simd.Float64x2.Mul, mulSlice[float64])
|
||||
testFloat64x4Binary(t, simd.Float64x4.Mul, mulSlice[float64])
|
||||
|
||||
testInt16x16Binary(t, simd.Int16x16.MulLow, mulSlice[int16])
|
||||
testInt16x8Binary(t, simd.Int16x8.MulLow, mulSlice[int16])
|
||||
testInt32x4Binary(t, simd.Int32x4.MulLow, mulSlice[int32])
|
||||
testInt32x8Binary(t, simd.Int32x8.MulLow, mulSlice[int32])
|
||||
testInt16x16Binary(t, simd.Int16x16.Mul, mulSlice[int16])
|
||||
testInt16x8Binary(t, simd.Int16x8.Mul, mulSlice[int16])
|
||||
testInt32x4Binary(t, simd.Int32x4.Mul, mulSlice[int32])
|
||||
testInt32x8Binary(t, simd.Int32x8.Mul, mulSlice[int32])
|
||||
|
||||
// testInt8x16Binary(t, simd.Int8x16.MulLow, mulSlice[int8]) // nope
|
||||
// testInt8x32Binary(t, simd.Int8x32.MulLow, mulSlice[int8])
|
||||
// testInt8x16Binary(t, simd.Int8x16.Mul, mulSlice[int8]) // nope
|
||||
// testInt8x32Binary(t, simd.Int8x32.Mul, mulSlice[int8])
|
||||
|
||||
// TODO we should be able to do these, there's no difference between signed/unsigned mulLow
|
||||
// testUint16x16Binary(t, simd.Uint16x16.MulLow, mulSlice[uint16])
|
||||
// testUint16x8Binary(t, simd.Uint16x8.MulLow, mulSlice[uint16])
|
||||
// testUint32x4Binary(t, simd.Uint32x4.MulLow, mulSlice[uint32])
|
||||
// testUint32x8Binary(t, simd.Uint32x8.MulLow, mulSlice[uint32])
|
||||
// testUint64x2Binary(t, simd.Uint64x2.MulLow, mulSlice[uint64])
|
||||
// testUint64x4Binary(t, simd.Uint64x4.MulLow, mulSlice[uint64])
|
||||
// TODO we should be able to do these, there's no difference between signed/unsigned Mul
|
||||
// testUint16x16Binary(t, simd.Uint16x16.Mul, mulSlice[uint16])
|
||||
// testUint16x8Binary(t, simd.Uint16x8.Mul, mulSlice[uint16])
|
||||
// testUint32x4Binary(t, simd.Uint32x4.Mul, mulSlice[uint32])
|
||||
// testUint32x8Binary(t, simd.Uint32x8.Mul, mulSlice[uint32])
|
||||
// testUint64x2Binary(t, simd.Uint64x2.Mul, mulSlice[uint64])
|
||||
// testUint64x4Binary(t, simd.Uint64x4.Mul, mulSlice[uint64])
|
||||
|
||||
// testUint8x16Binary(t, simd.Uint8x16.MulLow, mulSlice[uint8]) // nope
|
||||
// testUint8x32Binary(t, simd.Uint8x32.MulLow, mulSlice[uint8])
|
||||
// testUint8x16Binary(t, simd.Uint8x16.Mul, mulSlice[uint8]) // nope
|
||||
// testUint8x32Binary(t, simd.Uint8x32.Mul, mulSlice[uint8])
|
||||
|
||||
if simd.HasAVX512() {
|
||||
testInt64x2Binary(t, simd.Int64x2.MulLow, mulSlice[int64]) // avx512 only
|
||||
testInt64x4Binary(t, simd.Int64x4.MulLow, mulSlice[int64])
|
||||
testInt64x2Binary(t, simd.Int64x2.Mul, mulSlice[int64]) // avx512 only
|
||||
testInt64x4Binary(t, simd.Int64x4.Mul, mulSlice[int64])
|
||||
|
||||
testFloat32x16Binary(t, simd.Float32x16.Mul, mulSlice[float32])
|
||||
testFloat64x8Binary(t, simd.Float64x8.Mul, mulSlice[float64])
|
||||
|
||||
// testInt8x64Binary(t, simd.Int8x64.MulLow, mulSlice[int8]) // nope
|
||||
testInt16x32Binary(t, simd.Int16x32.MulLow, mulSlice[int16])
|
||||
testInt32x16Binary(t, simd.Int32x16.MulLow, mulSlice[int32])
|
||||
testInt64x8Binary(t, simd.Int64x8.MulLow, mulSlice[int64])
|
||||
// testUint8x64Binary(t, simd.Uint8x64.MulLow, mulSlice[uint8]) // nope
|
||||
// testInt8x64Binary(t, simd.Int8x64.Mul, mulSlice[int8]) // nope
|
||||
testInt16x32Binary(t, simd.Int16x32.Mul, mulSlice[int16])
|
||||
testInt32x16Binary(t, simd.Int32x16.Mul, mulSlice[int32])
|
||||
testInt64x8Binary(t, simd.Int64x8.Mul, mulSlice[int64])
|
||||
// testUint8x64Binary(t, simd.Uint8x64.Mul, mulSlice[uint8]) // nope
|
||||
|
||||
// TODO signed should do the job
|
||||
// testUint16x32Binary(t, simd.Uint16x32.MulLow, mulSlice[uint16])
|
||||
// testUint32x16Binary(t, simd.Uint32x16.MulLow, mulSlice[uint32])
|
||||
// testUint64x8Binary(t, simd.Uint64x8.MulLow, mulSlice[uint64])
|
||||
// testUint16x32Binary(t, simd.Uint16x32.Mul, mulSlice[uint16])
|
||||
// testUint32x16Binary(t, simd.Uint32x16.Mul, mulSlice[uint32])
|
||||
// testUint64x8Binary(t, simd.Uint64x8.Mul, mulSlice[uint64])
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -89,20 +89,20 @@ func TestToInt32(t *testing.T) {
|
|||
testFloat32x8UnaryToInt32(t, simd.Float32x8.ConvertToInt32, toInt32Slice[float32])
|
||||
}
|
||||
|
||||
func TestDiffWithCeilWithPrecision(t *testing.T) {
|
||||
func TestCeilScaledResidue(t *testing.T) {
|
||||
if !simd.HasAVX512() {
|
||||
t.Skip("Needs AVX512")
|
||||
}
|
||||
testFloat64x8UnaryFlaky(t,
|
||||
func(x simd.Float64x8) simd.Float64x8 { return x.DiffWithCeilWithPrecision(0) },
|
||||
func(x simd.Float64x8) simd.Float64x8 { return x.CeilScaledResidue(0) },
|
||||
map1(ceilResidueForPrecision[float64](0)),
|
||||
0.001)
|
||||
testFloat64x8UnaryFlaky(t,
|
||||
func(x simd.Float64x8) simd.Float64x8 { return x.DiffWithCeilWithPrecision(1) },
|
||||
func(x simd.Float64x8) simd.Float64x8 { return x.CeilScaledResidue(1) },
|
||||
map1(ceilResidueForPrecision[float64](1)),
|
||||
0.001)
|
||||
testFloat64x8Unary(t,
|
||||
func(x simd.Float64x8) simd.Float64x8 { return x.Sub(x.CeilWithPrecision(0)) },
|
||||
func(x simd.Float64x8) simd.Float64x8 { return x.Sub(x.CeilScaled(0)) },
|
||||
map1[float64](func(x float64) float64 { return x - math.Ceil(x) }))
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue