From e61ebfce564086e5e2d634b0d138d96b6e34c19a Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Tue, 24 Jun 2025 15:21:29 +0000 Subject: [PATCH] [dev.simd] cmd/compile, simd: add shift operations This CL is generated by CL 683475. Change-Id: I9e3ac6aff6f711cb26ff85e4c8729d9e2cc38e7d Reviewed-on: https://go-review.googlesource.com/c/go/+/683715 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- src/cmd/compile/internal/amd64/simdssa.go | 312 +- .../compile/internal/ssa/_gen/simdAMD64.rules | 398 ++ .../compile/internal/ssa/_gen/simdAMD64ops.go | 204 + .../internal/ssa/_gen/simdgenericOps.go | 398 ++ src/cmd/compile/internal/ssa/opGen.go | 5772 +++++++++++++++++ src/cmd/compile/internal/ssa/rewriteAMD64.go | 5343 +++++++++++++++ .../compile/internal/ssagen/simdintrinsics.go | 398 ++ src/simd/simd_wrapped_test.go | 1245 ++++ src/simd/stubs_amd64.go | 2190 +++++++ 9 files changed, 16257 insertions(+), 3 deletions(-) diff --git a/src/cmd/compile/internal/amd64/simdssa.go b/src/cmd/compile/internal/amd64/simdssa.go index 52976803578..6c1d365bfa7 100644 --- a/src/cmd/compile/internal/amd64/simdssa.go +++ b/src/cmd/compile/internal/amd64/simdssa.go @@ -247,6 +247,18 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPHSUBW256, ssa.OpAMD64VPHSUBD128, ssa.OpAMD64VPHSUBD256, + ssa.OpAMD64VPROLVD128, + ssa.OpAMD64VPROLVD256, + ssa.OpAMD64VPROLVD512, + ssa.OpAMD64VPROLVQ128, + ssa.OpAMD64VPROLVQ256, + ssa.OpAMD64VPROLVQ512, + ssa.OpAMD64VPRORVD128, + ssa.OpAMD64VPRORVD256, + ssa.OpAMD64VPRORVD512, + ssa.OpAMD64VPRORVQ128, + ssa.OpAMD64VPRORVQ256, + ssa.OpAMD64VPRORVQ512, ssa.OpAMD64VPADDSB128, ssa.OpAMD64VPADDSB256, ssa.OpAMD64VPADDSB512, @@ -266,6 +278,33 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPMADDUBSW128, ssa.OpAMD64VPMADDUBSW256, ssa.OpAMD64VPMADDUBSW512, + ssa.OpAMD64VPSLLVW128, + ssa.OpAMD64VPSLLVW256, + ssa.OpAMD64VPSLLVW512, + ssa.OpAMD64VPSLLVD128, + ssa.OpAMD64VPSLLVD256, + ssa.OpAMD64VPSLLVD512, + ssa.OpAMD64VPSLLVQ128, + ssa.OpAMD64VPSLLVQ256, + ssa.OpAMD64VPSLLVQ512, + ssa.OpAMD64VPSRLVW128, + ssa.OpAMD64VPSRLVW256, + ssa.OpAMD64VPSRLVW512, + ssa.OpAMD64VPSRLVD128, + ssa.OpAMD64VPSRLVD256, + ssa.OpAMD64VPSRLVD512, + ssa.OpAMD64VPSRLVQ128, + ssa.OpAMD64VPSRLVQ256, + ssa.OpAMD64VPSRLVQ512, + ssa.OpAMD64VPSRAVW128, + ssa.OpAMD64VPSRAVW256, + ssa.OpAMD64VPSRAVW512, + ssa.OpAMD64VPSRAVD128, + ssa.OpAMD64VPSRAVD256, + ssa.OpAMD64VPSRAVD512, + ssa.OpAMD64VPSRAVQ128, + ssa.OpAMD64VPSRAVQ256, + ssa.OpAMD64VPSRAVQ512, ssa.OpAMD64VPSIGNB128, ssa.OpAMD64VPSIGNB256, ssa.OpAMD64VPSIGNW128, @@ -464,6 +503,18 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPMADDWDMasked128, ssa.OpAMD64VPMADDWDMasked256, ssa.OpAMD64VPMADDWDMasked512, + ssa.OpAMD64VPROLVDMasked128, + ssa.OpAMD64VPROLVDMasked256, + ssa.OpAMD64VPROLVDMasked512, + ssa.OpAMD64VPROLVQMasked128, + ssa.OpAMD64VPROLVQMasked256, + ssa.OpAMD64VPROLVQMasked512, + ssa.OpAMD64VPRORVDMasked128, + ssa.OpAMD64VPRORVDMasked256, + ssa.OpAMD64VPRORVDMasked512, + ssa.OpAMD64VPRORVQMasked128, + ssa.OpAMD64VPRORVQMasked256, + ssa.OpAMD64VPRORVQMasked512, ssa.OpAMD64VPADDSBMasked128, ssa.OpAMD64VPADDSBMasked256, ssa.OpAMD64VPADDSBMasked512, @@ -479,6 +530,33 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPMADDUBSWMasked128, ssa.OpAMD64VPMADDUBSWMasked256, ssa.OpAMD64VPMADDUBSWMasked512, + ssa.OpAMD64VPSLLVWMasked128, + ssa.OpAMD64VPSLLVWMasked256, + ssa.OpAMD64VPSLLVWMasked512, + ssa.OpAMD64VPSLLVDMasked128, + ssa.OpAMD64VPSLLVDMasked256, + 
ssa.OpAMD64VPSLLVDMasked512, + ssa.OpAMD64VPSLLVQMasked128, + ssa.OpAMD64VPSLLVQMasked256, + ssa.OpAMD64VPSLLVQMasked512, + ssa.OpAMD64VPSRLVWMasked128, + ssa.OpAMD64VPSRLVWMasked256, + ssa.OpAMD64VPSRLVWMasked512, + ssa.OpAMD64VPSRLVDMasked128, + ssa.OpAMD64VPSRLVDMasked256, + ssa.OpAMD64VPSRLVDMasked512, + ssa.OpAMD64VPSRLVQMasked128, + ssa.OpAMD64VPSRLVQMasked256, + ssa.OpAMD64VPSRLVQMasked512, + ssa.OpAMD64VPSRAVWMasked128, + ssa.OpAMD64VPSRAVWMasked256, + ssa.OpAMD64VPSRAVWMasked512, + ssa.OpAMD64VPSRAVDMasked128, + ssa.OpAMD64VPSRAVDMasked256, + ssa.OpAMD64VPSRAVDMasked512, + ssa.OpAMD64VPSRAVQMasked128, + ssa.OpAMD64VPSRAVQMasked256, + ssa.OpAMD64VPSRAVQMasked512, ssa.OpAMD64VSUBPSMasked128, ssa.OpAMD64VSUBPSMasked256, ssa.OpAMD64VSUBPSMasked512, @@ -570,7 +648,19 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VREDUCEPS512, ssa.OpAMD64VREDUCEPD128, ssa.OpAMD64VREDUCEPD256, - ssa.OpAMD64VREDUCEPD512: + ssa.OpAMD64VREDUCEPD512, + ssa.OpAMD64VPROLD128, + ssa.OpAMD64VPROLD256, + ssa.OpAMD64VPROLD512, + ssa.OpAMD64VPROLQ128, + ssa.OpAMD64VPROLQ256, + ssa.OpAMD64VPROLQ512, + ssa.OpAMD64VPRORD128, + ssa.OpAMD64VPRORD256, + ssa.OpAMD64VPRORD512, + ssa.OpAMD64VPRORQ128, + ssa.OpAMD64VPRORQ256, + ssa.OpAMD64VPRORQ512: p = simdFp11Imm8(s, v) case ssa.OpAMD64VRNDSCALEPSMasked128, @@ -584,14 +674,44 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VREDUCEPSMasked512, ssa.OpAMD64VREDUCEPDMasked128, ssa.OpAMD64VREDUCEPDMasked256, - ssa.OpAMD64VREDUCEPDMasked512: + ssa.OpAMD64VREDUCEPDMasked512, + ssa.OpAMD64VPROLDMasked128, + ssa.OpAMD64VPROLDMasked256, + ssa.OpAMD64VPROLDMasked512, + ssa.OpAMD64VPROLQMasked128, + ssa.OpAMD64VPROLQMasked256, + ssa.OpAMD64VPROLQMasked512, + ssa.OpAMD64VPRORDMasked128, + ssa.OpAMD64VPRORDMasked256, + ssa.OpAMD64VPRORDMasked512, + ssa.OpAMD64VPRORQMasked128, + ssa.OpAMD64VPRORQMasked256, + ssa.OpAMD64VPRORQMasked512: p = simdFpkfpImm8(s, v) case ssa.OpAMD64VDPPD128, ssa.OpAMD64VCMPPS128, ssa.OpAMD64VCMPPS256, ssa.OpAMD64VCMPPD128, - ssa.OpAMD64VCMPPD256: + ssa.OpAMD64VCMPPD256, + ssa.OpAMD64VPSHLDW128, + ssa.OpAMD64VPSHLDW256, + ssa.OpAMD64VPSHLDW512, + ssa.OpAMD64VPSHLDD128, + ssa.OpAMD64VPSHLDD256, + ssa.OpAMD64VPSHLDD512, + ssa.OpAMD64VPSHLDQ128, + ssa.OpAMD64VPSHLDQ256, + ssa.OpAMD64VPSHLDQ512, + ssa.OpAMD64VPSHRDW128, + ssa.OpAMD64VPSHRDW256, + ssa.OpAMD64VPSHRDW512, + ssa.OpAMD64VPSHRDD128, + ssa.OpAMD64VPSHRDD256, + ssa.OpAMD64VPSHRDD512, + ssa.OpAMD64VPSHRDQ128, + ssa.OpAMD64VPSHRDQ256, + ssa.OpAMD64VPSHRDQ512: p = simdFp21Imm8(s, v) case ssa.OpAMD64VCMPPS512, @@ -681,6 +801,24 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPDPBUSDS128, ssa.OpAMD64VPDPBUSDS256, ssa.OpAMD64VPDPBUSDS512, + ssa.OpAMD64VPSHLDVW128, + ssa.OpAMD64VPSHLDVW256, + ssa.OpAMD64VPSHLDVW512, + ssa.OpAMD64VPSHLDVD128, + ssa.OpAMD64VPSHLDVD256, + ssa.OpAMD64VPSHLDVD512, + ssa.OpAMD64VPSHLDVQ128, + ssa.OpAMD64VPSHLDVQ256, + ssa.OpAMD64VPSHLDVQ512, + ssa.OpAMD64VPSHRDVW128, + ssa.OpAMD64VPSHRDVW256, + ssa.OpAMD64VPSHRDVW512, + ssa.OpAMD64VPSHRDVD128, + ssa.OpAMD64VPSHRDVD256, + ssa.OpAMD64VPSHRDVD512, + ssa.OpAMD64VPSHRDVQ128, + ssa.OpAMD64VPSHRDVQ256, + ssa.OpAMD64VPSHRDVQ512, ssa.OpAMD64VPDPBUSD128, ssa.OpAMD64VPDPBUSD256, ssa.OpAMD64VPDPBUSD512: @@ -713,11 +851,63 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPDPBUSDSMasked128, ssa.OpAMD64VPDPBUSDSMasked256, ssa.OpAMD64VPDPBUSDSMasked512, + ssa.OpAMD64VPSHLDVWMasked128, + ssa.OpAMD64VPSHLDVWMasked256, + ssa.OpAMD64VPSHLDVWMasked512, + 
ssa.OpAMD64VPSHLDVDMasked128, + ssa.OpAMD64VPSHLDVDMasked256, + ssa.OpAMD64VPSHLDVDMasked512, + ssa.OpAMD64VPSHLDVQMasked128, + ssa.OpAMD64VPSHLDVQMasked256, + ssa.OpAMD64VPSHLDVQMasked512, + ssa.OpAMD64VPSHRDVWMasked128, + ssa.OpAMD64VPSHRDVWMasked256, + ssa.OpAMD64VPSHRDVWMasked512, + ssa.OpAMD64VPSHRDVDMasked128, + ssa.OpAMD64VPSHRDVDMasked256, + ssa.OpAMD64VPSHRDVDMasked512, + ssa.OpAMD64VPSHRDVQMasked128, + ssa.OpAMD64VPSHRDVQMasked256, + ssa.OpAMD64VPSHRDVQMasked512, ssa.OpAMD64VPDPBUSDMasked128, ssa.OpAMD64VPDPBUSDMasked256, ssa.OpAMD64VPDPBUSDMasked512: p = simdFp3kfpResultInArg0(s, v) + case ssa.OpAMD64VPSLLW128, + ssa.OpAMD64VPSLLW256, + ssa.OpAMD64VPSLLD128, + ssa.OpAMD64VPSLLD256, + ssa.OpAMD64VPSLLQ128, + ssa.OpAMD64VPSLLQ256, + ssa.OpAMD64VPSLLQ512, + ssa.OpAMD64VPSRLW128, + ssa.OpAMD64VPSRLW256, + ssa.OpAMD64VPSRLD128, + ssa.OpAMD64VPSRLD256, + ssa.OpAMD64VPSRLQ128, + ssa.OpAMD64VPSRLQ256, + ssa.OpAMD64VPSRLQ512, + ssa.OpAMD64VPSRAW128, + ssa.OpAMD64VPSRAW256, + ssa.OpAMD64VPSRAD128, + ssa.OpAMD64VPSRAD256, + ssa.OpAMD64VPSRAQ128, + ssa.OpAMD64VPSRAQ256, + ssa.OpAMD64VPSRAQ512: + p = simdFpXfp(s, v) + + case ssa.OpAMD64VPSLLQMasked128, + ssa.OpAMD64VPSLLQMasked256, + ssa.OpAMD64VPSLLQMasked512, + ssa.OpAMD64VPSRLQMasked128, + ssa.OpAMD64VPSRLQMasked256, + ssa.OpAMD64VPSRLQMasked512, + ssa.OpAMD64VPSRAQMasked128, + ssa.OpAMD64VPSRAQMasked256, + ssa.OpAMD64VPSRAQMasked512: + p = simdFpXkfp(s, v) + case ssa.OpAMD64VPINSRB128, ssa.OpAMD64VPINSRW128, ssa.OpAMD64VPINSRD128, @@ -730,6 +920,26 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPEXTRQ128: p = simdFpgpImm8(s, v) + case ssa.OpAMD64VPSHLDWMasked128, + ssa.OpAMD64VPSHLDWMasked256, + ssa.OpAMD64VPSHLDWMasked512, + ssa.OpAMD64VPSHLDDMasked128, + ssa.OpAMD64VPSHLDDMasked256, + ssa.OpAMD64VPSHLDDMasked512, + ssa.OpAMD64VPSHLDQMasked128, + ssa.OpAMD64VPSHLDQMasked256, + ssa.OpAMD64VPSHLDQMasked512, + ssa.OpAMD64VPSHRDWMasked128, + ssa.OpAMD64VPSHRDWMasked256, + ssa.OpAMD64VPSHRDWMasked512, + ssa.OpAMD64VPSHRDDMasked128, + ssa.OpAMD64VPSHRDDMasked256, + ssa.OpAMD64VPSHRDDMasked512, + ssa.OpAMD64VPSHRDQMasked128, + ssa.OpAMD64VPSHRDQMasked256, + ssa.OpAMD64VPSHRDQMasked512: + p = simdFp2kfpImm8(s, v) + default: // Unknown reg shape return false @@ -968,6 +1178,30 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPOPCNTQMasked128, ssa.OpAMD64VPOPCNTQMasked256, ssa.OpAMD64VPOPCNTQMasked512, + ssa.OpAMD64VPROLDMasked128, + ssa.OpAMD64VPROLDMasked256, + ssa.OpAMD64VPROLDMasked512, + ssa.OpAMD64VPROLQMasked128, + ssa.OpAMD64VPROLQMasked256, + ssa.OpAMD64VPROLQMasked512, + ssa.OpAMD64VPRORDMasked128, + ssa.OpAMD64VPRORDMasked256, + ssa.OpAMD64VPRORDMasked512, + ssa.OpAMD64VPRORQMasked128, + ssa.OpAMD64VPRORQMasked256, + ssa.OpAMD64VPRORQMasked512, + ssa.OpAMD64VPROLVDMasked128, + ssa.OpAMD64VPROLVDMasked256, + ssa.OpAMD64VPROLVDMasked512, + ssa.OpAMD64VPROLVQMasked128, + ssa.OpAMD64VPROLVQMasked256, + ssa.OpAMD64VPROLVQMasked512, + ssa.OpAMD64VPRORVDMasked128, + ssa.OpAMD64VPRORVDMasked256, + ssa.OpAMD64VPRORVDMasked512, + ssa.OpAMD64VPRORVQMasked128, + ssa.OpAMD64VPRORVQMasked256, + ssa.OpAMD64VPRORVQMasked512, ssa.OpAMD64VPADDSBMasked128, ssa.OpAMD64VPADDSBMasked256, ssa.OpAMD64VPADDSBMasked512, @@ -989,6 +1223,78 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPDPBUSDSMasked128, ssa.OpAMD64VPDPBUSDSMasked256, ssa.OpAMD64VPDPBUSDSMasked512, + ssa.OpAMD64VPSLLQMasked128, + ssa.OpAMD64VPSLLQMasked256, + ssa.OpAMD64VPSLLQMasked512, + ssa.OpAMD64VPSHLDWMasked128, 
+ ssa.OpAMD64VPSHLDWMasked256, + ssa.OpAMD64VPSHLDWMasked512, + ssa.OpAMD64VPSHLDDMasked128, + ssa.OpAMD64VPSHLDDMasked256, + ssa.OpAMD64VPSHLDDMasked512, + ssa.OpAMD64VPSHLDQMasked128, + ssa.OpAMD64VPSHLDQMasked256, + ssa.OpAMD64VPSHLDQMasked512, + ssa.OpAMD64VPSRLQMasked128, + ssa.OpAMD64VPSRLQMasked256, + ssa.OpAMD64VPSRLQMasked512, + ssa.OpAMD64VPSHRDWMasked128, + ssa.OpAMD64VPSHRDWMasked256, + ssa.OpAMD64VPSHRDWMasked512, + ssa.OpAMD64VPSHRDDMasked128, + ssa.OpAMD64VPSHRDDMasked256, + ssa.OpAMD64VPSHRDDMasked512, + ssa.OpAMD64VPSHRDQMasked128, + ssa.OpAMD64VPSHRDQMasked256, + ssa.OpAMD64VPSHRDQMasked512, + ssa.OpAMD64VPSRAQMasked128, + ssa.OpAMD64VPSRAQMasked256, + ssa.OpAMD64VPSRAQMasked512, + ssa.OpAMD64VPSLLVWMasked128, + ssa.OpAMD64VPSLLVWMasked256, + ssa.OpAMD64VPSLLVWMasked512, + ssa.OpAMD64VPSLLVDMasked128, + ssa.OpAMD64VPSLLVDMasked256, + ssa.OpAMD64VPSLLVDMasked512, + ssa.OpAMD64VPSLLVQMasked128, + ssa.OpAMD64VPSLLVQMasked256, + ssa.OpAMD64VPSLLVQMasked512, + ssa.OpAMD64VPSHLDVWMasked128, + ssa.OpAMD64VPSHLDVWMasked256, + ssa.OpAMD64VPSHLDVWMasked512, + ssa.OpAMD64VPSHLDVDMasked128, + ssa.OpAMD64VPSHLDVDMasked256, + ssa.OpAMD64VPSHLDVDMasked512, + ssa.OpAMD64VPSHLDVQMasked128, + ssa.OpAMD64VPSHLDVQMasked256, + ssa.OpAMD64VPSHLDVQMasked512, + ssa.OpAMD64VPSRLVWMasked128, + ssa.OpAMD64VPSRLVWMasked256, + ssa.OpAMD64VPSRLVWMasked512, + ssa.OpAMD64VPSRLVDMasked128, + ssa.OpAMD64VPSRLVDMasked256, + ssa.OpAMD64VPSRLVDMasked512, + ssa.OpAMD64VPSRLVQMasked128, + ssa.OpAMD64VPSRLVQMasked256, + ssa.OpAMD64VPSRLVQMasked512, + ssa.OpAMD64VPSHRDVWMasked128, + ssa.OpAMD64VPSHRDVWMasked256, + ssa.OpAMD64VPSHRDVWMasked512, + ssa.OpAMD64VPSHRDVDMasked128, + ssa.OpAMD64VPSHRDVDMasked256, + ssa.OpAMD64VPSHRDVDMasked512, + ssa.OpAMD64VPSHRDVQMasked128, + ssa.OpAMD64VPSHRDVQMasked256, + ssa.OpAMD64VPSHRDVQMasked512, + ssa.OpAMD64VPSRAVWMasked128, + ssa.OpAMD64VPSRAVWMasked256, + ssa.OpAMD64VPSRAVWMasked512, + ssa.OpAMD64VPSRAVDMasked128, + ssa.OpAMD64VPSRAVDMasked256, + ssa.OpAMD64VPSRAVDMasked512, + ssa.OpAMD64VPSRAVQMasked128, + ssa.OpAMD64VPSRAVQMasked256, + ssa.OpAMD64VPSRAVQMasked512, ssa.OpAMD64VSQRTPSMasked128, ssa.OpAMD64VSQRTPSMasked256, ssa.OpAMD64VSQRTPSMasked512, diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules index b21d58b4a44..968ded21313 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules @@ -904,6 +904,54 @@ (MaskedPopCountUint64x2 x mask) => (VPOPCNTQMasked128 x (VPMOVVec64x2ToM mask)) (MaskedPopCountUint64x4 x mask) => (VPOPCNTQMasked256 x (VPMOVVec64x4ToM mask)) (MaskedPopCountUint64x8 x mask) => (VPOPCNTQMasked512 x (VPMOVVec64x8ToM mask)) +(MaskedRotateAllLeftInt32x4 [a] x mask) => (VPROLDMasked128 [a] x (VPMOVVec32x4ToM mask)) +(MaskedRotateAllLeftInt32x8 [a] x mask) => (VPROLDMasked256 [a] x (VPMOVVec32x8ToM mask)) +(MaskedRotateAllLeftInt32x16 [a] x mask) => (VPROLDMasked512 [a] x (VPMOVVec32x16ToM mask)) +(MaskedRotateAllLeftInt64x2 [a] x mask) => (VPROLQMasked128 [a] x (VPMOVVec64x2ToM mask)) +(MaskedRotateAllLeftInt64x4 [a] x mask) => (VPROLQMasked256 [a] x (VPMOVVec64x4ToM mask)) +(MaskedRotateAllLeftInt64x8 [a] x mask) => (VPROLQMasked512 [a] x (VPMOVVec64x8ToM mask)) +(MaskedRotateAllLeftUint32x4 [a] x mask) => (VPROLDMasked128 [a] x (VPMOVVec32x4ToM mask)) +(MaskedRotateAllLeftUint32x8 [a] x mask) => (VPROLDMasked256 [a] x (VPMOVVec32x8ToM mask)) +(MaskedRotateAllLeftUint32x16 [a] x mask) => (VPROLDMasked512 [a] x (VPMOVVec32x16ToM 
mask)) +(MaskedRotateAllLeftUint64x2 [a] x mask) => (VPROLQMasked128 [a] x (VPMOVVec64x2ToM mask)) +(MaskedRotateAllLeftUint64x4 [a] x mask) => (VPROLQMasked256 [a] x (VPMOVVec64x4ToM mask)) +(MaskedRotateAllLeftUint64x8 [a] x mask) => (VPROLQMasked512 [a] x (VPMOVVec64x8ToM mask)) +(MaskedRotateAllRightInt32x4 [a] x mask) => (VPRORDMasked128 [a] x (VPMOVVec32x4ToM mask)) +(MaskedRotateAllRightInt32x8 [a] x mask) => (VPRORDMasked256 [a] x (VPMOVVec32x8ToM mask)) +(MaskedRotateAllRightInt32x16 [a] x mask) => (VPRORDMasked512 [a] x (VPMOVVec32x16ToM mask)) +(MaskedRotateAllRightInt64x2 [a] x mask) => (VPRORQMasked128 [a] x (VPMOVVec64x2ToM mask)) +(MaskedRotateAllRightInt64x4 [a] x mask) => (VPRORQMasked256 [a] x (VPMOVVec64x4ToM mask)) +(MaskedRotateAllRightInt64x8 [a] x mask) => (VPRORQMasked512 [a] x (VPMOVVec64x8ToM mask)) +(MaskedRotateAllRightUint32x4 [a] x mask) => (VPRORDMasked128 [a] x (VPMOVVec32x4ToM mask)) +(MaskedRotateAllRightUint32x8 [a] x mask) => (VPRORDMasked256 [a] x (VPMOVVec32x8ToM mask)) +(MaskedRotateAllRightUint32x16 [a] x mask) => (VPRORDMasked512 [a] x (VPMOVVec32x16ToM mask)) +(MaskedRotateAllRightUint64x2 [a] x mask) => (VPRORQMasked128 [a] x (VPMOVVec64x2ToM mask)) +(MaskedRotateAllRightUint64x4 [a] x mask) => (VPRORQMasked256 [a] x (VPMOVVec64x4ToM mask)) +(MaskedRotateAllRightUint64x8 [a] x mask) => (VPRORQMasked512 [a] x (VPMOVVec64x8ToM mask)) +(MaskedRotateLeftInt32x4 x y mask) => (VPROLVDMasked128 x y (VPMOVVec32x4ToM mask)) +(MaskedRotateLeftInt32x8 x y mask) => (VPROLVDMasked256 x y (VPMOVVec32x8ToM mask)) +(MaskedRotateLeftInt32x16 x y mask) => (VPROLVDMasked512 x y (VPMOVVec32x16ToM mask)) +(MaskedRotateLeftInt64x2 x y mask) => (VPROLVQMasked128 x y (VPMOVVec64x2ToM mask)) +(MaskedRotateLeftInt64x4 x y mask) => (VPROLVQMasked256 x y (VPMOVVec64x4ToM mask)) +(MaskedRotateLeftInt64x8 x y mask) => (VPROLVQMasked512 x y (VPMOVVec64x8ToM mask)) +(MaskedRotateLeftUint32x4 x y mask) => (VPROLVDMasked128 x y (VPMOVVec32x4ToM mask)) +(MaskedRotateLeftUint32x8 x y mask) => (VPROLVDMasked256 x y (VPMOVVec32x8ToM mask)) +(MaskedRotateLeftUint32x16 x y mask) => (VPROLVDMasked512 x y (VPMOVVec32x16ToM mask)) +(MaskedRotateLeftUint64x2 x y mask) => (VPROLVQMasked128 x y (VPMOVVec64x2ToM mask)) +(MaskedRotateLeftUint64x4 x y mask) => (VPROLVQMasked256 x y (VPMOVVec64x4ToM mask)) +(MaskedRotateLeftUint64x8 x y mask) => (VPROLVQMasked512 x y (VPMOVVec64x8ToM mask)) +(MaskedRotateRightInt32x4 x y mask) => (VPRORVDMasked128 x y (VPMOVVec32x4ToM mask)) +(MaskedRotateRightInt32x8 x y mask) => (VPRORVDMasked256 x y (VPMOVVec32x8ToM mask)) +(MaskedRotateRightInt32x16 x y mask) => (VPRORVDMasked512 x y (VPMOVVec32x16ToM mask)) +(MaskedRotateRightInt64x2 x y mask) => (VPRORVQMasked128 x y (VPMOVVec64x2ToM mask)) +(MaskedRotateRightInt64x4 x y mask) => (VPRORVQMasked256 x y (VPMOVVec64x4ToM mask)) +(MaskedRotateRightInt64x8 x y mask) => (VPRORVQMasked512 x y (VPMOVVec64x8ToM mask)) +(MaskedRotateRightUint32x4 x y mask) => (VPRORVDMasked128 x y (VPMOVVec32x4ToM mask)) +(MaskedRotateRightUint32x8 x y mask) => (VPRORVDMasked256 x y (VPMOVVec32x8ToM mask)) +(MaskedRotateRightUint32x16 x y mask) => (VPRORVDMasked512 x y (VPMOVVec32x16ToM mask)) +(MaskedRotateRightUint64x2 x y mask) => (VPRORVQMasked128 x y (VPMOVVec64x2ToM mask)) +(MaskedRotateRightUint64x4 x y mask) => (VPRORVQMasked256 x y (VPMOVVec64x4ToM mask)) +(MaskedRotateRightUint64x8 x y mask) => (VPRORVQMasked512 x y (VPMOVVec64x8ToM mask)) (MaskedRoundSuppressExceptionWithPrecisionFloat32x4 [a] x mask) => 
(VRNDSCALEPSMasked128 [a+8] x (VPMOVVec32x4ToM mask)) (MaskedRoundSuppressExceptionWithPrecisionFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+8] x (VPMOVVec32x8ToM mask)) (MaskedRoundSuppressExceptionWithPrecisionFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+8] x (VPMOVVec32x16ToM mask)) @@ -952,6 +1000,147 @@ (MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4 x y z mask) => (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM mask)) (MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8 x y z mask) => (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM mask)) (MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16 x y z mask) => (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM mask)) +(MaskedShiftAllLeftInt64x2 x y mask) => (VPSLLQMasked128 x y (VPMOVVec64x2ToM mask)) +(MaskedShiftAllLeftInt64x4 x y mask) => (VPSLLQMasked256 x y (VPMOVVec64x4ToM mask)) +(MaskedShiftAllLeftInt64x8 x y mask) => (VPSLLQMasked512 x y (VPMOVVec64x8ToM mask)) +(MaskedShiftAllLeftUint64x2 x y mask) => (VPSLLQMasked128 x y (VPMOVVec64x2ToM mask)) +(MaskedShiftAllLeftUint64x4 x y mask) => (VPSLLQMasked256 x y (VPMOVVec64x4ToM mask)) +(MaskedShiftAllLeftUint64x8 x y mask) => (VPSLLQMasked512 x y (VPMOVVec64x8ToM mask)) +(MaskedShiftAllLeftAndFillUpperFromInt16x8 [a] x y mask) => (VPSHLDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) +(MaskedShiftAllLeftAndFillUpperFromInt16x16 [a] x y mask) => (VPSHLDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) +(MaskedShiftAllLeftAndFillUpperFromInt16x32 [a] x y mask) => (VPSHLDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) +(MaskedShiftAllLeftAndFillUpperFromInt32x4 [a] x y mask) => (VPSHLDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) +(MaskedShiftAllLeftAndFillUpperFromInt32x8 [a] x y mask) => (VPSHLDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) +(MaskedShiftAllLeftAndFillUpperFromInt32x16 [a] x y mask) => (VPSHLDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) +(MaskedShiftAllLeftAndFillUpperFromInt64x2 [a] x y mask) => (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) +(MaskedShiftAllLeftAndFillUpperFromInt64x4 [a] x y mask) => (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) +(MaskedShiftAllLeftAndFillUpperFromInt64x8 [a] x y mask) => (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) +(MaskedShiftAllLeftAndFillUpperFromUint16x8 [a] x y mask) => (VPSHLDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) +(MaskedShiftAllLeftAndFillUpperFromUint16x16 [a] x y mask) => (VPSHLDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) +(MaskedShiftAllLeftAndFillUpperFromUint16x32 [a] x y mask) => (VPSHLDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) +(MaskedShiftAllLeftAndFillUpperFromUint32x4 [a] x y mask) => (VPSHLDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) +(MaskedShiftAllLeftAndFillUpperFromUint32x8 [a] x y mask) => (VPSHLDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) +(MaskedShiftAllLeftAndFillUpperFromUint32x16 [a] x y mask) => (VPSHLDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) +(MaskedShiftAllLeftAndFillUpperFromUint64x2 [a] x y mask) => (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) +(MaskedShiftAllLeftAndFillUpperFromUint64x4 [a] x y mask) => (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) +(MaskedShiftAllLeftAndFillUpperFromUint64x8 [a] x y mask) => (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) +(MaskedShiftAllRightInt64x2 x y mask) => (VPSRLQMasked128 x y (VPMOVVec64x2ToM mask)) +(MaskedShiftAllRightInt64x4 x y mask) => (VPSRLQMasked256 x y (VPMOVVec64x4ToM mask)) +(MaskedShiftAllRightInt64x8 x y mask) => (VPSRLQMasked512 x y (VPMOVVec64x8ToM mask)) +(MaskedShiftAllRightUint64x2 x y mask) => 
(VPSRLQMasked128 x y (VPMOVVec64x2ToM mask)) +(MaskedShiftAllRightUint64x4 x y mask) => (VPSRLQMasked256 x y (VPMOVVec64x4ToM mask)) +(MaskedShiftAllRightUint64x8 x y mask) => (VPSRLQMasked512 x y (VPMOVVec64x8ToM mask)) +(MaskedShiftAllRightAndFillUpperFromInt16x8 [a] x y mask) => (VPSHRDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) +(MaskedShiftAllRightAndFillUpperFromInt16x16 [a] x y mask) => (VPSHRDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) +(MaskedShiftAllRightAndFillUpperFromInt16x32 [a] x y mask) => (VPSHRDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) +(MaskedShiftAllRightAndFillUpperFromInt32x4 [a] x y mask) => (VPSHRDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) +(MaskedShiftAllRightAndFillUpperFromInt32x8 [a] x y mask) => (VPSHRDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) +(MaskedShiftAllRightAndFillUpperFromInt32x16 [a] x y mask) => (VPSHRDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) +(MaskedShiftAllRightAndFillUpperFromInt64x2 [a] x y mask) => (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) +(MaskedShiftAllRightAndFillUpperFromInt64x4 [a] x y mask) => (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) +(MaskedShiftAllRightAndFillUpperFromInt64x8 [a] x y mask) => (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) +(MaskedShiftAllRightAndFillUpperFromUint16x8 [a] x y mask) => (VPSHRDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) +(MaskedShiftAllRightAndFillUpperFromUint16x16 [a] x y mask) => (VPSHRDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) +(MaskedShiftAllRightAndFillUpperFromUint16x32 [a] x y mask) => (VPSHRDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) +(MaskedShiftAllRightAndFillUpperFromUint32x4 [a] x y mask) => (VPSHRDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) +(MaskedShiftAllRightAndFillUpperFromUint32x8 [a] x y mask) => (VPSHRDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) +(MaskedShiftAllRightAndFillUpperFromUint32x16 [a] x y mask) => (VPSHRDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) +(MaskedShiftAllRightAndFillUpperFromUint64x2 [a] x y mask) => (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) +(MaskedShiftAllRightAndFillUpperFromUint64x4 [a] x y mask) => (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) +(MaskedShiftAllRightAndFillUpperFromUint64x8 [a] x y mask) => (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) +(MaskedShiftAllRightSignExtendedInt64x2 x y mask) => (VPSRAQMasked128 x y (VPMOVVec64x2ToM mask)) +(MaskedShiftAllRightSignExtendedInt64x4 x y mask) => (VPSRAQMasked256 x y (VPMOVVec64x4ToM mask)) +(MaskedShiftAllRightSignExtendedInt64x8 x y mask) => (VPSRAQMasked512 x y (VPMOVVec64x8ToM mask)) +(MaskedShiftLeftInt16x8 x y mask) => (VPSLLVWMasked128 x y (VPMOVVec16x8ToM mask)) +(MaskedShiftLeftInt16x16 x y mask) => (VPSLLVWMasked256 x y (VPMOVVec16x16ToM mask)) +(MaskedShiftLeftInt16x32 x y mask) => (VPSLLVWMasked512 x y (VPMOVVec16x32ToM mask)) +(MaskedShiftLeftInt32x4 x y mask) => (VPSLLVDMasked128 x y (VPMOVVec32x4ToM mask)) +(MaskedShiftLeftInt32x8 x y mask) => (VPSLLVDMasked256 x y (VPMOVVec32x8ToM mask)) +(MaskedShiftLeftInt32x16 x y mask) => (VPSLLVDMasked512 x y (VPMOVVec32x16ToM mask)) +(MaskedShiftLeftInt64x2 x y mask) => (VPSLLVQMasked128 x y (VPMOVVec64x2ToM mask)) +(MaskedShiftLeftInt64x4 x y mask) => (VPSLLVQMasked256 x y (VPMOVVec64x4ToM mask)) +(MaskedShiftLeftInt64x8 x y mask) => (VPSLLVQMasked512 x y (VPMOVVec64x8ToM mask)) +(MaskedShiftLeftUint16x8 x y mask) => (VPSLLVWMasked128 x y (VPMOVVec16x8ToM mask)) +(MaskedShiftLeftUint16x16 x y mask) => (VPSLLVWMasked256 x y (VPMOVVec16x16ToM mask)) +(MaskedShiftLeftUint16x32 x y mask) => (VPSLLVWMasked512 x y 
(VPMOVVec16x32ToM mask)) +(MaskedShiftLeftUint32x4 x y mask) => (VPSLLVDMasked128 x y (VPMOVVec32x4ToM mask)) +(MaskedShiftLeftUint32x8 x y mask) => (VPSLLVDMasked256 x y (VPMOVVec32x8ToM mask)) +(MaskedShiftLeftUint32x16 x y mask) => (VPSLLVDMasked512 x y (VPMOVVec32x16ToM mask)) +(MaskedShiftLeftUint64x2 x y mask) => (VPSLLVQMasked128 x y (VPMOVVec64x2ToM mask)) +(MaskedShiftLeftUint64x4 x y mask) => (VPSLLVQMasked256 x y (VPMOVVec64x4ToM mask)) +(MaskedShiftLeftUint64x8 x y mask) => (VPSLLVQMasked512 x y (VPMOVVec64x8ToM mask)) +(MaskedShiftLeftAndFillUpperFromInt16x8 x y z mask) => (VPSHLDVWMasked128 x y z (VPMOVVec16x8ToM mask)) +(MaskedShiftLeftAndFillUpperFromInt16x16 x y z mask) => (VPSHLDVWMasked256 x y z (VPMOVVec16x16ToM mask)) +(MaskedShiftLeftAndFillUpperFromInt16x32 x y z mask) => (VPSHLDVWMasked512 x y z (VPMOVVec16x32ToM mask)) +(MaskedShiftLeftAndFillUpperFromInt32x4 x y z mask) => (VPSHLDVDMasked128 x y z (VPMOVVec32x4ToM mask)) +(MaskedShiftLeftAndFillUpperFromInt32x8 x y z mask) => (VPSHLDVDMasked256 x y z (VPMOVVec32x8ToM mask)) +(MaskedShiftLeftAndFillUpperFromInt32x16 x y z mask) => (VPSHLDVDMasked512 x y z (VPMOVVec32x16ToM mask)) +(MaskedShiftLeftAndFillUpperFromInt64x2 x y z mask) => (VPSHLDVQMasked128 x y z (VPMOVVec64x2ToM mask)) +(MaskedShiftLeftAndFillUpperFromInt64x4 x y z mask) => (VPSHLDVQMasked256 x y z (VPMOVVec64x4ToM mask)) +(MaskedShiftLeftAndFillUpperFromInt64x8 x y z mask) => (VPSHLDVQMasked512 x y z (VPMOVVec64x8ToM mask)) +(MaskedShiftLeftAndFillUpperFromUint16x8 x y z mask) => (VPSHLDVWMasked128 x y z (VPMOVVec16x8ToM mask)) +(MaskedShiftLeftAndFillUpperFromUint16x16 x y z mask) => (VPSHLDVWMasked256 x y z (VPMOVVec16x16ToM mask)) +(MaskedShiftLeftAndFillUpperFromUint16x32 x y z mask) => (VPSHLDVWMasked512 x y z (VPMOVVec16x32ToM mask)) +(MaskedShiftLeftAndFillUpperFromUint32x4 x y z mask) => (VPSHLDVDMasked128 x y z (VPMOVVec32x4ToM mask)) +(MaskedShiftLeftAndFillUpperFromUint32x8 x y z mask) => (VPSHLDVDMasked256 x y z (VPMOVVec32x8ToM mask)) +(MaskedShiftLeftAndFillUpperFromUint32x16 x y z mask) => (VPSHLDVDMasked512 x y z (VPMOVVec32x16ToM mask)) +(MaskedShiftLeftAndFillUpperFromUint64x2 x y z mask) => (VPSHLDVQMasked128 x y z (VPMOVVec64x2ToM mask)) +(MaskedShiftLeftAndFillUpperFromUint64x4 x y z mask) => (VPSHLDVQMasked256 x y z (VPMOVVec64x4ToM mask)) +(MaskedShiftLeftAndFillUpperFromUint64x8 x y z mask) => (VPSHLDVQMasked512 x y z (VPMOVVec64x8ToM mask)) +(MaskedShiftRightInt16x8 x y mask) => (VPSRLVWMasked128 x y (VPMOVVec16x8ToM mask)) +(MaskedShiftRightInt16x16 x y mask) => (VPSRLVWMasked256 x y (VPMOVVec16x16ToM mask)) +(MaskedShiftRightInt16x32 x y mask) => (VPSRLVWMasked512 x y (VPMOVVec16x32ToM mask)) +(MaskedShiftRightInt32x4 x y mask) => (VPSRLVDMasked128 x y (VPMOVVec32x4ToM mask)) +(MaskedShiftRightInt32x8 x y mask) => (VPSRLVDMasked256 x y (VPMOVVec32x8ToM mask)) +(MaskedShiftRightInt32x16 x y mask) => (VPSRLVDMasked512 x y (VPMOVVec32x16ToM mask)) +(MaskedShiftRightInt64x2 x y mask) => (VPSRLVQMasked128 x y (VPMOVVec64x2ToM mask)) +(MaskedShiftRightInt64x4 x y mask) => (VPSRLVQMasked256 x y (VPMOVVec64x4ToM mask)) +(MaskedShiftRightInt64x8 x y mask) => (VPSRLVQMasked512 x y (VPMOVVec64x8ToM mask)) +(MaskedShiftRightUint16x8 x y mask) => (VPSRLVWMasked128 x y (VPMOVVec16x8ToM mask)) +(MaskedShiftRightUint16x16 x y mask) => (VPSRLVWMasked256 x y (VPMOVVec16x16ToM mask)) +(MaskedShiftRightUint16x32 x y mask) => (VPSRLVWMasked512 x y (VPMOVVec16x32ToM mask)) +(MaskedShiftRightUint32x4 x y mask) => (VPSRLVDMasked128 x y 
(VPMOVVec32x4ToM mask)) +(MaskedShiftRightUint32x8 x y mask) => (VPSRLVDMasked256 x y (VPMOVVec32x8ToM mask)) +(MaskedShiftRightUint32x16 x y mask) => (VPSRLVDMasked512 x y (VPMOVVec32x16ToM mask)) +(MaskedShiftRightUint64x2 x y mask) => (VPSRLVQMasked128 x y (VPMOVVec64x2ToM mask)) +(MaskedShiftRightUint64x4 x y mask) => (VPSRLVQMasked256 x y (VPMOVVec64x4ToM mask)) +(MaskedShiftRightUint64x8 x y mask) => (VPSRLVQMasked512 x y (VPMOVVec64x8ToM mask)) +(MaskedShiftRightAndFillUpperFromInt16x8 x y z mask) => (VPSHRDVWMasked128 x y z (VPMOVVec16x8ToM mask)) +(MaskedShiftRightAndFillUpperFromInt16x16 x y z mask) => (VPSHRDVWMasked256 x y z (VPMOVVec16x16ToM mask)) +(MaskedShiftRightAndFillUpperFromInt16x32 x y z mask) => (VPSHRDVWMasked512 x y z (VPMOVVec16x32ToM mask)) +(MaskedShiftRightAndFillUpperFromInt32x4 x y z mask) => (VPSHRDVDMasked128 x y z (VPMOVVec32x4ToM mask)) +(MaskedShiftRightAndFillUpperFromInt32x8 x y z mask) => (VPSHRDVDMasked256 x y z (VPMOVVec32x8ToM mask)) +(MaskedShiftRightAndFillUpperFromInt32x16 x y z mask) => (VPSHRDVDMasked512 x y z (VPMOVVec32x16ToM mask)) +(MaskedShiftRightAndFillUpperFromInt64x2 x y z mask) => (VPSHRDVQMasked128 x y z (VPMOVVec64x2ToM mask)) +(MaskedShiftRightAndFillUpperFromInt64x4 x y z mask) => (VPSHRDVQMasked256 x y z (VPMOVVec64x4ToM mask)) +(MaskedShiftRightAndFillUpperFromInt64x8 x y z mask) => (VPSHRDVQMasked512 x y z (VPMOVVec64x8ToM mask)) +(MaskedShiftRightAndFillUpperFromUint16x8 x y z mask) => (VPSHRDVWMasked128 x y z (VPMOVVec16x8ToM mask)) +(MaskedShiftRightAndFillUpperFromUint16x16 x y z mask) => (VPSHRDVWMasked256 x y z (VPMOVVec16x16ToM mask)) +(MaskedShiftRightAndFillUpperFromUint16x32 x y z mask) => (VPSHRDVWMasked512 x y z (VPMOVVec16x32ToM mask)) +(MaskedShiftRightAndFillUpperFromUint32x4 x y z mask) => (VPSHRDVDMasked128 x y z (VPMOVVec32x4ToM mask)) +(MaskedShiftRightAndFillUpperFromUint32x8 x y z mask) => (VPSHRDVDMasked256 x y z (VPMOVVec32x8ToM mask)) +(MaskedShiftRightAndFillUpperFromUint32x16 x y z mask) => (VPSHRDVDMasked512 x y z (VPMOVVec32x16ToM mask)) +(MaskedShiftRightAndFillUpperFromUint64x2 x y z mask) => (VPSHRDVQMasked128 x y z (VPMOVVec64x2ToM mask)) +(MaskedShiftRightAndFillUpperFromUint64x4 x y z mask) => (VPSHRDVQMasked256 x y z (VPMOVVec64x4ToM mask)) +(MaskedShiftRightAndFillUpperFromUint64x8 x y z mask) => (VPSHRDVQMasked512 x y z (VPMOVVec64x8ToM mask)) +(MaskedShiftRightSignExtendedInt16x8 x y mask) => (VPSRAVWMasked128 x y (VPMOVVec16x8ToM mask)) +(MaskedShiftRightSignExtendedInt16x16 x y mask) => (VPSRAVWMasked256 x y (VPMOVVec16x16ToM mask)) +(MaskedShiftRightSignExtendedInt16x32 x y mask) => (VPSRAVWMasked512 x y (VPMOVVec16x32ToM mask)) +(MaskedShiftRightSignExtendedInt32x4 x y mask) => (VPSRAVDMasked128 x y (VPMOVVec32x4ToM mask)) +(MaskedShiftRightSignExtendedInt32x8 x y mask) => (VPSRAVDMasked256 x y (VPMOVVec32x8ToM mask)) +(MaskedShiftRightSignExtendedInt32x16 x y mask) => (VPSRAVDMasked512 x y (VPMOVVec32x16ToM mask)) +(MaskedShiftRightSignExtendedInt64x2 x y mask) => (VPSRAVQMasked128 x y (VPMOVVec64x2ToM mask)) +(MaskedShiftRightSignExtendedInt64x4 x y mask) => (VPSRAVQMasked256 x y (VPMOVVec64x4ToM mask)) +(MaskedShiftRightSignExtendedInt64x8 x y mask) => (VPSRAVQMasked512 x y (VPMOVVec64x8ToM mask)) +(MaskedShiftRightSignExtendedUint16x8 x y mask) => (VPSRAVWMasked128 x y (VPMOVVec16x8ToM mask)) +(MaskedShiftRightSignExtendedUint16x16 x y mask) => (VPSRAVWMasked256 x y (VPMOVVec16x16ToM mask)) +(MaskedShiftRightSignExtendedUint16x32 x y mask) => (VPSRAVWMasked512 x y (VPMOVVec16x32ToM 
mask)) +(MaskedShiftRightSignExtendedUint32x4 x y mask) => (VPSRAVDMasked128 x y (VPMOVVec32x4ToM mask)) +(MaskedShiftRightSignExtendedUint32x8 x y mask) => (VPSRAVDMasked256 x y (VPMOVVec32x8ToM mask)) +(MaskedShiftRightSignExtendedUint32x16 x y mask) => (VPSRAVDMasked512 x y (VPMOVVec32x16ToM mask)) +(MaskedShiftRightSignExtendedUint64x2 x y mask) => (VPSRAVQMasked128 x y (VPMOVVec64x2ToM mask)) +(MaskedShiftRightSignExtendedUint64x4 x y mask) => (VPSRAVQMasked256 x y (VPMOVVec64x4ToM mask)) +(MaskedShiftRightSignExtendedUint64x8 x y mask) => (VPSRAVQMasked512 x y (VPMOVVec64x8ToM mask)) (MaskedSqrtFloat32x4 x mask) => (VSQRTPSMasked128 x (VPMOVVec32x4ToM mask)) (MaskedSqrtFloat32x8 x mask) => (VSQRTPSMasked256 x (VPMOVVec32x8ToM mask)) (MaskedSqrtFloat32x16 x mask) => (VSQRTPSMasked512 x (VPMOVVec32x16ToM mask)) @@ -1231,6 +1420,54 @@ (PopCountUint64x2 ...) => (VPOPCNTQ128 ...) (PopCountUint64x4 ...) => (VPOPCNTQ256 ...) (PopCountUint64x8 ...) => (VPOPCNTQ512 ...) +(RotateAllLeftInt32x4 [a] x) => (VPROLD128 [a] x) +(RotateAllLeftInt32x8 [a] x) => (VPROLD256 [a] x) +(RotateAllLeftInt32x16 [a] x) => (VPROLD512 [a] x) +(RotateAllLeftInt64x2 [a] x) => (VPROLQ128 [a] x) +(RotateAllLeftInt64x4 [a] x) => (VPROLQ256 [a] x) +(RotateAllLeftInt64x8 [a] x) => (VPROLQ512 [a] x) +(RotateAllLeftUint32x4 [a] x) => (VPROLD128 [a] x) +(RotateAllLeftUint32x8 [a] x) => (VPROLD256 [a] x) +(RotateAllLeftUint32x16 [a] x) => (VPROLD512 [a] x) +(RotateAllLeftUint64x2 [a] x) => (VPROLQ128 [a] x) +(RotateAllLeftUint64x4 [a] x) => (VPROLQ256 [a] x) +(RotateAllLeftUint64x8 [a] x) => (VPROLQ512 [a] x) +(RotateAllRightInt32x4 [a] x) => (VPRORD128 [a] x) +(RotateAllRightInt32x8 [a] x) => (VPRORD256 [a] x) +(RotateAllRightInt32x16 [a] x) => (VPRORD512 [a] x) +(RotateAllRightInt64x2 [a] x) => (VPRORQ128 [a] x) +(RotateAllRightInt64x4 [a] x) => (VPRORQ256 [a] x) +(RotateAllRightInt64x8 [a] x) => (VPRORQ512 [a] x) +(RotateAllRightUint32x4 [a] x) => (VPRORD128 [a] x) +(RotateAllRightUint32x8 [a] x) => (VPRORD256 [a] x) +(RotateAllRightUint32x16 [a] x) => (VPRORD512 [a] x) +(RotateAllRightUint64x2 [a] x) => (VPRORQ128 [a] x) +(RotateAllRightUint64x4 [a] x) => (VPRORQ256 [a] x) +(RotateAllRightUint64x8 [a] x) => (VPRORQ512 [a] x) +(RotateLeftInt32x4 ...) => (VPROLVD128 ...) +(RotateLeftInt32x8 ...) => (VPROLVD256 ...) +(RotateLeftInt32x16 ...) => (VPROLVD512 ...) +(RotateLeftInt64x2 ...) => (VPROLVQ128 ...) +(RotateLeftInt64x4 ...) => (VPROLVQ256 ...) +(RotateLeftInt64x8 ...) => (VPROLVQ512 ...) +(RotateLeftUint32x4 ...) => (VPROLVD128 ...) +(RotateLeftUint32x8 ...) => (VPROLVD256 ...) +(RotateLeftUint32x16 ...) => (VPROLVD512 ...) +(RotateLeftUint64x2 ...) => (VPROLVQ128 ...) +(RotateLeftUint64x4 ...) => (VPROLVQ256 ...) +(RotateLeftUint64x8 ...) => (VPROLVQ512 ...) +(RotateRightInt32x4 ...) => (VPRORVD128 ...) +(RotateRightInt32x8 ...) => (VPRORVD256 ...) +(RotateRightInt32x16 ...) => (VPRORVD512 ...) +(RotateRightInt64x2 ...) => (VPRORVQ128 ...) +(RotateRightInt64x4 ...) => (VPRORVQ256 ...) +(RotateRightInt64x8 ...) => (VPRORVQ512 ...) +(RotateRightUint32x4 ...) => (VPRORVD128 ...) +(RotateRightUint32x8 ...) => (VPRORVD256 ...) +(RotateRightUint32x16 ...) => (VPRORVD512 ...) +(RotateRightUint64x2 ...) => (VPRORVQ128 ...) +(RotateRightUint64x4 ...) => (VPRORVQ256 ...) +(RotateRightUint64x8 ...) => (VPRORVQ512 ...) 
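
Note: the rules above lower the new rotate operations. RotateAllLeft/RotateAllRight carry an immediate count and map to VPROLD/VPROLQ/VPRORD/VPRORQ; RotateLeft/RotateRight take a per-lane count vector and map to VPROLVD/VPROLVQ/VPRORVD/VPRORVQ; the Masked* forms additionally convert the vector mask to a K register via VPMOVVec*ToM. A minimal scalar sketch of what one 32-bit lane computes (illustrative only, not the simd package API; the mod-32 count reduction is an assumption based on the instruction descriptions):

// Scalar model of 32-bit rotate lanes, for illustration only.
package main

import (
	"fmt"
	"math/bits"
)

func main() {
	x := []uint32{0x80000001, 0x0000ff00, 0xdeadbeef, 1}

	// RotateAllLeft-style: one immediate count shared by every lane (VPROLD [a] x).
	const a = 4
	allLeft := make([]uint32, len(x))
	for i, v := range x {
		allLeft[i] = bits.RotateLeft32(v, a)
	}

	// RotateRight-style: per-lane counts taken from a second vector (VPRORVD x y).
	counts := []uint32{1, 8, 16, 31}
	varRight := make([]uint32, len(x))
	for i, v := range x {
		varRight[i] = bits.RotateLeft32(v, -int(counts[i]&31)) // rotate right = negative left rotate
	}

	fmt.Printf("%08x\n%08x\n", allLeft, varRight)
}
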
(RoundFloat32x4 x) => (VROUNDPS128 [0] x) (RoundFloat32x8 x) => (VROUNDPS256 [0] x) (RoundFloat64x2 x) => (VROUNDPD128 [0] x) @@ -1295,6 +1532,167 @@ (SetElemUint16x8 [a] x y) => (VPINSRW128 [a] x y) (SetElemUint32x4 [a] x y) => (VPINSRD128 [a] x y) (SetElemUint64x2 [a] x y) => (VPINSRQ128 [a] x y) +(ShiftAllLeftInt16x8 ...) => (VPSLLW128 ...) +(ShiftAllLeftInt16x16 ...) => (VPSLLW256 ...) +(ShiftAllLeftInt32x4 ...) => (VPSLLD128 ...) +(ShiftAllLeftInt32x8 ...) => (VPSLLD256 ...) +(ShiftAllLeftInt64x2 ...) => (VPSLLQ128 ...) +(ShiftAllLeftInt64x4 ...) => (VPSLLQ256 ...) +(ShiftAllLeftInt64x8 ...) => (VPSLLQ512 ...) +(ShiftAllLeftUint16x8 ...) => (VPSLLW128 ...) +(ShiftAllLeftUint16x16 ...) => (VPSLLW256 ...) +(ShiftAllLeftUint32x4 ...) => (VPSLLD128 ...) +(ShiftAllLeftUint32x8 ...) => (VPSLLD256 ...) +(ShiftAllLeftUint64x2 ...) => (VPSLLQ128 ...) +(ShiftAllLeftUint64x4 ...) => (VPSLLQ256 ...) +(ShiftAllLeftUint64x8 ...) => (VPSLLQ512 ...) +(ShiftAllLeftAndFillUpperFromInt16x8 [a] x y) => (VPSHLDW128 [a] x y) +(ShiftAllLeftAndFillUpperFromInt16x16 [a] x y) => (VPSHLDW256 [a] x y) +(ShiftAllLeftAndFillUpperFromInt16x32 [a] x y) => (VPSHLDW512 [a] x y) +(ShiftAllLeftAndFillUpperFromInt32x4 [a] x y) => (VPSHLDD128 [a] x y) +(ShiftAllLeftAndFillUpperFromInt32x8 [a] x y) => (VPSHLDD256 [a] x y) +(ShiftAllLeftAndFillUpperFromInt32x16 [a] x y) => (VPSHLDD512 [a] x y) +(ShiftAllLeftAndFillUpperFromInt64x2 [a] x y) => (VPSHLDQ128 [a] x y) +(ShiftAllLeftAndFillUpperFromInt64x4 [a] x y) => (VPSHLDQ256 [a] x y) +(ShiftAllLeftAndFillUpperFromInt64x8 [a] x y) => (VPSHLDQ512 [a] x y) +(ShiftAllLeftAndFillUpperFromUint16x8 [a] x y) => (VPSHLDW128 [a] x y) +(ShiftAllLeftAndFillUpperFromUint16x16 [a] x y) => (VPSHLDW256 [a] x y) +(ShiftAllLeftAndFillUpperFromUint16x32 [a] x y) => (VPSHLDW512 [a] x y) +(ShiftAllLeftAndFillUpperFromUint32x4 [a] x y) => (VPSHLDD128 [a] x y) +(ShiftAllLeftAndFillUpperFromUint32x8 [a] x y) => (VPSHLDD256 [a] x y) +(ShiftAllLeftAndFillUpperFromUint32x16 [a] x y) => (VPSHLDD512 [a] x y) +(ShiftAllLeftAndFillUpperFromUint64x2 [a] x y) => (VPSHLDQ128 [a] x y) +(ShiftAllLeftAndFillUpperFromUint64x4 [a] x y) => (VPSHLDQ256 [a] x y) +(ShiftAllLeftAndFillUpperFromUint64x8 [a] x y) => (VPSHLDQ512 [a] x y) +(ShiftAllRightInt16x8 ...) => (VPSRLW128 ...) +(ShiftAllRightInt16x16 ...) => (VPSRLW256 ...) +(ShiftAllRightInt32x4 ...) => (VPSRLD128 ...) +(ShiftAllRightInt32x8 ...) => (VPSRLD256 ...) +(ShiftAllRightInt64x2 ...) => (VPSRLQ128 ...) +(ShiftAllRightInt64x4 ...) => (VPSRLQ256 ...) +(ShiftAllRightInt64x8 ...) => (VPSRLQ512 ...) +(ShiftAllRightUint16x8 ...) => (VPSRLW128 ...) +(ShiftAllRightUint16x16 ...) => (VPSRLW256 ...) +(ShiftAllRightUint32x4 ...) => (VPSRLD128 ...) +(ShiftAllRightUint32x8 ...) => (VPSRLD256 ...) +(ShiftAllRightUint64x2 ...) => (VPSRLQ128 ...) +(ShiftAllRightUint64x4 ...) => (VPSRLQ256 ...) +(ShiftAllRightUint64x8 ...) => (VPSRLQ512 ...) 
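
The ShiftAll* rules just above apply one shift count to every lane (VPSLLW/D/Q, VPSRLW/D/Q and, for the sign-extended forms, VPSRAW/D/Q), while the per-lane Shift* and ShiftRightSignExtended* rules further below use the variable-count VPSLLV/VPSRLV/VPSRAV family. A scalar sketch of that difference, and of logical versus sign-extended (arithmetic) right shifts (illustrative only, not the simd package API):

// Scalar model contrasting a shared shift count with per-lane counts.
package main

import "fmt"

func main() {
	x := []int64{-8, 8, 1 << 40, -1}

	// ShiftAllRight-style: one count for every lane, vacated bits become zero (VPSRLQ).
	const s = 2
	logical := make([]int64, len(x))
	for i, v := range x {
		logical[i] = int64(uint64(v) >> s)
	}

	// ShiftRightSignExtended-style: per-lane counts, sign bit replicated (VPSRAVQ).
	counts := []uint{1, 2, 3, 4}
	arithmetic := make([]int64, len(x))
	for i, v := range x {
		arithmetic[i] = v >> counts[i]
	}

	fmt.Println(logical, arithmetic)
}
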
+(ShiftAllRightAndFillUpperFromInt16x8 [a] x y) => (VPSHRDW128 [a] x y) +(ShiftAllRightAndFillUpperFromInt16x16 [a] x y) => (VPSHRDW256 [a] x y) +(ShiftAllRightAndFillUpperFromInt16x32 [a] x y) => (VPSHRDW512 [a] x y) +(ShiftAllRightAndFillUpperFromInt32x4 [a] x y) => (VPSHRDD128 [a] x y) +(ShiftAllRightAndFillUpperFromInt32x8 [a] x y) => (VPSHRDD256 [a] x y) +(ShiftAllRightAndFillUpperFromInt32x16 [a] x y) => (VPSHRDD512 [a] x y) +(ShiftAllRightAndFillUpperFromInt64x2 [a] x y) => (VPSHRDQ128 [a] x y) +(ShiftAllRightAndFillUpperFromInt64x4 [a] x y) => (VPSHRDQ256 [a] x y) +(ShiftAllRightAndFillUpperFromInt64x8 [a] x y) => (VPSHRDQ512 [a] x y) +(ShiftAllRightAndFillUpperFromUint16x8 [a] x y) => (VPSHRDW128 [a] x y) +(ShiftAllRightAndFillUpperFromUint16x16 [a] x y) => (VPSHRDW256 [a] x y) +(ShiftAllRightAndFillUpperFromUint16x32 [a] x y) => (VPSHRDW512 [a] x y) +(ShiftAllRightAndFillUpperFromUint32x4 [a] x y) => (VPSHRDD128 [a] x y) +(ShiftAllRightAndFillUpperFromUint32x8 [a] x y) => (VPSHRDD256 [a] x y) +(ShiftAllRightAndFillUpperFromUint32x16 [a] x y) => (VPSHRDD512 [a] x y) +(ShiftAllRightAndFillUpperFromUint64x2 [a] x y) => (VPSHRDQ128 [a] x y) +(ShiftAllRightAndFillUpperFromUint64x4 [a] x y) => (VPSHRDQ256 [a] x y) +(ShiftAllRightAndFillUpperFromUint64x8 [a] x y) => (VPSHRDQ512 [a] x y) +(ShiftAllRightSignExtendedInt16x8 ...) => (VPSRAW128 ...) +(ShiftAllRightSignExtendedInt16x16 ...) => (VPSRAW256 ...) +(ShiftAllRightSignExtendedInt32x4 ...) => (VPSRAD128 ...) +(ShiftAllRightSignExtendedInt32x8 ...) => (VPSRAD256 ...) +(ShiftAllRightSignExtendedInt64x2 ...) => (VPSRAQ128 ...) +(ShiftAllRightSignExtendedInt64x4 ...) => (VPSRAQ256 ...) +(ShiftAllRightSignExtendedInt64x8 ...) => (VPSRAQ512 ...) +(ShiftLeftInt16x8 ...) => (VPSLLVW128 ...) +(ShiftLeftInt16x16 ...) => (VPSLLVW256 ...) +(ShiftLeftInt16x32 ...) => (VPSLLVW512 ...) +(ShiftLeftInt32x4 ...) => (VPSLLVD128 ...) +(ShiftLeftInt32x8 ...) => (VPSLLVD256 ...) +(ShiftLeftInt32x16 ...) => (VPSLLVD512 ...) +(ShiftLeftInt64x2 ...) => (VPSLLVQ128 ...) +(ShiftLeftInt64x4 ...) => (VPSLLVQ256 ...) +(ShiftLeftInt64x8 ...) => (VPSLLVQ512 ...) +(ShiftLeftUint16x8 ...) => (VPSLLVW128 ...) +(ShiftLeftUint16x16 ...) => (VPSLLVW256 ...) +(ShiftLeftUint16x32 ...) => (VPSLLVW512 ...) +(ShiftLeftUint32x4 ...) => (VPSLLVD128 ...) +(ShiftLeftUint32x8 ...) => (VPSLLVD256 ...) +(ShiftLeftUint32x16 ...) => (VPSLLVD512 ...) +(ShiftLeftUint64x2 ...) => (VPSLLVQ128 ...) +(ShiftLeftUint64x4 ...) => (VPSLLVQ256 ...) +(ShiftLeftUint64x8 ...) => (VPSLLVQ512 ...) +(ShiftLeftAndFillUpperFromInt16x8 ...) => (VPSHLDVW128 ...) +(ShiftLeftAndFillUpperFromInt16x16 ...) => (VPSHLDVW256 ...) +(ShiftLeftAndFillUpperFromInt16x32 ...) => (VPSHLDVW512 ...) +(ShiftLeftAndFillUpperFromInt32x4 ...) => (VPSHLDVD128 ...) +(ShiftLeftAndFillUpperFromInt32x8 ...) => (VPSHLDVD256 ...) +(ShiftLeftAndFillUpperFromInt32x16 ...) => (VPSHLDVD512 ...) +(ShiftLeftAndFillUpperFromInt64x2 ...) => (VPSHLDVQ128 ...) +(ShiftLeftAndFillUpperFromInt64x4 ...) => (VPSHLDVQ256 ...) +(ShiftLeftAndFillUpperFromInt64x8 ...) => (VPSHLDVQ512 ...) +(ShiftLeftAndFillUpperFromUint16x8 ...) => (VPSHLDVW128 ...) +(ShiftLeftAndFillUpperFromUint16x16 ...) => (VPSHLDVW256 ...) +(ShiftLeftAndFillUpperFromUint16x32 ...) => (VPSHLDVW512 ...) +(ShiftLeftAndFillUpperFromUint32x4 ...) => (VPSHLDVD128 ...) +(ShiftLeftAndFillUpperFromUint32x8 ...) => (VPSHLDVD256 ...) +(ShiftLeftAndFillUpperFromUint32x16 ...) => (VPSHLDVD512 ...) +(ShiftLeftAndFillUpperFromUint64x2 ...) => (VPSHLDVQ128 ...) 
+(ShiftLeftAndFillUpperFromUint64x4 ...) => (VPSHLDVQ256 ...) +(ShiftLeftAndFillUpperFromUint64x8 ...) => (VPSHLDVQ512 ...) +(ShiftRightInt16x8 ...) => (VPSRLVW128 ...) +(ShiftRightInt16x16 ...) => (VPSRLVW256 ...) +(ShiftRightInt16x32 ...) => (VPSRLVW512 ...) +(ShiftRightInt32x4 ...) => (VPSRLVD128 ...) +(ShiftRightInt32x8 ...) => (VPSRLVD256 ...) +(ShiftRightInt32x16 ...) => (VPSRLVD512 ...) +(ShiftRightInt64x2 ...) => (VPSRLVQ128 ...) +(ShiftRightInt64x4 ...) => (VPSRLVQ256 ...) +(ShiftRightInt64x8 ...) => (VPSRLVQ512 ...) +(ShiftRightUint16x8 ...) => (VPSRLVW128 ...) +(ShiftRightUint16x16 ...) => (VPSRLVW256 ...) +(ShiftRightUint16x32 ...) => (VPSRLVW512 ...) +(ShiftRightUint32x4 ...) => (VPSRLVD128 ...) +(ShiftRightUint32x8 ...) => (VPSRLVD256 ...) +(ShiftRightUint32x16 ...) => (VPSRLVD512 ...) +(ShiftRightUint64x2 ...) => (VPSRLVQ128 ...) +(ShiftRightUint64x4 ...) => (VPSRLVQ256 ...) +(ShiftRightUint64x8 ...) => (VPSRLVQ512 ...) +(ShiftRightAndFillUpperFromInt16x8 ...) => (VPSHRDVW128 ...) +(ShiftRightAndFillUpperFromInt16x16 ...) => (VPSHRDVW256 ...) +(ShiftRightAndFillUpperFromInt16x32 ...) => (VPSHRDVW512 ...) +(ShiftRightAndFillUpperFromInt32x4 ...) => (VPSHRDVD128 ...) +(ShiftRightAndFillUpperFromInt32x8 ...) => (VPSHRDVD256 ...) +(ShiftRightAndFillUpperFromInt32x16 ...) => (VPSHRDVD512 ...) +(ShiftRightAndFillUpperFromInt64x2 ...) => (VPSHRDVQ128 ...) +(ShiftRightAndFillUpperFromInt64x4 ...) => (VPSHRDVQ256 ...) +(ShiftRightAndFillUpperFromInt64x8 ...) => (VPSHRDVQ512 ...) +(ShiftRightAndFillUpperFromUint16x8 ...) => (VPSHRDVW128 ...) +(ShiftRightAndFillUpperFromUint16x16 ...) => (VPSHRDVW256 ...) +(ShiftRightAndFillUpperFromUint16x32 ...) => (VPSHRDVW512 ...) +(ShiftRightAndFillUpperFromUint32x4 ...) => (VPSHRDVD128 ...) +(ShiftRightAndFillUpperFromUint32x8 ...) => (VPSHRDVD256 ...) +(ShiftRightAndFillUpperFromUint32x16 ...) => (VPSHRDVD512 ...) +(ShiftRightAndFillUpperFromUint64x2 ...) => (VPSHRDVQ128 ...) +(ShiftRightAndFillUpperFromUint64x4 ...) => (VPSHRDVQ256 ...) +(ShiftRightAndFillUpperFromUint64x8 ...) => (VPSHRDVQ512 ...) +(ShiftRightSignExtendedInt16x8 ...) => (VPSRAVW128 ...) +(ShiftRightSignExtendedInt16x16 ...) => (VPSRAVW256 ...) +(ShiftRightSignExtendedInt16x32 ...) => (VPSRAVW512 ...) +(ShiftRightSignExtendedInt32x4 ...) => (VPSRAVD128 ...) +(ShiftRightSignExtendedInt32x8 ...) => (VPSRAVD256 ...) +(ShiftRightSignExtendedInt32x16 ...) => (VPSRAVD512 ...) +(ShiftRightSignExtendedInt64x2 ...) => (VPSRAVQ128 ...) +(ShiftRightSignExtendedInt64x4 ...) => (VPSRAVQ256 ...) +(ShiftRightSignExtendedInt64x8 ...) => (VPSRAVQ512 ...) +(ShiftRightSignExtendedUint16x8 ...) => (VPSRAVW128 ...) +(ShiftRightSignExtendedUint16x16 ...) => (VPSRAVW256 ...) +(ShiftRightSignExtendedUint16x32 ...) => (VPSRAVW512 ...) +(ShiftRightSignExtendedUint32x4 ...) => (VPSRAVD128 ...) +(ShiftRightSignExtendedUint32x8 ...) => (VPSRAVD256 ...) +(ShiftRightSignExtendedUint32x16 ...) => (VPSRAVD512 ...) +(ShiftRightSignExtendedUint64x2 ...) => (VPSRAVQ128 ...) +(ShiftRightSignExtendedUint64x4 ...) => (VPSRAVQ256 ...) +(ShiftRightSignExtendedUint64x8 ...) => (VPSRAVQ512 ...) (SignInt8x16 ...) => (VPSIGNB128 ...) (SignInt8x32 ...) => (VPSIGNB256 ...) (SignInt16x8 ...) => (VPSIGNW128 ...) 
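
The *AndFillUpperFrom rules in this file lower to the AVX-512 VBMI2 concatenate-and-shift ("funnel shift") instructions: the immediate forms map to VPSHLDW/D/Q and VPSHRDW/D/Q, and the variable forms map to VPSHLDVW/D/Q and VPSHRDVW/D/Q, whose per-lane counts come from a third vector operand (hence the three-input, resultInArg0 ops defined in simdAMD64ops.go below). Per lane, the two sources are concatenated and the combined value is shifted, so the bits vacated in the shifted operand are filled from the other source; the Masked* variants again route the mask through VPMOVVec*ToM. A minimal scalar sketch of one 32-bit lane, assuming counts are reduced modulo the lane width and with the operand order chosen for illustration (not the simd package API):

// Scalar model of the per-lane concatenate-and-shift operations.
package main

import "fmt"

// shldd models a VPSHLDD lane: shift x left by k and fill the vacated
// low bits from the high bits of y (upper 32 bits of the shifted concatenation).
func shldd(x, y uint32, k uint) uint32 {
	return uint32((uint64(x)<<32 | uint64(y)) << (k & 31) >> 32)
}

// shrdd models a VPSHRDD lane: shift x right by k and fill the vacated
// high bits from the low bits of y (lower 32 bits of the shifted concatenation).
func shrdd(x, y uint32, k uint) uint32 {
	return uint32((uint64(y)<<32 | uint64(x)) >> (k & 31))
}

func main() {
	fmt.Printf("%08x %08x\n", shldd(0x0000beef, 0xdead0000, 16), shrdd(0xdead0000, 0x0000beef, 16))
}
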
diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go index 93b136230d0..cbddbe0ff6e 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go @@ -233,6 +233,11 @@ func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp, {name: "VPOPCNTWMasked256", argLength: 2, reg: fpkfp, asm: "VPOPCNTW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPADDSWMasked256", argLength: 3, reg: fp2kfp, asm: "VPADDSW", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPSUBSWMasked256", argLength: 3, reg: fp2kfp, asm: "VPSUBSW", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSLLVWMasked256", argLength: 3, reg: fp2kfp, asm: "VPSLLVW", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHLDVWMasked256", argLength: 4, reg: fp3kfp, asm: "VPSHLDVW", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VPSRLVWMasked256", argLength: 3, reg: fp2kfp, asm: "VPSRLVW", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHRDVWMasked256", argLength: 4, reg: fp3kfp, asm: "VPSHRDVW", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VPSRAVWMasked256", argLength: 3, reg: fp2kfp, asm: "VPSRAVW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSUBWMasked256", argLength: 3, reg: fp2kfp, asm: "VPSUBW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPMAXSW256", argLength: 2, reg: fp21, asm: "VPMAXSW", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMINSW256", argLength: 2, reg: fp21, asm: "VPMINSW", commutative: true, typ: "Vec256", resultInArg0: false}, @@ -246,6 +251,14 @@ func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp, {name: "VPHADDSW256", argLength: 2, reg: fp21, asm: "VPHADDSW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPHSUBSW256", argLength: 2, reg: fp21, asm: "VPHSUBSW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSUBSW256", argLength: 2, reg: fp21, asm: "VPSUBSW", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSLLW256", argLength: 2, reg: fp21, asm: "VPSLLW", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRLW256", argLength: 2, reg: fp21, asm: "VPSRLW", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRAW256", argLength: 2, reg: fp21, asm: "VPSRAW", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSLLVW256", argLength: 2, reg: fp21, asm: "VPSLLVW", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHLDVW256", argLength: 3, reg: fp31, asm: "VPSHLDVW", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VPSRLVW256", argLength: 2, reg: fp21, asm: "VPSRLVW", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHRDVW256", argLength: 3, reg: fp31, asm: "VPSHRDVW", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VPSRAVW256", argLength: 2, reg: fp21, asm: "VPSRAVW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSIGNW256", argLength: 2, reg: fp21, asm: "VPSIGNW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSUBW256", argLength: 2, reg: fp21, asm: "VPSUBW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPABSW512", argLength: 1, reg: fp11, asm: "VPABSW", commutative: false, typ: "Vec512", resultInArg0: false}, @@ -260,6 +273,11 @@ 
func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp, {name: "VPOPCNTWMasked512", argLength: 2, reg: fpkfp, asm: "VPOPCNTW", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPADDSWMasked512", argLength: 3, reg: fp2kfp, asm: "VPADDSW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPSUBSWMasked512", argLength: 3, reg: fp2kfp, asm: "VPSUBSW", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSLLVWMasked512", argLength: 3, reg: fp2kfp, asm: "VPSLLVW", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHLDVWMasked512", argLength: 4, reg: fp3kfp, asm: "VPSHLDVW", commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VPSRLVWMasked512", argLength: 3, reg: fp2kfp, asm: "VPSRLVW", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHRDVWMasked512", argLength: 4, reg: fp3kfp, asm: "VPSHRDVW", commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VPSRAVWMasked512", argLength: 3, reg: fp2kfp, asm: "VPSRAVW", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSUBWMasked512", argLength: 3, reg: fp2kfp, asm: "VPSUBW", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPMAXSW512", argLength: 2, reg: fp21, asm: "VPMAXSW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMINSW512", argLength: 2, reg: fp21, asm: "VPMINSW", commutative: true, typ: "Vec512", resultInArg0: false}, @@ -269,6 +287,11 @@ func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp, {name: "VPOPCNTW512", argLength: 1, reg: fp11, asm: "VPOPCNTW", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPADDSW512", argLength: 2, reg: fp21, asm: "VPADDSW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPSUBSW512", argLength: 2, reg: fp21, asm: "VPSUBSW", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSLLVW512", argLength: 2, reg: fp21, asm: "VPSLLVW", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHLDVW512", argLength: 3, reg: fp31, asm: "VPSHLDVW", commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VPSRLVW512", argLength: 2, reg: fp21, asm: "VPSRLVW", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHRDVW512", argLength: 3, reg: fp31, asm: "VPSHRDVW", commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VPSRAVW512", argLength: 2, reg: fp21, asm: "VPSRAVW", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSUBW512", argLength: 2, reg: fp21, asm: "VPSUBW", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPABSW128", argLength: 1, reg: fp11, asm: "VPABSW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPADDW128", argLength: 2, reg: fp21, asm: "VPADDW", commutative: true, typ: "Vec128", resultInArg0: false}, @@ -284,6 +307,11 @@ func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp, {name: "VPOPCNTWMasked128", argLength: 2, reg: fpkfp, asm: "VPOPCNTW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPADDSWMasked128", argLength: 3, reg: fp2kfp, asm: "VPADDSW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPSUBSWMasked128", argLength: 3, reg: fp2kfp, asm: "VPSUBSW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSLLVWMasked128", argLength: 3, reg: fp2kfp, asm: "VPSLLVW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHLDVWMasked128", argLength: 4, reg: fp3kfp, 
asm: "VPSHLDVW", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPSRLVWMasked128", argLength: 3, reg: fp2kfp, asm: "VPSRLVW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHRDVWMasked128", argLength: 4, reg: fp3kfp, asm: "VPSHRDVW", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPSRAVWMasked128", argLength: 3, reg: fp2kfp, asm: "VPSRAVW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSUBWMasked128", argLength: 3, reg: fp2kfp, asm: "VPSUBW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMAXSW128", argLength: 2, reg: fp21, asm: "VPMAXSW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMINSW128", argLength: 2, reg: fp21, asm: "VPMINSW", commutative: true, typ: "Vec128", resultInArg0: false}, @@ -297,6 +325,14 @@ func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp, {name: "VPHADDSW128", argLength: 2, reg: fp21, asm: "VPHADDSW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPHSUBSW128", argLength: 2, reg: fp21, asm: "VPHSUBSW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSUBSW128", argLength: 2, reg: fp21, asm: "VPSUBSW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSLLW128", argLength: 2, reg: fp21, asm: "VPSLLW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRLW128", argLength: 2, reg: fp21, asm: "VPSRLW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRAW128", argLength: 2, reg: fp21, asm: "VPSRAW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSLLVW128", argLength: 2, reg: fp21, asm: "VPSLLVW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHLDVW128", argLength: 3, reg: fp31, asm: "VPSHLDVW", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPSRLVW128", argLength: 2, reg: fp21, asm: "VPSRLVW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHRDVW128", argLength: 3, reg: fp31, asm: "VPSHRDVW", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPSRAVW128", argLength: 2, reg: fp21, asm: "VPSRAVW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSIGNW128", argLength: 2, reg: fp21, asm: "VPSIGNW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSUBW128", argLength: 2, reg: fp21, asm: "VPSUBW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPABSD512", argLength: 1, reg: fp11, asm: "VPABSD", commutative: false, typ: "Vec512", resultInArg0: false}, @@ -313,8 +349,15 @@ func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp, {name: "VPORDMasked512", argLength: 3, reg: fp2kfp, asm: "VPORD", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPDPWSSDMasked512", argLength: 4, reg: fp3kfp, asm: "VPDPWSSD", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VPOPCNTDMasked512", argLength: 2, reg: fpkfp, asm: "VPOPCNTD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPROLVDMasked512", argLength: 3, reg: fp2kfp, asm: "VPROLVD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPRORVDMasked512", argLength: 3, reg: fp2kfp, asm: "VPRORVD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPDPWSSDSMasked512", argLength: 4, reg: fp3kfp, asm: "VPDPWSSDS", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VPDPBUSDSMasked512", argLength: 4, reg: fp3kfp, asm: "VPDPBUSDS", commutative: 
false, typ: "Vec512", resultInArg0: true}, + {name: "VPSLLVDMasked512", argLength: 3, reg: fp2kfp, asm: "VPSLLVD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHLDVDMasked512", argLength: 4, reg: fp3kfp, asm: "VPSHLDVD", commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VPSRLVDMasked512", argLength: 3, reg: fp2kfp, asm: "VPSRLVD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHRDVDMasked512", argLength: 4, reg: fp3kfp, asm: "VPSHRDVD", commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VPSRAVDMasked512", argLength: 3, reg: fp2kfp, asm: "VPSRAVD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSUBDMasked512", argLength: 3, reg: fp2kfp, asm: "VPSUBD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPDPBUSDMasked512", argLength: 4, reg: fp3kfp, asm: "VPDPBUSD", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VPXORDMasked512", argLength: 3, reg: fp2kfp, asm: "VPXORD", commutative: true, typ: "Vec512", resultInArg0: false}, @@ -324,8 +367,15 @@ func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp, {name: "VPORD512", argLength: 2, reg: fp21, asm: "VPORD", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPDPWSSD512", argLength: 3, reg: fp31, asm: "VPDPWSSD", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VPOPCNTD512", argLength: 1, reg: fp11, asm: "VPOPCNTD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPROLVD512", argLength: 2, reg: fp21, asm: "VPROLVD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPRORVD512", argLength: 2, reg: fp21, asm: "VPRORVD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPDPWSSDS512", argLength: 3, reg: fp31, asm: "VPDPWSSDS", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VPDPBUSDS512", argLength: 3, reg: fp31, asm: "VPDPBUSDS", commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VPSLLVD512", argLength: 2, reg: fp21, asm: "VPSLLVD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHLDVD512", argLength: 3, reg: fp31, asm: "VPSHLDVD", commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VPSRLVD512", argLength: 2, reg: fp21, asm: "VPSRLVD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHRDVD512", argLength: 3, reg: fp31, asm: "VPSHRDVD", commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VPSRAVD512", argLength: 2, reg: fp21, asm: "VPSRAVD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSUBD512", argLength: 2, reg: fp21, asm: "VPSUBD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPDPBUSD512", argLength: 3, reg: fp31, asm: "VPDPBUSD", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VPXORD512", argLength: 2, reg: fp21, asm: "VPXORD", commutative: true, typ: "Vec512", resultInArg0: false}, @@ -343,8 +393,15 @@ func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp, {name: "VPORDMasked128", argLength: 3, reg: fp2kfp, asm: "VPORD", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPDPWSSDMasked128", argLength: 4, reg: fp3kfp, asm: "VPDPWSSD", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPOPCNTDMasked128", argLength: 2, reg: fpkfp, asm: "VPOPCNTD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPROLVDMasked128", argLength: 3, reg: fp2kfp, asm: "VPROLVD", commutative: false, typ: "Vec128", 
resultInArg0: false}, + {name: "VPRORVDMasked128", argLength: 3, reg: fp2kfp, asm: "VPRORVD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPDPWSSDSMasked128", argLength: 4, reg: fp3kfp, asm: "VPDPWSSDS", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPDPBUSDSMasked128", argLength: 4, reg: fp3kfp, asm: "VPDPBUSDS", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPSLLVDMasked128", argLength: 3, reg: fp2kfp, asm: "VPSLLVD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHLDVDMasked128", argLength: 4, reg: fp3kfp, asm: "VPSHLDVD", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPSRLVDMasked128", argLength: 3, reg: fp2kfp, asm: "VPSRLVD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHRDVDMasked128", argLength: 4, reg: fp3kfp, asm: "VPSHRDVD", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPSRAVDMasked128", argLength: 3, reg: fp2kfp, asm: "VPSRAVD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSUBDMasked128", argLength: 3, reg: fp2kfp, asm: "VPSUBD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPDPBUSDMasked128", argLength: 4, reg: fp3kfp, asm: "VPDPBUSD", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPXORDMasked128", argLength: 3, reg: fp2kfp, asm: "VPXORD", commutative: true, typ: "Vec128", resultInArg0: false}, @@ -356,8 +413,18 @@ func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp, {name: "VPHADDD128", argLength: 2, reg: fp21, asm: "VPHADDD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPHSUBD128", argLength: 2, reg: fp21, asm: "VPHSUBD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPOPCNTD128", argLength: 1, reg: fp11, asm: "VPOPCNTD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPROLVD128", argLength: 2, reg: fp21, asm: "VPROLVD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPRORVD128", argLength: 2, reg: fp21, asm: "VPRORVD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPDPWSSDS128", argLength: 3, reg: fp31, asm: "VPDPWSSDS", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPDPBUSDS128", argLength: 3, reg: fp31, asm: "VPDPBUSDS", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPSLLD128", argLength: 2, reg: fp21, asm: "VPSLLD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRLD128", argLength: 2, reg: fp21, asm: "VPSRLD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRAD128", argLength: 2, reg: fp21, asm: "VPSRAD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSLLVD128", argLength: 2, reg: fp21, asm: "VPSLLVD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHLDVD128", argLength: 3, reg: fp31, asm: "VPSHLDVD", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPSRLVD128", argLength: 2, reg: fp21, asm: "VPSRLVD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHRDVD128", argLength: 3, reg: fp31, asm: "VPSHRDVD", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPSRAVD128", argLength: 2, reg: fp21, asm: "VPSRAVD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSIGND128", argLength: 2, reg: fp21, asm: "VPSIGND", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSUBD128", argLength: 2, reg: fp21, asm: "VPSUBD", commutative: false, typ: 
"Vec128", resultInArg0: false}, {name: "VPDPBUSD128", argLength: 3, reg: fp31, asm: "VPDPBUSD", commutative: false, typ: "Vec128", resultInArg0: true}, @@ -375,8 +442,15 @@ func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp, {name: "VPORDMasked256", argLength: 3, reg: fp2kfp, asm: "VPORD", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPDPWSSDMasked256", argLength: 4, reg: fp3kfp, asm: "VPDPWSSD", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPOPCNTDMasked256", argLength: 2, reg: fpkfp, asm: "VPOPCNTD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPROLVDMasked256", argLength: 3, reg: fp2kfp, asm: "VPROLVD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPRORVDMasked256", argLength: 3, reg: fp2kfp, asm: "VPRORVD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPDPWSSDSMasked256", argLength: 4, reg: fp3kfp, asm: "VPDPWSSDS", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPDPBUSDSMasked256", argLength: 4, reg: fp3kfp, asm: "VPDPBUSDS", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VPSLLVDMasked256", argLength: 3, reg: fp2kfp, asm: "VPSLLVD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHLDVDMasked256", argLength: 4, reg: fp3kfp, asm: "VPSHLDVD", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VPSRLVDMasked256", argLength: 3, reg: fp2kfp, asm: "VPSRLVD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHRDVDMasked256", argLength: 4, reg: fp3kfp, asm: "VPSHRDVD", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VPSRAVDMasked256", argLength: 3, reg: fp2kfp, asm: "VPSRAVD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSUBDMasked256", argLength: 3, reg: fp2kfp, asm: "VPSUBD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPDPBUSDMasked256", argLength: 4, reg: fp3kfp, asm: "VPDPBUSD", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPXORDMasked256", argLength: 3, reg: fp2kfp, asm: "VPXORD", commutative: true, typ: "Vec256", resultInArg0: false}, @@ -388,8 +462,18 @@ func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp, {name: "VPHADDD256", argLength: 2, reg: fp21, asm: "VPHADDD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPHSUBD256", argLength: 2, reg: fp21, asm: "VPHSUBD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPOPCNTD256", argLength: 1, reg: fp11, asm: "VPOPCNTD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPROLVD256", argLength: 2, reg: fp21, asm: "VPROLVD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPRORVD256", argLength: 2, reg: fp21, asm: "VPRORVD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPDPWSSDS256", argLength: 3, reg: fp31, asm: "VPDPWSSDS", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPDPBUSDS256", argLength: 3, reg: fp31, asm: "VPDPBUSDS", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VPSLLD256", argLength: 2, reg: fp21, asm: "VPSLLD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRLD256", argLength: 2, reg: fp21, asm: "VPSRLD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRAD256", argLength: 2, reg: fp21, asm: "VPSRAD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSLLVD256", argLength: 2, reg: fp21, asm: "VPSLLVD", commutative: 
false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHLDVD256", argLength: 3, reg: fp31, asm: "VPSHLDVD", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VPSRLVD256", argLength: 2, reg: fp21, asm: "VPSRLVD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHRDVD256", argLength: 3, reg: fp31, asm: "VPSHRDVD", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VPSRAVD256", argLength: 2, reg: fp21, asm: "VPSRAVD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSIGND256", argLength: 2, reg: fp21, asm: "VPSIGND", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSUBD256", argLength: 2, reg: fp21, asm: "VPSUBD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPDPBUSD256", argLength: 3, reg: fp31, asm: "VPDPBUSD", commutative: false, typ: "Vec256", resultInArg0: true}, @@ -406,12 +490,32 @@ func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp, {name: "VPMULLQMasked128", argLength: 3, reg: fp2kfp, asm: "VPMULLQ", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPORQMasked128", argLength: 3, reg: fp2kfp, asm: "VPORQ", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPOPCNTQMasked128", argLength: 2, reg: fpkfp, asm: "VPOPCNTQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPROLVQMasked128", argLength: 3, reg: fp2kfp, asm: "VPROLVQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPRORVQMasked128", argLength: 3, reg: fp2kfp, asm: "VPRORVQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSLLQMasked128", argLength: 3, reg: fp2kfp, asm: "VPSLLQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRLQMasked128", argLength: 3, reg: fp2kfp, asm: "VPSRLQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRAQMasked128", argLength: 3, reg: fp2kfp, asm: "VPSRAQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSLLVQMasked128", argLength: 3, reg: fp2kfp, asm: "VPSLLVQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHLDVQMasked128", argLength: 4, reg: fp3kfp, asm: "VPSHLDVQ", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPSRLVQMasked128", argLength: 3, reg: fp2kfp, asm: "VPSRLVQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHRDVQMasked128", argLength: 4, reg: fp3kfp, asm: "VPSHRDVQ", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPSRAVQMasked128", argLength: 3, reg: fp2kfp, asm: "VPSRAVQ", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSUBQMasked128", argLength: 3, reg: fp2kfp, asm: "VPSUBQ", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPXORQMasked128", argLength: 3, reg: fp2kfp, asm: "VPXORQ", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMAXSQ128", argLength: 2, reg: fp21, asm: "VPMAXSQ", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMINSQ128", argLength: 2, reg: fp21, asm: "VPMINSQ", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMULLQ128", argLength: 2, reg: fp21, asm: "VPMULLQ", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPOPCNTQ128", argLength: 1, reg: fp11, asm: "VPOPCNTQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPROLVQ128", argLength: 2, reg: fp21, asm: "VPROLVQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPRORVQ128", argLength: 2, reg: fp21, asm: 
"VPRORVQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSLLQ128", argLength: 2, reg: fp21, asm: "VPSLLQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRLQ128", argLength: 2, reg: fp21, asm: "VPSRLQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRAQ128", argLength: 2, reg: fp21, asm: "VPSRAQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSLLVQ128", argLength: 2, reg: fp21, asm: "VPSLLVQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHLDVQ128", argLength: 3, reg: fp31, asm: "VPSHLDVQ", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPSRLVQ128", argLength: 2, reg: fp21, asm: "VPSRLVQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHRDVQ128", argLength: 3, reg: fp31, asm: "VPSHRDVQ", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPSRAVQ128", argLength: 2, reg: fp21, asm: "VPSRAVQ", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSUBQ128", argLength: 2, reg: fp21, asm: "VPSUBQ", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPABSQ256", argLength: 1, reg: fp11, asm: "VPABSQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPADDQ256", argLength: 2, reg: fp21, asm: "VPADDQ", commutative: true, typ: "Vec256", resultInArg0: false}, @@ -427,12 +531,32 @@ func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp, {name: "VPMULLQMasked256", argLength: 3, reg: fp2kfp, asm: "VPMULLQ", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPORQMasked256", argLength: 3, reg: fp2kfp, asm: "VPORQ", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPOPCNTQMasked256", argLength: 2, reg: fpkfp, asm: "VPOPCNTQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPROLVQMasked256", argLength: 3, reg: fp2kfp, asm: "VPROLVQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPRORVQMasked256", argLength: 3, reg: fp2kfp, asm: "VPRORVQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSLLQMasked256", argLength: 3, reg: fp2kfp, asm: "VPSLLQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRLQMasked256", argLength: 3, reg: fp2kfp, asm: "VPSRLQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRAQMasked256", argLength: 3, reg: fp2kfp, asm: "VPSRAQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSLLVQMasked256", argLength: 3, reg: fp2kfp, asm: "VPSLLVQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHLDVQMasked256", argLength: 4, reg: fp3kfp, asm: "VPSHLDVQ", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VPSRLVQMasked256", argLength: 3, reg: fp2kfp, asm: "VPSRLVQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHRDVQMasked256", argLength: 4, reg: fp3kfp, asm: "VPSHRDVQ", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VPSRAVQMasked256", argLength: 3, reg: fp2kfp, asm: "VPSRAVQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSUBQMasked256", argLength: 3, reg: fp2kfp, asm: "VPSUBQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPXORQMasked256", argLength: 3, reg: fp2kfp, asm: "VPXORQ", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMAXSQ256", argLength: 2, reg: fp21, asm: "VPMAXSQ", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMINSQ256", argLength: 2, reg: 
fp21, asm: "VPMINSQ", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMULLQ256", argLength: 2, reg: fp21, asm: "VPMULLQ", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPOPCNTQ256", argLength: 1, reg: fp11, asm: "VPOPCNTQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPROLVQ256", argLength: 2, reg: fp21, asm: "VPROLVQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPRORVQ256", argLength: 2, reg: fp21, asm: "VPRORVQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSLLQ256", argLength: 2, reg: fp21, asm: "VPSLLQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRLQ256", argLength: 2, reg: fp21, asm: "VPSRLQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRAQ256", argLength: 2, reg: fp21, asm: "VPSRAQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSLLVQ256", argLength: 2, reg: fp21, asm: "VPSLLVQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHLDVQ256", argLength: 3, reg: fp31, asm: "VPSHLDVQ", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VPSRLVQ256", argLength: 2, reg: fp21, asm: "VPSRLVQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHRDVQ256", argLength: 3, reg: fp31, asm: "VPSHRDVQ", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VPSRAVQ256", argLength: 2, reg: fp21, asm: "VPSRAVQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSUBQ256", argLength: 2, reg: fp21, asm: "VPSUBQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPABSQ512", argLength: 1, reg: fp11, asm: "VPABSQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPADDQ512", argLength: 2, reg: fp21, asm: "VPADDQ", commutative: true, typ: "Vec512", resultInArg0: false}, @@ -448,6 +572,16 @@ func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp, {name: "VPMULLQMasked512", argLength: 3, reg: fp2kfp, asm: "VPMULLQ", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPORQMasked512", argLength: 3, reg: fp2kfp, asm: "VPORQ", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPOPCNTQMasked512", argLength: 2, reg: fpkfp, asm: "VPOPCNTQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPROLVQMasked512", argLength: 3, reg: fp2kfp, asm: "VPROLVQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPRORVQMasked512", argLength: 3, reg: fp2kfp, asm: "VPRORVQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSLLQMasked512", argLength: 3, reg: fp2kfp, asm: "VPSLLQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRLQMasked512", argLength: 3, reg: fp2kfp, asm: "VPSRLQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRAQMasked512", argLength: 3, reg: fp2kfp, asm: "VPSRAQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSLLVQMasked512", argLength: 3, reg: fp2kfp, asm: "VPSLLVQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHLDVQMasked512", argLength: 4, reg: fp3kfp, asm: "VPSHLDVQ", commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VPSRLVQMasked512", argLength: 3, reg: fp2kfp, asm: "VPSRLVQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHRDVQMasked512", argLength: 4, reg: fp3kfp, asm: "VPSHRDVQ", commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VPSRAVQMasked512", argLength: 3, reg: 
fp2kfp, asm: "VPSRAVQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSUBQMasked512", argLength: 3, reg: fp2kfp, asm: "VPSUBQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPXORQMasked512", argLength: 3, reg: fp2kfp, asm: "VPXORQ", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMAXSQ512", argLength: 2, reg: fp21, asm: "VPMAXSQ", commutative: true, typ: "Vec512", resultInArg0: false}, @@ -456,6 +590,16 @@ func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp, {name: "VPMULLQ512", argLength: 2, reg: fp21, asm: "VPMULLQ", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPORQ512", argLength: 2, reg: fp21, asm: "VPORQ", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPOPCNTQ512", argLength: 1, reg: fp11, asm: "VPOPCNTQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPROLVQ512", argLength: 2, reg: fp21, asm: "VPROLVQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPRORVQ512", argLength: 2, reg: fp21, asm: "VPRORVQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSLLQ512", argLength: 2, reg: fp21, asm: "VPSLLQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRLQ512", argLength: 2, reg: fp21, asm: "VPSRLQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRAQ512", argLength: 2, reg: fp21, asm: "VPSRAQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSLLVQ512", argLength: 2, reg: fp21, asm: "VPSLLVQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHLDVQ512", argLength: 3, reg: fp31, asm: "VPSHLDVQ", commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VPSRLVQ512", argLength: 2, reg: fp21, asm: "VPSRLVQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHRDVQ512", argLength: 3, reg: fp31, asm: "VPSHRDVQ", commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VPSRAVQ512", argLength: 2, reg: fp21, asm: "VPSRAVQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSUBQ512", argLength: 2, reg: fp21, asm: "VPSUBQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPXORQ512", argLength: 2, reg: fp21, asm: "VPXORQ", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPABSB128", argLength: 1, reg: fp11, asm: "VPABSB", commutative: false, typ: "Vec128", resultInArg0: false}, @@ -641,28 +785,88 @@ func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp, {name: "VCMPPDMasked512", argLength: 3, reg: fp2kk, asm: "VCMPPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPW256", argLength: 2, reg: fp2k, asm: "VPCMPW", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPWMasked256", argLength: 3, reg: fp2kk, asm: "VPCMPW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, + {name: "VPSHLDWMasked256", argLength: 3, reg: fp2kfp, asm: "VPSHLDW", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHRDWMasked256", argLength: 3, reg: fp2kfp, asm: "VPSHRDW", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHLDW256", argLength: 2, reg: fp21, asm: "VPSHLDW", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHRDW256", argLength: 2, reg: fp21, asm: "VPSHRDW", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPCMPW512", argLength: 2, reg: fp2k, 
asm: "VPCMPW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPWMasked512", argLength: 3, reg: fp2kk, asm: "VPCMPW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, + {name: "VPSHLDWMasked512", argLength: 3, reg: fp2kfp, asm: "VPSHLDW", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHRDWMasked512", argLength: 3, reg: fp2kfp, asm: "VPSHRDW", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHLDW512", argLength: 2, reg: fp21, asm: "VPSHLDW", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHRDW512", argLength: 2, reg: fp21, asm: "VPSHRDW", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPEXTRW128", argLength: 1, reg: fpgp, asm: "VPEXTRW", aux: "Int8", commutative: false, typ: "int16", resultInArg0: false}, {name: "VPCMPW128", argLength: 2, reg: fp2k, asm: "VPCMPW", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPWMasked128", argLength: 3, reg: fp2kk, asm: "VPCMPW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, + {name: "VPSHLDWMasked128", argLength: 3, reg: fp2kfp, asm: "VPSHLDW", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHRDWMasked128", argLength: 3, reg: fp2kfp, asm: "VPSHRDW", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPINSRW128", argLength: 2, reg: fpgpfp, asm: "VPINSRW", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHLDW128", argLength: 2, reg: fp21, asm: "VPSHLDW", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHRDW128", argLength: 2, reg: fp21, asm: "VPSHRDW", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPCMPD512", argLength: 2, reg: fp2k, asm: "VPCMPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPDMasked512", argLength: 3, reg: fp2kk, asm: "VPCMPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, + {name: "VPROLDMasked512", argLength: 2, reg: fpkfp, asm: "VPROLD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPRORDMasked512", argLength: 2, reg: fpkfp, asm: "VPRORD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHLDDMasked512", argLength: 3, reg: fp2kfp, asm: "VPSHLDD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHRDDMasked512", argLength: 3, reg: fp2kfp, asm: "VPSHRDD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPROLD512", argLength: 1, reg: fp11, asm: "VPROLD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPRORD512", argLength: 1, reg: fp11, asm: "VPRORD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHLDD512", argLength: 2, reg: fp21, asm: "VPSHLDD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHRDD512", argLength: 2, reg: fp21, asm: "VPSHRDD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPEXTRD128", argLength: 1, reg: fpgp, asm: "VPEXTRD", aux: "Int8", commutative: false, typ: "int32", resultInArg0: false}, {name: "VPCMPD128", argLength: 2, reg: fp2k, asm: "VPCMPD", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPDMasked128", argLength: 3, reg: fp2kk, asm: "VPCMPD", aux: "Int8", commutative: 
true, typ: "Mask", resultInArg0: false}, + {name: "VPROLDMasked128", argLength: 2, reg: fpkfp, asm: "VPROLD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPRORDMasked128", argLength: 2, reg: fpkfp, asm: "VPRORD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHLDDMasked128", argLength: 3, reg: fp2kfp, asm: "VPSHLDD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHRDDMasked128", argLength: 3, reg: fp2kfp, asm: "VPSHRDD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPROLD128", argLength: 1, reg: fp11, asm: "VPROLD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPRORD128", argLength: 1, reg: fp11, asm: "VPRORD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPINSRD128", argLength: 2, reg: fpgpfp, asm: "VPINSRD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHLDD128", argLength: 2, reg: fp21, asm: "VPSHLDD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHRDD128", argLength: 2, reg: fp21, asm: "VPSHRDD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPCMPD256", argLength: 2, reg: fp2k, asm: "VPCMPD", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPDMasked256", argLength: 3, reg: fp2kk, asm: "VPCMPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, + {name: "VPROLDMasked256", argLength: 2, reg: fpkfp, asm: "VPROLD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPRORDMasked256", argLength: 2, reg: fpkfp, asm: "VPRORD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHLDDMasked256", argLength: 3, reg: fp2kfp, asm: "VPSHLDD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHRDDMasked256", argLength: 3, reg: fp2kfp, asm: "VPSHRDD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPROLD256", argLength: 1, reg: fp11, asm: "VPROLD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPRORD256", argLength: 1, reg: fp11, asm: "VPRORD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHLDD256", argLength: 2, reg: fp21, asm: "VPSHLDD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHRDD256", argLength: 2, reg: fp21, asm: "VPSHRDD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPEXTRQ128", argLength: 1, reg: fpgp, asm: "VPEXTRQ", aux: "Int8", commutative: false, typ: "int64", resultInArg0: false}, {name: "VPCMPQ128", argLength: 2, reg: fp2k, asm: "VPCMPQ", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPQMasked128", argLength: 3, reg: fp2kk, asm: "VPCMPQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, + {name: "VPROLQMasked128", argLength: 2, reg: fpkfp, asm: "VPROLQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPRORQMasked128", argLength: 2, reg: fpkfp, asm: "VPRORQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHLDQMasked128", argLength: 3, reg: fp2kfp, asm: "VPSHLDQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHRDQMasked128", argLength: 3, reg: fp2kfp, asm: "VPSHRDQ", aux: "Int8", commutative: false, typ: 
"Vec128", resultInArg0: false}, + {name: "VPROLQ128", argLength: 1, reg: fp11, asm: "VPROLQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPRORQ128", argLength: 1, reg: fp11, asm: "VPRORQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPINSRQ128", argLength: 2, reg: fpgpfp, asm: "VPINSRQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHLDQ128", argLength: 2, reg: fp21, asm: "VPSHLDQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHRDQ128", argLength: 2, reg: fp21, asm: "VPSHRDQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPCMPQ256", argLength: 2, reg: fp2k, asm: "VPCMPQ", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPQMasked256", argLength: 3, reg: fp2kk, asm: "VPCMPQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, + {name: "VPROLQMasked256", argLength: 2, reg: fpkfp, asm: "VPROLQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPRORQMasked256", argLength: 2, reg: fpkfp, asm: "VPRORQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHLDQMasked256", argLength: 3, reg: fp2kfp, asm: "VPSHLDQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHRDQMasked256", argLength: 3, reg: fp2kfp, asm: "VPSHRDQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPROLQ256", argLength: 1, reg: fp11, asm: "VPROLQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPRORQ256", argLength: 1, reg: fp11, asm: "VPRORQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHLDQ256", argLength: 2, reg: fp21, asm: "VPSHLDQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHRDQ256", argLength: 2, reg: fp21, asm: "VPSHRDQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPCMPQ512", argLength: 2, reg: fp2k, asm: "VPCMPQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPQMasked512", argLength: 3, reg: fp2kk, asm: "VPCMPQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, + {name: "VPROLQMasked512", argLength: 2, reg: fpkfp, asm: "VPROLQ", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPRORQMasked512", argLength: 2, reg: fpkfp, asm: "VPRORQ", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHLDQMasked512", argLength: 3, reg: fp2kfp, asm: "VPSHLDQ", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHRDQMasked512", argLength: 3, reg: fp2kfp, asm: "VPSHRDQ", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPROLQ512", argLength: 1, reg: fp11, asm: "VPROLQ", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPRORQ512", argLength: 1, reg: fp11, asm: "VPRORQ", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHLDQ512", argLength: 2, reg: fp21, asm: "VPSHLDQ", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHRDQ512", argLength: 2, reg: fp21, asm: "VPSHRDQ", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPEXTRB128", argLength: 1, reg: fpgp, asm: "VPEXTRB", aux: "Int8", commutative: false, typ: "int8", resultInArg0: false}, {name: 
"VPCMPB128", argLength: 2, reg: fp2k, asm: "VPCMPB", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPBMasked128", argLength: 3, reg: fp2kk, asm: "VPCMPB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, diff --git a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go index 1c33483f424..0f3d3f8214c 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go +++ b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go @@ -345,6 +345,11 @@ func simdGenericOps() []opData { {name: "MaskedPopCountInt16x16", argLength: 2, commutative: false}, {name: "MaskedSaturatedAddInt16x16", argLength: 3, commutative: true}, {name: "MaskedSaturatedSubInt16x16", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftInt16x16", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftAndFillUpperFromInt16x16", argLength: 4, commutative: false}, + {name: "MaskedShiftRightInt16x16", argLength: 3, commutative: false}, + {name: "MaskedShiftRightAndFillUpperFromInt16x16", argLength: 4, commutative: false}, + {name: "MaskedShiftRightSignExtendedInt16x16", argLength: 3, commutative: false}, {name: "MaskedSubInt16x16", argLength: 3, commutative: false}, {name: "MaxInt16x16", argLength: 2, commutative: true}, {name: "MinInt16x16", argLength: 2, commutative: true}, @@ -360,6 +365,14 @@ func simdGenericOps() []opData { {name: "SaturatedPairwiseAddInt16x16", argLength: 2, commutative: false}, {name: "SaturatedPairwiseSubInt16x16", argLength: 2, commutative: false}, {name: "SaturatedSubInt16x16", argLength: 2, commutative: false}, + {name: "ShiftAllLeftInt16x16", argLength: 2, commutative: false}, + {name: "ShiftAllRightInt16x16", argLength: 2, commutative: false}, + {name: "ShiftAllRightSignExtendedInt16x16", argLength: 2, commutative: false}, + {name: "ShiftLeftInt16x16", argLength: 2, commutative: false}, + {name: "ShiftLeftAndFillUpperFromInt16x16", argLength: 3, commutative: false}, + {name: "ShiftRightInt16x16", argLength: 2, commutative: false}, + {name: "ShiftRightAndFillUpperFromInt16x16", argLength: 3, commutative: false}, + {name: "ShiftRightSignExtendedInt16x16", argLength: 2, commutative: false}, {name: "SignInt16x16", argLength: 2, commutative: false}, {name: "SubInt16x16", argLength: 2, commutative: false}, {name: "XorInt16x16", argLength: 2, commutative: true}, @@ -386,6 +399,11 @@ func simdGenericOps() []opData { {name: "MaskedPopCountInt16x32", argLength: 2, commutative: false}, {name: "MaskedSaturatedAddInt16x32", argLength: 3, commutative: true}, {name: "MaskedSaturatedSubInt16x32", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftInt16x32", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftAndFillUpperFromInt16x32", argLength: 4, commutative: false}, + {name: "MaskedShiftRightInt16x32", argLength: 3, commutative: false}, + {name: "MaskedShiftRightAndFillUpperFromInt16x32", argLength: 4, commutative: false}, + {name: "MaskedShiftRightSignExtendedInt16x32", argLength: 3, commutative: false}, {name: "MaskedSubInt16x32", argLength: 3, commutative: false}, {name: "MaxInt16x32", argLength: 2, commutative: true}, {name: "MinInt16x32", argLength: 2, commutative: true}, @@ -396,6 +414,11 @@ func simdGenericOps() []opData { {name: "PopCountInt16x32", argLength: 1, commutative: false}, {name: "SaturatedAddInt16x32", argLength: 2, commutative: true}, {name: "SaturatedSubInt16x32", argLength: 2, commutative: false}, + {name: "ShiftLeftInt16x32", argLength: 2, commutative: false}, + 
{name: "ShiftLeftAndFillUpperFromInt16x32", argLength: 3, commutative: false}, + {name: "ShiftRightInt16x32", argLength: 2, commutative: false}, + {name: "ShiftRightAndFillUpperFromInt16x32", argLength: 3, commutative: false}, + {name: "ShiftRightSignExtendedInt16x32", argLength: 2, commutative: false}, {name: "SubInt16x32", argLength: 2, commutative: false}, {name: "AbsoluteInt16x8", argLength: 1, commutative: false}, {name: "AddInt16x8", argLength: 2, commutative: true}, @@ -422,6 +445,11 @@ func simdGenericOps() []opData { {name: "MaskedPopCountInt16x8", argLength: 2, commutative: false}, {name: "MaskedSaturatedAddInt16x8", argLength: 3, commutative: true}, {name: "MaskedSaturatedSubInt16x8", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftInt16x8", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftAndFillUpperFromInt16x8", argLength: 4, commutative: false}, + {name: "MaskedShiftRightInt16x8", argLength: 3, commutative: false}, + {name: "MaskedShiftRightAndFillUpperFromInt16x8", argLength: 4, commutative: false}, + {name: "MaskedShiftRightSignExtendedInt16x8", argLength: 3, commutative: false}, {name: "MaskedSubInt16x8", argLength: 3, commutative: false}, {name: "MaxInt16x8", argLength: 2, commutative: true}, {name: "MinInt16x8", argLength: 2, commutative: true}, @@ -437,6 +465,14 @@ func simdGenericOps() []opData { {name: "SaturatedPairwiseAddInt16x8", argLength: 2, commutative: false}, {name: "SaturatedPairwiseSubInt16x8", argLength: 2, commutative: false}, {name: "SaturatedSubInt16x8", argLength: 2, commutative: false}, + {name: "ShiftAllLeftInt16x8", argLength: 2, commutative: false}, + {name: "ShiftAllRightInt16x8", argLength: 2, commutative: false}, + {name: "ShiftAllRightSignExtendedInt16x8", argLength: 2, commutative: false}, + {name: "ShiftLeftInt16x8", argLength: 2, commutative: false}, + {name: "ShiftLeftAndFillUpperFromInt16x8", argLength: 3, commutative: false}, + {name: "ShiftRightInt16x8", argLength: 2, commutative: false}, + {name: "ShiftRightAndFillUpperFromInt16x8", argLength: 3, commutative: false}, + {name: "ShiftRightSignExtendedInt16x8", argLength: 2, commutative: false}, {name: "SignInt16x8", argLength: 2, commutative: false}, {name: "SubInt16x8", argLength: 2, commutative: false}, {name: "XorInt16x8", argLength: 2, commutative: true}, @@ -465,8 +501,15 @@ func simdGenericOps() []opData { {name: "MaskedOrInt32x16", argLength: 3, commutative: true}, {name: "MaskedPairDotProdAccumulateInt32x16", argLength: 4, commutative: false}, {name: "MaskedPopCountInt32x16", argLength: 2, commutative: false}, + {name: "MaskedRotateLeftInt32x16", argLength: 3, commutative: false}, + {name: "MaskedRotateRightInt32x16", argLength: 3, commutative: false}, {name: "MaskedSaturatedPairDotProdAccumulateInt32x16", argLength: 4, commutative: false}, {name: "MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16", argLength: 4, commutative: false}, + {name: "MaskedShiftLeftInt32x16", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftAndFillUpperFromInt32x16", argLength: 4, commutative: false}, + {name: "MaskedShiftRightInt32x16", argLength: 3, commutative: false}, + {name: "MaskedShiftRightAndFillUpperFromInt32x16", argLength: 4, commutative: false}, + {name: "MaskedShiftRightSignExtendedInt32x16", argLength: 3, commutative: false}, {name: "MaskedSubInt32x16", argLength: 3, commutative: false}, {name: "MaskedUnsignedSignedQuadDotProdAccumulateInt32x16", argLength: 4, commutative: false}, {name: "MaskedXorInt32x16", argLength: 3, commutative: true}, @@ 
-477,8 +520,15 @@ func simdGenericOps() []opData { {name: "OrInt32x16", argLength: 2, commutative: true}, {name: "PairDotProdAccumulateInt32x16", argLength: 3, commutative: false}, {name: "PopCountInt32x16", argLength: 1, commutative: false}, + {name: "RotateLeftInt32x16", argLength: 2, commutative: false}, + {name: "RotateRightInt32x16", argLength: 2, commutative: false}, {name: "SaturatedPairDotProdAccumulateInt32x16", argLength: 3, commutative: false}, {name: "SaturatedUnsignedSignedQuadDotProdAccumulateInt32x16", argLength: 3, commutative: false}, + {name: "ShiftLeftInt32x16", argLength: 2, commutative: false}, + {name: "ShiftLeftAndFillUpperFromInt32x16", argLength: 3, commutative: false}, + {name: "ShiftRightInt32x16", argLength: 2, commutative: false}, + {name: "ShiftRightAndFillUpperFromInt32x16", argLength: 3, commutative: false}, + {name: "ShiftRightSignExtendedInt32x16", argLength: 2, commutative: false}, {name: "SubInt32x16", argLength: 2, commutative: false}, {name: "UnsignedSignedQuadDotProdAccumulateInt32x16", argLength: 3, commutative: false}, {name: "XorInt32x16", argLength: 2, commutative: true}, @@ -507,8 +557,15 @@ func simdGenericOps() []opData { {name: "MaskedOrInt32x4", argLength: 3, commutative: true}, {name: "MaskedPairDotProdAccumulateInt32x4", argLength: 4, commutative: false}, {name: "MaskedPopCountInt32x4", argLength: 2, commutative: false}, + {name: "MaskedRotateLeftInt32x4", argLength: 3, commutative: false}, + {name: "MaskedRotateRightInt32x4", argLength: 3, commutative: false}, {name: "MaskedSaturatedPairDotProdAccumulateInt32x4", argLength: 4, commutative: false}, {name: "MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4", argLength: 4, commutative: false}, + {name: "MaskedShiftLeftInt32x4", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftAndFillUpperFromInt32x4", argLength: 4, commutative: false}, + {name: "MaskedShiftRightInt32x4", argLength: 3, commutative: false}, + {name: "MaskedShiftRightAndFillUpperFromInt32x4", argLength: 4, commutative: false}, + {name: "MaskedShiftRightSignExtendedInt32x4", argLength: 3, commutative: false}, {name: "MaskedSubInt32x4", argLength: 3, commutative: false}, {name: "MaskedUnsignedSignedQuadDotProdAccumulateInt32x4", argLength: 4, commutative: false}, {name: "MaskedXorInt32x4", argLength: 3, commutative: true}, @@ -522,8 +579,18 @@ func simdGenericOps() []opData { {name: "PairwiseAddInt32x4", argLength: 2, commutative: false}, {name: "PairwiseSubInt32x4", argLength: 2, commutative: false}, {name: "PopCountInt32x4", argLength: 1, commutative: false}, + {name: "RotateLeftInt32x4", argLength: 2, commutative: false}, + {name: "RotateRightInt32x4", argLength: 2, commutative: false}, {name: "SaturatedPairDotProdAccumulateInt32x4", argLength: 3, commutative: false}, {name: "SaturatedUnsignedSignedQuadDotProdAccumulateInt32x4", argLength: 3, commutative: false}, + {name: "ShiftAllLeftInt32x4", argLength: 2, commutative: false}, + {name: "ShiftAllRightInt32x4", argLength: 2, commutative: false}, + {name: "ShiftAllRightSignExtendedInt32x4", argLength: 2, commutative: false}, + {name: "ShiftLeftInt32x4", argLength: 2, commutative: false}, + {name: "ShiftLeftAndFillUpperFromInt32x4", argLength: 3, commutative: false}, + {name: "ShiftRightInt32x4", argLength: 2, commutative: false}, + {name: "ShiftRightAndFillUpperFromInt32x4", argLength: 3, commutative: false}, + {name: "ShiftRightSignExtendedInt32x4", argLength: 2, commutative: false}, {name: "SignInt32x4", argLength: 2, commutative: false}, {name: 
"SubInt32x4", argLength: 2, commutative: false}, {name: "UnsignedSignedQuadDotProdAccumulateInt32x4", argLength: 3, commutative: false}, @@ -553,8 +620,15 @@ func simdGenericOps() []opData { {name: "MaskedOrInt32x8", argLength: 3, commutative: true}, {name: "MaskedPairDotProdAccumulateInt32x8", argLength: 4, commutative: false}, {name: "MaskedPopCountInt32x8", argLength: 2, commutative: false}, + {name: "MaskedRotateLeftInt32x8", argLength: 3, commutative: false}, + {name: "MaskedRotateRightInt32x8", argLength: 3, commutative: false}, {name: "MaskedSaturatedPairDotProdAccumulateInt32x8", argLength: 4, commutative: false}, {name: "MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8", argLength: 4, commutative: false}, + {name: "MaskedShiftLeftInt32x8", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftAndFillUpperFromInt32x8", argLength: 4, commutative: false}, + {name: "MaskedShiftRightInt32x8", argLength: 3, commutative: false}, + {name: "MaskedShiftRightAndFillUpperFromInt32x8", argLength: 4, commutative: false}, + {name: "MaskedShiftRightSignExtendedInt32x8", argLength: 3, commutative: false}, {name: "MaskedSubInt32x8", argLength: 3, commutative: false}, {name: "MaskedUnsignedSignedQuadDotProdAccumulateInt32x8", argLength: 4, commutative: false}, {name: "MaskedXorInt32x8", argLength: 3, commutative: true}, @@ -568,8 +642,18 @@ func simdGenericOps() []opData { {name: "PairwiseAddInt32x8", argLength: 2, commutative: false}, {name: "PairwiseSubInt32x8", argLength: 2, commutative: false}, {name: "PopCountInt32x8", argLength: 1, commutative: false}, + {name: "RotateLeftInt32x8", argLength: 2, commutative: false}, + {name: "RotateRightInt32x8", argLength: 2, commutative: false}, {name: "SaturatedPairDotProdAccumulateInt32x8", argLength: 3, commutative: false}, {name: "SaturatedUnsignedSignedQuadDotProdAccumulateInt32x8", argLength: 3, commutative: false}, + {name: "ShiftAllLeftInt32x8", argLength: 2, commutative: false}, + {name: "ShiftAllRightInt32x8", argLength: 2, commutative: false}, + {name: "ShiftAllRightSignExtendedInt32x8", argLength: 2, commutative: false}, + {name: "ShiftLeftInt32x8", argLength: 2, commutative: false}, + {name: "ShiftLeftAndFillUpperFromInt32x8", argLength: 3, commutative: false}, + {name: "ShiftRightInt32x8", argLength: 2, commutative: false}, + {name: "ShiftRightAndFillUpperFromInt32x8", argLength: 3, commutative: false}, + {name: "ShiftRightSignExtendedInt32x8", argLength: 2, commutative: false}, {name: "SignInt32x8", argLength: 2, commutative: false}, {name: "SubInt32x8", argLength: 2, commutative: false}, {name: "UnsignedSignedQuadDotProdAccumulateInt32x8", argLength: 3, commutative: false}, @@ -599,6 +683,16 @@ func simdGenericOps() []opData { {name: "MaskedNotEqualInt64x2", argLength: 3, commutative: true}, {name: "MaskedOrInt64x2", argLength: 3, commutative: true}, {name: "MaskedPopCountInt64x2", argLength: 2, commutative: false}, + {name: "MaskedRotateLeftInt64x2", argLength: 3, commutative: false}, + {name: "MaskedRotateRightInt64x2", argLength: 3, commutative: false}, + {name: "MaskedShiftAllLeftInt64x2", argLength: 3, commutative: false}, + {name: "MaskedShiftAllRightInt64x2", argLength: 3, commutative: false}, + {name: "MaskedShiftAllRightSignExtendedInt64x2", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftInt64x2", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftAndFillUpperFromInt64x2", argLength: 4, commutative: false}, + {name: "MaskedShiftRightInt64x2", argLength: 3, commutative: false}, + {name: 
"MaskedShiftRightAndFillUpperFromInt64x2", argLength: 4, commutative: false}, + {name: "MaskedShiftRightSignExtendedInt64x2", argLength: 3, commutative: false}, {name: "MaskedSubInt64x2", argLength: 3, commutative: false}, {name: "MaskedXorInt64x2", argLength: 3, commutative: true}, {name: "MaxInt64x2", argLength: 2, commutative: true}, @@ -608,6 +702,16 @@ func simdGenericOps() []opData { {name: "NotEqualInt64x2", argLength: 2, commutative: true}, {name: "OrInt64x2", argLength: 2, commutative: true}, {name: "PopCountInt64x2", argLength: 1, commutative: false}, + {name: "RotateLeftInt64x2", argLength: 2, commutative: false}, + {name: "RotateRightInt64x2", argLength: 2, commutative: false}, + {name: "ShiftAllLeftInt64x2", argLength: 2, commutative: false}, + {name: "ShiftAllRightInt64x2", argLength: 2, commutative: false}, + {name: "ShiftAllRightSignExtendedInt64x2", argLength: 2, commutative: false}, + {name: "ShiftLeftInt64x2", argLength: 2, commutative: false}, + {name: "ShiftLeftAndFillUpperFromInt64x2", argLength: 3, commutative: false}, + {name: "ShiftRightInt64x2", argLength: 2, commutative: false}, + {name: "ShiftRightAndFillUpperFromInt64x2", argLength: 3, commutative: false}, + {name: "ShiftRightSignExtendedInt64x2", argLength: 2, commutative: false}, {name: "SubInt64x2", argLength: 2, commutative: false}, {name: "XorInt64x2", argLength: 2, commutative: true}, {name: "AbsoluteInt64x4", argLength: 1, commutative: false}, @@ -635,6 +739,16 @@ func simdGenericOps() []opData { {name: "MaskedNotEqualInt64x4", argLength: 3, commutative: true}, {name: "MaskedOrInt64x4", argLength: 3, commutative: true}, {name: "MaskedPopCountInt64x4", argLength: 2, commutative: false}, + {name: "MaskedRotateLeftInt64x4", argLength: 3, commutative: false}, + {name: "MaskedRotateRightInt64x4", argLength: 3, commutative: false}, + {name: "MaskedShiftAllLeftInt64x4", argLength: 3, commutative: false}, + {name: "MaskedShiftAllRightInt64x4", argLength: 3, commutative: false}, + {name: "MaskedShiftAllRightSignExtendedInt64x4", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftInt64x4", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftAndFillUpperFromInt64x4", argLength: 4, commutative: false}, + {name: "MaskedShiftRightInt64x4", argLength: 3, commutative: false}, + {name: "MaskedShiftRightAndFillUpperFromInt64x4", argLength: 4, commutative: false}, + {name: "MaskedShiftRightSignExtendedInt64x4", argLength: 3, commutative: false}, {name: "MaskedSubInt64x4", argLength: 3, commutative: false}, {name: "MaskedXorInt64x4", argLength: 3, commutative: true}, {name: "MaxInt64x4", argLength: 2, commutative: true}, @@ -644,6 +758,16 @@ func simdGenericOps() []opData { {name: "NotEqualInt64x4", argLength: 2, commutative: true}, {name: "OrInt64x4", argLength: 2, commutative: true}, {name: "PopCountInt64x4", argLength: 1, commutative: false}, + {name: "RotateLeftInt64x4", argLength: 2, commutative: false}, + {name: "RotateRightInt64x4", argLength: 2, commutative: false}, + {name: "ShiftAllLeftInt64x4", argLength: 2, commutative: false}, + {name: "ShiftAllRightInt64x4", argLength: 2, commutative: false}, + {name: "ShiftAllRightSignExtendedInt64x4", argLength: 2, commutative: false}, + {name: "ShiftLeftInt64x4", argLength: 2, commutative: false}, + {name: "ShiftLeftAndFillUpperFromInt64x4", argLength: 3, commutative: false}, + {name: "ShiftRightInt64x4", argLength: 2, commutative: false}, + {name: "ShiftRightAndFillUpperFromInt64x4", argLength: 3, commutative: false}, + {name: 
"ShiftRightSignExtendedInt64x4", argLength: 2, commutative: false}, {name: "SubInt64x4", argLength: 2, commutative: false}, {name: "XorInt64x4", argLength: 2, commutative: true}, {name: "AbsoluteInt64x8", argLength: 1, commutative: false}, @@ -671,6 +795,16 @@ func simdGenericOps() []opData { {name: "MaskedNotEqualInt64x8", argLength: 3, commutative: true}, {name: "MaskedOrInt64x8", argLength: 3, commutative: true}, {name: "MaskedPopCountInt64x8", argLength: 2, commutative: false}, + {name: "MaskedRotateLeftInt64x8", argLength: 3, commutative: false}, + {name: "MaskedRotateRightInt64x8", argLength: 3, commutative: false}, + {name: "MaskedShiftAllLeftInt64x8", argLength: 3, commutative: false}, + {name: "MaskedShiftAllRightInt64x8", argLength: 3, commutative: false}, + {name: "MaskedShiftAllRightSignExtendedInt64x8", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftInt64x8", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftAndFillUpperFromInt64x8", argLength: 4, commutative: false}, + {name: "MaskedShiftRightInt64x8", argLength: 3, commutative: false}, + {name: "MaskedShiftRightAndFillUpperFromInt64x8", argLength: 4, commutative: false}, + {name: "MaskedShiftRightSignExtendedInt64x8", argLength: 3, commutative: false}, {name: "MaskedSubInt64x8", argLength: 3, commutative: false}, {name: "MaskedXorInt64x8", argLength: 3, commutative: true}, {name: "MaxInt64x8", argLength: 2, commutative: true}, @@ -680,6 +814,16 @@ func simdGenericOps() []opData { {name: "NotEqualInt64x8", argLength: 2, commutative: true}, {name: "OrInt64x8", argLength: 2, commutative: true}, {name: "PopCountInt64x8", argLength: 1, commutative: false}, + {name: "RotateLeftInt64x8", argLength: 2, commutative: false}, + {name: "RotateRightInt64x8", argLength: 2, commutative: false}, + {name: "ShiftAllLeftInt64x8", argLength: 2, commutative: false}, + {name: "ShiftAllRightInt64x8", argLength: 2, commutative: false}, + {name: "ShiftAllRightSignExtendedInt64x8", argLength: 2, commutative: false}, + {name: "ShiftLeftInt64x8", argLength: 2, commutative: false}, + {name: "ShiftLeftAndFillUpperFromInt64x8", argLength: 3, commutative: false}, + {name: "ShiftRightInt64x8", argLength: 2, commutative: false}, + {name: "ShiftRightAndFillUpperFromInt64x8", argLength: 3, commutative: false}, + {name: "ShiftRightSignExtendedInt64x8", argLength: 2, commutative: false}, {name: "SubInt64x8", argLength: 2, commutative: false}, {name: "XorInt64x8", argLength: 2, commutative: true}, {name: "AbsoluteInt8x16", argLength: 1, commutative: false}, @@ -799,6 +943,11 @@ func simdGenericOps() []opData { {name: "MaskedPopCountUint16x16", argLength: 2, commutative: false}, {name: "MaskedSaturatedAddUint16x16", argLength: 3, commutative: true}, {name: "MaskedSaturatedSubUint16x16", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftUint16x16", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftAndFillUpperFromUint16x16", argLength: 4, commutative: false}, + {name: "MaskedShiftRightUint16x16", argLength: 3, commutative: false}, + {name: "MaskedShiftRightAndFillUpperFromUint16x16", argLength: 4, commutative: false}, + {name: "MaskedShiftRightSignExtendedUint16x16", argLength: 3, commutative: false}, {name: "MaskedSubUint16x16", argLength: 3, commutative: false}, {name: "MaxUint16x16", argLength: 2, commutative: true}, {name: "MinUint16x16", argLength: 2, commutative: true}, @@ -810,6 +959,13 @@ func simdGenericOps() []opData { {name: "PopCountUint16x16", argLength: 1, commutative: false}, {name: "SaturatedAddUint16x16", 
argLength: 2, commutative: true}, {name: "SaturatedSubUint16x16", argLength: 2, commutative: false}, + {name: "ShiftAllLeftUint16x16", argLength: 2, commutative: false}, + {name: "ShiftAllRightUint16x16", argLength: 2, commutative: false}, + {name: "ShiftLeftUint16x16", argLength: 2, commutative: false}, + {name: "ShiftLeftAndFillUpperFromUint16x16", argLength: 3, commutative: false}, + {name: "ShiftRightUint16x16", argLength: 2, commutative: false}, + {name: "ShiftRightAndFillUpperFromUint16x16", argLength: 3, commutative: false}, + {name: "ShiftRightSignExtendedUint16x16", argLength: 2, commutative: false}, {name: "SubUint16x16", argLength: 2, commutative: false}, {name: "XorUint16x16", argLength: 2, commutative: true}, {name: "AddUint16x32", argLength: 2, commutative: true}, @@ -833,6 +989,11 @@ func simdGenericOps() []opData { {name: "MaskedPopCountUint16x32", argLength: 2, commutative: false}, {name: "MaskedSaturatedAddUint16x32", argLength: 3, commutative: true}, {name: "MaskedSaturatedSubUint16x32", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftUint16x32", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftAndFillUpperFromUint16x32", argLength: 4, commutative: false}, + {name: "MaskedShiftRightUint16x32", argLength: 3, commutative: false}, + {name: "MaskedShiftRightAndFillUpperFromUint16x32", argLength: 4, commutative: false}, + {name: "MaskedShiftRightSignExtendedUint16x32", argLength: 3, commutative: false}, {name: "MaskedSubUint16x32", argLength: 3, commutative: false}, {name: "MaxUint16x32", argLength: 2, commutative: true}, {name: "MinUint16x32", argLength: 2, commutative: true}, @@ -841,6 +1002,11 @@ func simdGenericOps() []opData { {name: "PopCountUint16x32", argLength: 1, commutative: false}, {name: "SaturatedAddUint16x32", argLength: 2, commutative: true}, {name: "SaturatedSubUint16x32", argLength: 2, commutative: false}, + {name: "ShiftLeftUint16x32", argLength: 2, commutative: false}, + {name: "ShiftLeftAndFillUpperFromUint16x32", argLength: 3, commutative: false}, + {name: "ShiftRightUint16x32", argLength: 2, commutative: false}, + {name: "ShiftRightAndFillUpperFromUint16x32", argLength: 3, commutative: false}, + {name: "ShiftRightSignExtendedUint16x32", argLength: 2, commutative: false}, {name: "SubUint16x32", argLength: 2, commutative: false}, {name: "AddUint16x8", argLength: 2, commutative: true}, {name: "AndUint16x8", argLength: 2, commutative: true}, @@ -865,6 +1031,11 @@ func simdGenericOps() []opData { {name: "MaskedPopCountUint16x8", argLength: 2, commutative: false}, {name: "MaskedSaturatedAddUint16x8", argLength: 3, commutative: true}, {name: "MaskedSaturatedSubUint16x8", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftUint16x8", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftAndFillUpperFromUint16x8", argLength: 4, commutative: false}, + {name: "MaskedShiftRightUint16x8", argLength: 3, commutative: false}, + {name: "MaskedShiftRightAndFillUpperFromUint16x8", argLength: 4, commutative: false}, + {name: "MaskedShiftRightSignExtendedUint16x8", argLength: 3, commutative: false}, {name: "MaskedSubUint16x8", argLength: 3, commutative: false}, {name: "MaxUint16x8", argLength: 2, commutative: true}, {name: "MinUint16x8", argLength: 2, commutative: true}, @@ -876,6 +1047,13 @@ func simdGenericOps() []opData { {name: "PopCountUint16x8", argLength: 1, commutative: false}, {name: "SaturatedAddUint16x8", argLength: 2, commutative: true}, {name: "SaturatedSubUint16x8", argLength: 2, commutative: false}, + {name: 
"ShiftAllLeftUint16x8", argLength: 2, commutative: false}, + {name: "ShiftAllRightUint16x8", argLength: 2, commutative: false}, + {name: "ShiftLeftUint16x8", argLength: 2, commutative: false}, + {name: "ShiftLeftAndFillUpperFromUint16x8", argLength: 3, commutative: false}, + {name: "ShiftRightUint16x8", argLength: 2, commutative: false}, + {name: "ShiftRightAndFillUpperFromUint16x8", argLength: 3, commutative: false}, + {name: "ShiftRightSignExtendedUint16x8", argLength: 2, commutative: false}, {name: "SubUint16x8", argLength: 2, commutative: false}, {name: "XorUint16x8", argLength: 2, commutative: true}, {name: "AddUint32x16", argLength: 2, commutative: true}, @@ -899,7 +1077,14 @@ func simdGenericOps() []opData { {name: "MaskedNotEqualUint32x16", argLength: 3, commutative: true}, {name: "MaskedOrUint32x16", argLength: 3, commutative: true}, {name: "MaskedPopCountUint32x16", argLength: 2, commutative: false}, + {name: "MaskedRotateLeftUint32x16", argLength: 3, commutative: false}, + {name: "MaskedRotateRightUint32x16", argLength: 3, commutative: false}, {name: "MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16", argLength: 4, commutative: false}, + {name: "MaskedShiftLeftUint32x16", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftAndFillUpperFromUint32x16", argLength: 4, commutative: false}, + {name: "MaskedShiftRightUint32x16", argLength: 3, commutative: false}, + {name: "MaskedShiftRightAndFillUpperFromUint32x16", argLength: 4, commutative: false}, + {name: "MaskedShiftRightSignExtendedUint32x16", argLength: 3, commutative: false}, {name: "MaskedSubUint32x16", argLength: 3, commutative: false}, {name: "MaskedUnsignedSignedQuadDotProdAccumulateUint32x16", argLength: 4, commutative: false}, {name: "MaskedXorUint32x16", argLength: 3, commutative: true}, @@ -908,7 +1093,14 @@ func simdGenericOps() []opData { {name: "NotEqualUint32x16", argLength: 2, commutative: true}, {name: "OrUint32x16", argLength: 2, commutative: true}, {name: "PopCountUint32x16", argLength: 1, commutative: false}, + {name: "RotateLeftUint32x16", argLength: 2, commutative: false}, + {name: "RotateRightUint32x16", argLength: 2, commutative: false}, {name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x16", argLength: 3, commutative: false}, + {name: "ShiftLeftUint32x16", argLength: 2, commutative: false}, + {name: "ShiftLeftAndFillUpperFromUint32x16", argLength: 3, commutative: false}, + {name: "ShiftRightUint32x16", argLength: 2, commutative: false}, + {name: "ShiftRightAndFillUpperFromUint32x16", argLength: 3, commutative: false}, + {name: "ShiftRightSignExtendedUint32x16", argLength: 2, commutative: false}, {name: "SubUint32x16", argLength: 2, commutative: false}, {name: "UnsignedSignedQuadDotProdAccumulateUint32x16", argLength: 3, commutative: false}, {name: "XorUint32x16", argLength: 2, commutative: true}, @@ -933,7 +1125,14 @@ func simdGenericOps() []opData { {name: "MaskedNotEqualUint32x4", argLength: 3, commutative: true}, {name: "MaskedOrUint32x4", argLength: 3, commutative: true}, {name: "MaskedPopCountUint32x4", argLength: 2, commutative: false}, + {name: "MaskedRotateLeftUint32x4", argLength: 3, commutative: false}, + {name: "MaskedRotateRightUint32x4", argLength: 3, commutative: false}, {name: "MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4", argLength: 4, commutative: false}, + {name: "MaskedShiftLeftUint32x4", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftAndFillUpperFromUint32x4", argLength: 4, commutative: false}, + {name: "MaskedShiftRightUint32x4", 
argLength: 3, commutative: false}, + {name: "MaskedShiftRightAndFillUpperFromUint32x4", argLength: 4, commutative: false}, + {name: "MaskedShiftRightSignExtendedUint32x4", argLength: 3, commutative: false}, {name: "MaskedSubUint32x4", argLength: 3, commutative: false}, {name: "MaskedUnsignedSignedQuadDotProdAccumulateUint32x4", argLength: 4, commutative: false}, {name: "MaskedXorUint32x4", argLength: 3, commutative: true}, @@ -945,7 +1144,16 @@ func simdGenericOps() []opData { {name: "PairwiseAddUint32x4", argLength: 2, commutative: false}, {name: "PairwiseSubUint32x4", argLength: 2, commutative: false}, {name: "PopCountUint32x4", argLength: 1, commutative: false}, + {name: "RotateLeftUint32x4", argLength: 2, commutative: false}, + {name: "RotateRightUint32x4", argLength: 2, commutative: false}, {name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x4", argLength: 3, commutative: false}, + {name: "ShiftAllLeftUint32x4", argLength: 2, commutative: false}, + {name: "ShiftAllRightUint32x4", argLength: 2, commutative: false}, + {name: "ShiftLeftUint32x4", argLength: 2, commutative: false}, + {name: "ShiftLeftAndFillUpperFromUint32x4", argLength: 3, commutative: false}, + {name: "ShiftRightUint32x4", argLength: 2, commutative: false}, + {name: "ShiftRightAndFillUpperFromUint32x4", argLength: 3, commutative: false}, + {name: "ShiftRightSignExtendedUint32x4", argLength: 2, commutative: false}, {name: "SubUint32x4", argLength: 2, commutative: false}, {name: "UnsignedSignedQuadDotProdAccumulateUint32x4", argLength: 3, commutative: false}, {name: "XorUint32x4", argLength: 2, commutative: true}, @@ -970,7 +1178,14 @@ func simdGenericOps() []opData { {name: "MaskedNotEqualUint32x8", argLength: 3, commutative: true}, {name: "MaskedOrUint32x8", argLength: 3, commutative: true}, {name: "MaskedPopCountUint32x8", argLength: 2, commutative: false}, + {name: "MaskedRotateLeftUint32x8", argLength: 3, commutative: false}, + {name: "MaskedRotateRightUint32x8", argLength: 3, commutative: false}, {name: "MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8", argLength: 4, commutative: false}, + {name: "MaskedShiftLeftUint32x8", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftAndFillUpperFromUint32x8", argLength: 4, commutative: false}, + {name: "MaskedShiftRightUint32x8", argLength: 3, commutative: false}, + {name: "MaskedShiftRightAndFillUpperFromUint32x8", argLength: 4, commutative: false}, + {name: "MaskedShiftRightSignExtendedUint32x8", argLength: 3, commutative: false}, {name: "MaskedSubUint32x8", argLength: 3, commutative: false}, {name: "MaskedUnsignedSignedQuadDotProdAccumulateUint32x8", argLength: 4, commutative: false}, {name: "MaskedXorUint32x8", argLength: 3, commutative: true}, @@ -982,7 +1197,16 @@ func simdGenericOps() []opData { {name: "PairwiseAddUint32x8", argLength: 2, commutative: false}, {name: "PairwiseSubUint32x8", argLength: 2, commutative: false}, {name: "PopCountUint32x8", argLength: 1, commutative: false}, + {name: "RotateLeftUint32x8", argLength: 2, commutative: false}, + {name: "RotateRightUint32x8", argLength: 2, commutative: false}, {name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x8", argLength: 3, commutative: false}, + {name: "ShiftAllLeftUint32x8", argLength: 2, commutative: false}, + {name: "ShiftAllRightUint32x8", argLength: 2, commutative: false}, + {name: "ShiftLeftUint32x8", argLength: 2, commutative: false}, + {name: "ShiftLeftAndFillUpperFromUint32x8", argLength: 3, commutative: false}, + {name: "ShiftRightUint32x8", argLength: 2, 
commutative: false}, + {name: "ShiftRightAndFillUpperFromUint32x8", argLength: 3, commutative: false}, + {name: "ShiftRightSignExtendedUint32x8", argLength: 2, commutative: false}, {name: "SubUint32x8", argLength: 2, commutative: false}, {name: "UnsignedSignedQuadDotProdAccumulateUint32x8", argLength: 3, commutative: false}, {name: "XorUint32x8", argLength: 2, commutative: true}, @@ -1008,6 +1232,15 @@ func simdGenericOps() []opData { {name: "MaskedNotEqualUint64x2", argLength: 3, commutative: true}, {name: "MaskedOrUint64x2", argLength: 3, commutative: true}, {name: "MaskedPopCountUint64x2", argLength: 2, commutative: false}, + {name: "MaskedRotateLeftUint64x2", argLength: 3, commutative: false}, + {name: "MaskedRotateRightUint64x2", argLength: 3, commutative: false}, + {name: "MaskedShiftAllLeftUint64x2", argLength: 3, commutative: false}, + {name: "MaskedShiftAllRightUint64x2", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftUint64x2", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftAndFillUpperFromUint64x2", argLength: 4, commutative: false}, + {name: "MaskedShiftRightUint64x2", argLength: 3, commutative: false}, + {name: "MaskedShiftRightAndFillUpperFromUint64x2", argLength: 4, commutative: false}, + {name: "MaskedShiftRightSignExtendedUint64x2", argLength: 3, commutative: false}, {name: "MaskedSubUint64x2", argLength: 3, commutative: false}, {name: "MaskedXorUint64x2", argLength: 3, commutative: true}, {name: "MaxUint64x2", argLength: 2, commutative: true}, @@ -1016,6 +1249,15 @@ func simdGenericOps() []opData { {name: "NotEqualUint64x2", argLength: 2, commutative: true}, {name: "OrUint64x2", argLength: 2, commutative: true}, {name: "PopCountUint64x2", argLength: 1, commutative: false}, + {name: "RotateLeftUint64x2", argLength: 2, commutative: false}, + {name: "RotateRightUint64x2", argLength: 2, commutative: false}, + {name: "ShiftAllLeftUint64x2", argLength: 2, commutative: false}, + {name: "ShiftAllRightUint64x2", argLength: 2, commutative: false}, + {name: "ShiftLeftUint64x2", argLength: 2, commutative: false}, + {name: "ShiftLeftAndFillUpperFromUint64x2", argLength: 3, commutative: false}, + {name: "ShiftRightUint64x2", argLength: 2, commutative: false}, + {name: "ShiftRightAndFillUpperFromUint64x2", argLength: 3, commutative: false}, + {name: "ShiftRightSignExtendedUint64x2", argLength: 2, commutative: false}, {name: "SubUint64x2", argLength: 2, commutative: false}, {name: "XorUint64x2", argLength: 2, commutative: true}, {name: "AddUint64x4", argLength: 2, commutative: true}, @@ -1040,6 +1282,15 @@ func simdGenericOps() []opData { {name: "MaskedNotEqualUint64x4", argLength: 3, commutative: true}, {name: "MaskedOrUint64x4", argLength: 3, commutative: true}, {name: "MaskedPopCountUint64x4", argLength: 2, commutative: false}, + {name: "MaskedRotateLeftUint64x4", argLength: 3, commutative: false}, + {name: "MaskedRotateRightUint64x4", argLength: 3, commutative: false}, + {name: "MaskedShiftAllLeftUint64x4", argLength: 3, commutative: false}, + {name: "MaskedShiftAllRightUint64x4", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftUint64x4", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftAndFillUpperFromUint64x4", argLength: 4, commutative: false}, + {name: "MaskedShiftRightUint64x4", argLength: 3, commutative: false}, + {name: "MaskedShiftRightAndFillUpperFromUint64x4", argLength: 4, commutative: false}, + {name: "MaskedShiftRightSignExtendedUint64x4", argLength: 3, commutative: false}, {name: "MaskedSubUint64x4", argLength: 3, 
commutative: false}, {name: "MaskedXorUint64x4", argLength: 3, commutative: true}, {name: "MaxUint64x4", argLength: 2, commutative: true}, @@ -1048,6 +1299,15 @@ func simdGenericOps() []opData { {name: "NotEqualUint64x4", argLength: 2, commutative: true}, {name: "OrUint64x4", argLength: 2, commutative: true}, {name: "PopCountUint64x4", argLength: 1, commutative: false}, + {name: "RotateLeftUint64x4", argLength: 2, commutative: false}, + {name: "RotateRightUint64x4", argLength: 2, commutative: false}, + {name: "ShiftAllLeftUint64x4", argLength: 2, commutative: false}, + {name: "ShiftAllRightUint64x4", argLength: 2, commutative: false}, + {name: "ShiftLeftUint64x4", argLength: 2, commutative: false}, + {name: "ShiftLeftAndFillUpperFromUint64x4", argLength: 3, commutative: false}, + {name: "ShiftRightUint64x4", argLength: 2, commutative: false}, + {name: "ShiftRightAndFillUpperFromUint64x4", argLength: 3, commutative: false}, + {name: "ShiftRightSignExtendedUint64x4", argLength: 2, commutative: false}, {name: "SubUint64x4", argLength: 2, commutative: false}, {name: "XorUint64x4", argLength: 2, commutative: true}, {name: "AddUint64x8", argLength: 2, commutative: true}, @@ -1072,6 +1332,15 @@ func simdGenericOps() []opData { {name: "MaskedNotEqualUint64x8", argLength: 3, commutative: true}, {name: "MaskedOrUint64x8", argLength: 3, commutative: true}, {name: "MaskedPopCountUint64x8", argLength: 2, commutative: false}, + {name: "MaskedRotateLeftUint64x8", argLength: 3, commutative: false}, + {name: "MaskedRotateRightUint64x8", argLength: 3, commutative: false}, + {name: "MaskedShiftAllLeftUint64x8", argLength: 3, commutative: false}, + {name: "MaskedShiftAllRightUint64x8", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftUint64x8", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftAndFillUpperFromUint64x8", argLength: 4, commutative: false}, + {name: "MaskedShiftRightUint64x8", argLength: 3, commutative: false}, + {name: "MaskedShiftRightAndFillUpperFromUint64x8", argLength: 4, commutative: false}, + {name: "MaskedShiftRightSignExtendedUint64x8", argLength: 3, commutative: false}, {name: "MaskedSubUint64x8", argLength: 3, commutative: false}, {name: "MaskedXorUint64x8", argLength: 3, commutative: true}, {name: "MaxUint64x8", argLength: 2, commutative: true}, @@ -1080,6 +1349,15 @@ func simdGenericOps() []opData { {name: "NotEqualUint64x8", argLength: 2, commutative: true}, {name: "OrUint64x8", argLength: 2, commutative: true}, {name: "PopCountUint64x8", argLength: 1, commutative: false}, + {name: "RotateLeftUint64x8", argLength: 2, commutative: false}, + {name: "RotateRightUint64x8", argLength: 2, commutative: false}, + {name: "ShiftAllLeftUint64x8", argLength: 2, commutative: false}, + {name: "ShiftAllRightUint64x8", argLength: 2, commutative: false}, + {name: "ShiftLeftUint64x8", argLength: 2, commutative: false}, + {name: "ShiftLeftAndFillUpperFromUint64x8", argLength: 3, commutative: false}, + {name: "ShiftRightUint64x8", argLength: 2, commutative: false}, + {name: "ShiftRightAndFillUpperFromUint64x8", argLength: 3, commutative: false}, + {name: "ShiftRightSignExtendedUint64x8", argLength: 2, commutative: false}, {name: "SubUint64x8", argLength: 2, commutative: false}, {name: "XorUint64x8", argLength: 2, commutative: true}, {name: "AddUint8x16", argLength: 2, commutative: true}, @@ -1372,20 +1650,140 @@ func simdGenericOps() []opData { {name: "RoundWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"}, {name: 
"TruncSuppressExceptionWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"}, {name: "TruncWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllLeftAndFillUpperFromInt16x16", argLength: 3, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllRightAndFillUpperFromInt16x16", argLength: 3, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftAndFillUpperFromInt16x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightAndFillUpperFromInt16x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllLeftAndFillUpperFromInt16x32", argLength: 3, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllRightAndFillUpperFromInt16x32", argLength: 3, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftAndFillUpperFromInt16x32", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightAndFillUpperFromInt16x32", argLength: 2, commutative: false, aux: "Int8"}, {name: "GetElemInt16x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllLeftAndFillUpperFromInt16x8", argLength: 3, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllRightAndFillUpperFromInt16x8", argLength: 3, commutative: false, aux: "Int8"}, {name: "SetElemInt16x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftAndFillUpperFromInt16x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightAndFillUpperFromInt16x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRotateAllLeftInt32x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRotateAllRightInt32x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllLeftAndFillUpperFromInt32x16", argLength: 3, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllRightAndFillUpperFromInt32x16", argLength: 3, commutative: false, aux: "Int8"}, + {name: "RotateAllLeftInt32x16", argLength: 1, commutative: false, aux: "Int8"}, + {name: "RotateAllRightInt32x16", argLength: 1, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftAndFillUpperFromInt32x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightAndFillUpperFromInt32x16", argLength: 2, commutative: false, aux: "Int8"}, {name: "GetElemInt32x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "MaskedRotateAllLeftInt32x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRotateAllRightInt32x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllLeftAndFillUpperFromInt32x4", argLength: 3, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllRightAndFillUpperFromInt32x4", argLength: 3, commutative: false, aux: "Int8"}, + {name: "RotateAllLeftInt32x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "RotateAllRightInt32x4", argLength: 1, commutative: false, aux: "Int8"}, {name: "SetElemInt32x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftAndFillUpperFromInt32x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightAndFillUpperFromInt32x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRotateAllLeftInt32x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRotateAllRightInt32x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllLeftAndFillUpperFromInt32x8", argLength: 3, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllRightAndFillUpperFromInt32x8", argLength: 3, commutative: false, aux: "Int8"}, + {name: 
"RotateAllLeftInt32x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "RotateAllRightInt32x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftAndFillUpperFromInt32x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightAndFillUpperFromInt32x8", argLength: 2, commutative: false, aux: "Int8"}, {name: "GetElemInt64x2", argLength: 1, commutative: false, aux: "Int8"}, + {name: "MaskedRotateAllLeftInt64x2", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRotateAllRightInt64x2", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllLeftAndFillUpperFromInt64x2", argLength: 3, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllRightAndFillUpperFromInt64x2", argLength: 3, commutative: false, aux: "Int8"}, + {name: "RotateAllLeftInt64x2", argLength: 1, commutative: false, aux: "Int8"}, + {name: "RotateAllRightInt64x2", argLength: 1, commutative: false, aux: "Int8"}, {name: "SetElemInt64x2", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftAndFillUpperFromInt64x2", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightAndFillUpperFromInt64x2", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRotateAllLeftInt64x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRotateAllRightInt64x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllLeftAndFillUpperFromInt64x4", argLength: 3, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllRightAndFillUpperFromInt64x4", argLength: 3, commutative: false, aux: "Int8"}, + {name: "RotateAllLeftInt64x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "RotateAllRightInt64x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftAndFillUpperFromInt64x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightAndFillUpperFromInt64x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRotateAllLeftInt64x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRotateAllRightInt64x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllLeftAndFillUpperFromInt64x8", argLength: 3, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllRightAndFillUpperFromInt64x8", argLength: 3, commutative: false, aux: "Int8"}, + {name: "RotateAllLeftInt64x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "RotateAllRightInt64x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftAndFillUpperFromInt64x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightAndFillUpperFromInt64x8", argLength: 2, commutative: false, aux: "Int8"}, {name: "GetElemInt8x16", argLength: 1, commutative: false, aux: "Int8"}, {name: "SetElemInt8x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllLeftAndFillUpperFromUint16x16", argLength: 3, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllRightAndFillUpperFromUint16x16", argLength: 3, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftAndFillUpperFromUint16x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightAndFillUpperFromUint16x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllLeftAndFillUpperFromUint16x32", argLength: 3, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllRightAndFillUpperFromUint16x32", argLength: 3, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftAndFillUpperFromUint16x32", argLength: 2, commutative: false, aux: 
"Int8"}, + {name: "ShiftAllRightAndFillUpperFromUint16x32", argLength: 2, commutative: false, aux: "Int8"}, {name: "GetElemUint16x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllLeftAndFillUpperFromUint16x8", argLength: 3, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllRightAndFillUpperFromUint16x8", argLength: 3, commutative: false, aux: "Int8"}, {name: "SetElemUint16x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftAndFillUpperFromUint16x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightAndFillUpperFromUint16x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRotateAllLeftUint32x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRotateAllRightUint32x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllLeftAndFillUpperFromUint32x16", argLength: 3, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllRightAndFillUpperFromUint32x16", argLength: 3, commutative: false, aux: "Int8"}, + {name: "RotateAllLeftUint32x16", argLength: 1, commutative: false, aux: "Int8"}, + {name: "RotateAllRightUint32x16", argLength: 1, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftAndFillUpperFromUint32x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightAndFillUpperFromUint32x16", argLength: 2, commutative: false, aux: "Int8"}, {name: "GetElemUint32x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "MaskedRotateAllLeftUint32x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRotateAllRightUint32x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllLeftAndFillUpperFromUint32x4", argLength: 3, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllRightAndFillUpperFromUint32x4", argLength: 3, commutative: false, aux: "Int8"}, + {name: "RotateAllLeftUint32x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "RotateAllRightUint32x4", argLength: 1, commutative: false, aux: "Int8"}, {name: "SetElemUint32x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftAndFillUpperFromUint32x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightAndFillUpperFromUint32x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRotateAllLeftUint32x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRotateAllRightUint32x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllLeftAndFillUpperFromUint32x8", argLength: 3, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllRightAndFillUpperFromUint32x8", argLength: 3, commutative: false, aux: "Int8"}, + {name: "RotateAllLeftUint32x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "RotateAllRightUint32x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftAndFillUpperFromUint32x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightAndFillUpperFromUint32x8", argLength: 2, commutative: false, aux: "Int8"}, {name: "GetElemUint64x2", argLength: 1, commutative: false, aux: "Int8"}, + {name: "MaskedRotateAllLeftUint64x2", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRotateAllRightUint64x2", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllLeftAndFillUpperFromUint64x2", argLength: 3, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllRightAndFillUpperFromUint64x2", argLength: 3, commutative: false, aux: "Int8"}, + {name: "RotateAllLeftUint64x2", argLength: 1, 
commutative: false, aux: "Int8"}, + {name: "RotateAllRightUint64x2", argLength: 1, commutative: false, aux: "Int8"}, {name: "SetElemUint64x2", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftAndFillUpperFromUint64x2", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightAndFillUpperFromUint64x2", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRotateAllLeftUint64x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRotateAllRightUint64x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllLeftAndFillUpperFromUint64x4", argLength: 3, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllRightAndFillUpperFromUint64x4", argLength: 3, commutative: false, aux: "Int8"}, + {name: "RotateAllLeftUint64x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "RotateAllRightUint64x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftAndFillUpperFromUint64x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightAndFillUpperFromUint64x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRotateAllLeftUint64x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRotateAllRightUint64x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllLeftAndFillUpperFromUint64x8", argLength: 3, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllRightAndFillUpperFromUint64x8", argLength: 3, commutative: false, aux: "Int8"}, + {name: "RotateAllLeftUint64x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "RotateAllRightUint64x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftAndFillUpperFromUint64x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightAndFillUpperFromUint64x8", argLength: 2, commutative: false, aux: "Int8"}, {name: "GetElemUint8x16", argLength: 1, commutative: false, aux: "Int8"}, {name: "SetElemUint8x16", argLength: 2, commutative: false, aux: "Int8"}, } diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 7a1126d433f..2bdbd5156e1 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1426,6 +1426,11 @@ const ( OpAMD64VPOPCNTWMasked256 OpAMD64VPADDSWMasked256 OpAMD64VPSUBSWMasked256 + OpAMD64VPSLLVWMasked256 + OpAMD64VPSHLDVWMasked256 + OpAMD64VPSRLVWMasked256 + OpAMD64VPSHRDVWMasked256 + OpAMD64VPSRAVWMasked256 OpAMD64VPSUBWMasked256 OpAMD64VPMAXSW256 OpAMD64VPMINSW256 @@ -1439,6 +1444,14 @@ const ( OpAMD64VPHADDSW256 OpAMD64VPHSUBSW256 OpAMD64VPSUBSW256 + OpAMD64VPSLLW256 + OpAMD64VPSRLW256 + OpAMD64VPSRAW256 + OpAMD64VPSLLVW256 + OpAMD64VPSHLDVW256 + OpAMD64VPSRLVW256 + OpAMD64VPSHRDVW256 + OpAMD64VPSRAVW256 OpAMD64VPSIGNW256 OpAMD64VPSUBW256 OpAMD64VPABSW512 @@ -1453,6 +1466,11 @@ const ( OpAMD64VPOPCNTWMasked512 OpAMD64VPADDSWMasked512 OpAMD64VPSUBSWMasked512 + OpAMD64VPSLLVWMasked512 + OpAMD64VPSHLDVWMasked512 + OpAMD64VPSRLVWMasked512 + OpAMD64VPSHRDVWMasked512 + OpAMD64VPSRAVWMasked512 OpAMD64VPSUBWMasked512 OpAMD64VPMAXSW512 OpAMD64VPMINSW512 @@ -1462,6 +1480,11 @@ const ( OpAMD64VPOPCNTW512 OpAMD64VPADDSW512 OpAMD64VPSUBSW512 + OpAMD64VPSLLVW512 + OpAMD64VPSHLDVW512 + OpAMD64VPSRLVW512 + OpAMD64VPSHRDVW512 + OpAMD64VPSRAVW512 OpAMD64VPSUBW512 OpAMD64VPABSW128 OpAMD64VPADDW128 @@ -1477,6 +1500,11 @@ const ( OpAMD64VPOPCNTWMasked128 OpAMD64VPADDSWMasked128 OpAMD64VPSUBSWMasked128 + OpAMD64VPSLLVWMasked128 + OpAMD64VPSHLDVWMasked128 + OpAMD64VPSRLVWMasked128 + 
OpAMD64VPSHRDVWMasked128 + OpAMD64VPSRAVWMasked128 OpAMD64VPSUBWMasked128 OpAMD64VPMAXSW128 OpAMD64VPMINSW128 @@ -1490,6 +1518,14 @@ const ( OpAMD64VPHADDSW128 OpAMD64VPHSUBSW128 OpAMD64VPSUBSW128 + OpAMD64VPSLLW128 + OpAMD64VPSRLW128 + OpAMD64VPSRAW128 + OpAMD64VPSLLVW128 + OpAMD64VPSHLDVW128 + OpAMD64VPSRLVW128 + OpAMD64VPSHRDVW128 + OpAMD64VPSRAVW128 OpAMD64VPSIGNW128 OpAMD64VPSUBW128 OpAMD64VPABSD512 @@ -1506,8 +1542,15 @@ const ( OpAMD64VPORDMasked512 OpAMD64VPDPWSSDMasked512 OpAMD64VPOPCNTDMasked512 + OpAMD64VPROLVDMasked512 + OpAMD64VPRORVDMasked512 OpAMD64VPDPWSSDSMasked512 OpAMD64VPDPBUSDSMasked512 + OpAMD64VPSLLVDMasked512 + OpAMD64VPSHLDVDMasked512 + OpAMD64VPSRLVDMasked512 + OpAMD64VPSHRDVDMasked512 + OpAMD64VPSRAVDMasked512 OpAMD64VPSUBDMasked512 OpAMD64VPDPBUSDMasked512 OpAMD64VPXORDMasked512 @@ -1517,8 +1560,15 @@ const ( OpAMD64VPORD512 OpAMD64VPDPWSSD512 OpAMD64VPOPCNTD512 + OpAMD64VPROLVD512 + OpAMD64VPRORVD512 OpAMD64VPDPWSSDS512 OpAMD64VPDPBUSDS512 + OpAMD64VPSLLVD512 + OpAMD64VPSHLDVD512 + OpAMD64VPSRLVD512 + OpAMD64VPSHRDVD512 + OpAMD64VPSRAVD512 OpAMD64VPSUBD512 OpAMD64VPDPBUSD512 OpAMD64VPXORD512 @@ -1536,8 +1586,15 @@ const ( OpAMD64VPORDMasked128 OpAMD64VPDPWSSDMasked128 OpAMD64VPOPCNTDMasked128 + OpAMD64VPROLVDMasked128 + OpAMD64VPRORVDMasked128 OpAMD64VPDPWSSDSMasked128 OpAMD64VPDPBUSDSMasked128 + OpAMD64VPSLLVDMasked128 + OpAMD64VPSHLDVDMasked128 + OpAMD64VPSRLVDMasked128 + OpAMD64VPSHRDVDMasked128 + OpAMD64VPSRAVDMasked128 OpAMD64VPSUBDMasked128 OpAMD64VPDPBUSDMasked128 OpAMD64VPXORDMasked128 @@ -1549,8 +1606,18 @@ const ( OpAMD64VPHADDD128 OpAMD64VPHSUBD128 OpAMD64VPOPCNTD128 + OpAMD64VPROLVD128 + OpAMD64VPRORVD128 OpAMD64VPDPWSSDS128 OpAMD64VPDPBUSDS128 + OpAMD64VPSLLD128 + OpAMD64VPSRLD128 + OpAMD64VPSRAD128 + OpAMD64VPSLLVD128 + OpAMD64VPSHLDVD128 + OpAMD64VPSRLVD128 + OpAMD64VPSHRDVD128 + OpAMD64VPSRAVD128 OpAMD64VPSIGND128 OpAMD64VPSUBD128 OpAMD64VPDPBUSD128 @@ -1568,8 +1635,15 @@ const ( OpAMD64VPORDMasked256 OpAMD64VPDPWSSDMasked256 OpAMD64VPOPCNTDMasked256 + OpAMD64VPROLVDMasked256 + OpAMD64VPRORVDMasked256 OpAMD64VPDPWSSDSMasked256 OpAMD64VPDPBUSDSMasked256 + OpAMD64VPSLLVDMasked256 + OpAMD64VPSHLDVDMasked256 + OpAMD64VPSRLVDMasked256 + OpAMD64VPSHRDVDMasked256 + OpAMD64VPSRAVDMasked256 OpAMD64VPSUBDMasked256 OpAMD64VPDPBUSDMasked256 OpAMD64VPXORDMasked256 @@ -1581,8 +1655,18 @@ const ( OpAMD64VPHADDD256 OpAMD64VPHSUBD256 OpAMD64VPOPCNTD256 + OpAMD64VPROLVD256 + OpAMD64VPRORVD256 OpAMD64VPDPWSSDS256 OpAMD64VPDPBUSDS256 + OpAMD64VPSLLD256 + OpAMD64VPSRLD256 + OpAMD64VPSRAD256 + OpAMD64VPSLLVD256 + OpAMD64VPSHLDVD256 + OpAMD64VPSRLVD256 + OpAMD64VPSHRDVD256 + OpAMD64VPSRAVD256 OpAMD64VPSIGND256 OpAMD64VPSUBD256 OpAMD64VPDPBUSD256 @@ -1599,12 +1683,32 @@ const ( OpAMD64VPMULLQMasked128 OpAMD64VPORQMasked128 OpAMD64VPOPCNTQMasked128 + OpAMD64VPROLVQMasked128 + OpAMD64VPRORVQMasked128 + OpAMD64VPSLLQMasked128 + OpAMD64VPSRLQMasked128 + OpAMD64VPSRAQMasked128 + OpAMD64VPSLLVQMasked128 + OpAMD64VPSHLDVQMasked128 + OpAMD64VPSRLVQMasked128 + OpAMD64VPSHRDVQMasked128 + OpAMD64VPSRAVQMasked128 OpAMD64VPSUBQMasked128 OpAMD64VPXORQMasked128 OpAMD64VPMAXSQ128 OpAMD64VPMINSQ128 OpAMD64VPMULLQ128 OpAMD64VPOPCNTQ128 + OpAMD64VPROLVQ128 + OpAMD64VPRORVQ128 + OpAMD64VPSLLQ128 + OpAMD64VPSRLQ128 + OpAMD64VPSRAQ128 + OpAMD64VPSLLVQ128 + OpAMD64VPSHLDVQ128 + OpAMD64VPSRLVQ128 + OpAMD64VPSHRDVQ128 + OpAMD64VPSRAVQ128 OpAMD64VPSUBQ128 OpAMD64VPABSQ256 OpAMD64VPADDQ256 @@ -1620,12 +1724,32 @@ const ( OpAMD64VPMULLQMasked256 OpAMD64VPORQMasked256 OpAMD64VPOPCNTQMasked256 + 
OpAMD64VPROLVQMasked256 + OpAMD64VPRORVQMasked256 + OpAMD64VPSLLQMasked256 + OpAMD64VPSRLQMasked256 + OpAMD64VPSRAQMasked256 + OpAMD64VPSLLVQMasked256 + OpAMD64VPSHLDVQMasked256 + OpAMD64VPSRLVQMasked256 + OpAMD64VPSHRDVQMasked256 + OpAMD64VPSRAVQMasked256 OpAMD64VPSUBQMasked256 OpAMD64VPXORQMasked256 OpAMD64VPMAXSQ256 OpAMD64VPMINSQ256 OpAMD64VPMULLQ256 OpAMD64VPOPCNTQ256 + OpAMD64VPROLVQ256 + OpAMD64VPRORVQ256 + OpAMD64VPSLLQ256 + OpAMD64VPSRLQ256 + OpAMD64VPSRAQ256 + OpAMD64VPSLLVQ256 + OpAMD64VPSHLDVQ256 + OpAMD64VPSRLVQ256 + OpAMD64VPSHRDVQ256 + OpAMD64VPSRAVQ256 OpAMD64VPSUBQ256 OpAMD64VPABSQ512 OpAMD64VPADDQ512 @@ -1641,6 +1765,16 @@ const ( OpAMD64VPMULLQMasked512 OpAMD64VPORQMasked512 OpAMD64VPOPCNTQMasked512 + OpAMD64VPROLVQMasked512 + OpAMD64VPRORVQMasked512 + OpAMD64VPSLLQMasked512 + OpAMD64VPSRLQMasked512 + OpAMD64VPSRAQMasked512 + OpAMD64VPSLLVQMasked512 + OpAMD64VPSHLDVQMasked512 + OpAMD64VPSRLVQMasked512 + OpAMD64VPSHRDVQMasked512 + OpAMD64VPSRAVQMasked512 OpAMD64VPSUBQMasked512 OpAMD64VPXORQMasked512 OpAMD64VPMAXSQ512 @@ -1649,6 +1783,16 @@ const ( OpAMD64VPMULLQ512 OpAMD64VPORQ512 OpAMD64VPOPCNTQ512 + OpAMD64VPROLVQ512 + OpAMD64VPRORVQ512 + OpAMD64VPSLLQ512 + OpAMD64VPSRLQ512 + OpAMD64VPSRAQ512 + OpAMD64VPSLLVQ512 + OpAMD64VPSHLDVQ512 + OpAMD64VPSRLVQ512 + OpAMD64VPSHRDVQ512 + OpAMD64VPSRAVQ512 OpAMD64VPSUBQ512 OpAMD64VPXORQ512 OpAMD64VPABSB128 @@ -1834,28 +1978,88 @@ const ( OpAMD64VCMPPDMasked512 OpAMD64VPCMPW256 OpAMD64VPCMPWMasked256 + OpAMD64VPSHLDWMasked256 + OpAMD64VPSHRDWMasked256 + OpAMD64VPSHLDW256 + OpAMD64VPSHRDW256 OpAMD64VPCMPW512 OpAMD64VPCMPWMasked512 + OpAMD64VPSHLDWMasked512 + OpAMD64VPSHRDWMasked512 + OpAMD64VPSHLDW512 + OpAMD64VPSHRDW512 OpAMD64VPEXTRW128 OpAMD64VPCMPW128 OpAMD64VPCMPWMasked128 + OpAMD64VPSHLDWMasked128 + OpAMD64VPSHRDWMasked128 OpAMD64VPINSRW128 + OpAMD64VPSHLDW128 + OpAMD64VPSHRDW128 OpAMD64VPCMPD512 OpAMD64VPCMPDMasked512 + OpAMD64VPROLDMasked512 + OpAMD64VPRORDMasked512 + OpAMD64VPSHLDDMasked512 + OpAMD64VPSHRDDMasked512 + OpAMD64VPROLD512 + OpAMD64VPRORD512 + OpAMD64VPSHLDD512 + OpAMD64VPSHRDD512 OpAMD64VPEXTRD128 OpAMD64VPCMPD128 OpAMD64VPCMPDMasked128 + OpAMD64VPROLDMasked128 + OpAMD64VPRORDMasked128 + OpAMD64VPSHLDDMasked128 + OpAMD64VPSHRDDMasked128 + OpAMD64VPROLD128 + OpAMD64VPRORD128 OpAMD64VPINSRD128 + OpAMD64VPSHLDD128 + OpAMD64VPSHRDD128 OpAMD64VPCMPD256 OpAMD64VPCMPDMasked256 + OpAMD64VPROLDMasked256 + OpAMD64VPRORDMasked256 + OpAMD64VPSHLDDMasked256 + OpAMD64VPSHRDDMasked256 + OpAMD64VPROLD256 + OpAMD64VPRORD256 + OpAMD64VPSHLDD256 + OpAMD64VPSHRDD256 OpAMD64VPEXTRQ128 OpAMD64VPCMPQ128 OpAMD64VPCMPQMasked128 + OpAMD64VPROLQMasked128 + OpAMD64VPRORQMasked128 + OpAMD64VPSHLDQMasked128 + OpAMD64VPSHRDQMasked128 + OpAMD64VPROLQ128 + OpAMD64VPRORQ128 OpAMD64VPINSRQ128 + OpAMD64VPSHLDQ128 + OpAMD64VPSHRDQ128 OpAMD64VPCMPQ256 OpAMD64VPCMPQMasked256 + OpAMD64VPROLQMasked256 + OpAMD64VPRORQMasked256 + OpAMD64VPSHLDQMasked256 + OpAMD64VPSHRDQMasked256 + OpAMD64VPROLQ256 + OpAMD64VPRORQ256 + OpAMD64VPSHLDQ256 + OpAMD64VPSHRDQ256 OpAMD64VPCMPQ512 OpAMD64VPCMPQMasked512 + OpAMD64VPROLQMasked512 + OpAMD64VPRORQMasked512 + OpAMD64VPSHLDQMasked512 + OpAMD64VPSHRDQMasked512 + OpAMD64VPROLQ512 + OpAMD64VPRORQ512 + OpAMD64VPSHLDQ512 + OpAMD64VPSHRDQ512 OpAMD64VPEXTRB128 OpAMD64VPCMPB128 OpAMD64VPCMPBMasked128 @@ -4456,6 +4660,11 @@ const ( OpMaskedPopCountInt16x16 OpMaskedSaturatedAddInt16x16 OpMaskedSaturatedSubInt16x16 + OpMaskedShiftLeftInt16x16 + OpMaskedShiftLeftAndFillUpperFromInt16x16 + OpMaskedShiftRightInt16x16 + 
OpMaskedShiftRightAndFillUpperFromInt16x16 + OpMaskedShiftRightSignExtendedInt16x16 OpMaskedSubInt16x16 OpMaxInt16x16 OpMinInt16x16 @@ -4471,6 +4680,14 @@ const ( OpSaturatedPairwiseAddInt16x16 OpSaturatedPairwiseSubInt16x16 OpSaturatedSubInt16x16 + OpShiftAllLeftInt16x16 + OpShiftAllRightInt16x16 + OpShiftAllRightSignExtendedInt16x16 + OpShiftLeftInt16x16 + OpShiftLeftAndFillUpperFromInt16x16 + OpShiftRightInt16x16 + OpShiftRightAndFillUpperFromInt16x16 + OpShiftRightSignExtendedInt16x16 OpSignInt16x16 OpSubInt16x16 OpXorInt16x16 @@ -4497,6 +4714,11 @@ const ( OpMaskedPopCountInt16x32 OpMaskedSaturatedAddInt16x32 OpMaskedSaturatedSubInt16x32 + OpMaskedShiftLeftInt16x32 + OpMaskedShiftLeftAndFillUpperFromInt16x32 + OpMaskedShiftRightInt16x32 + OpMaskedShiftRightAndFillUpperFromInt16x32 + OpMaskedShiftRightSignExtendedInt16x32 OpMaskedSubInt16x32 OpMaxInt16x32 OpMinInt16x32 @@ -4507,6 +4729,11 @@ const ( OpPopCountInt16x32 OpSaturatedAddInt16x32 OpSaturatedSubInt16x32 + OpShiftLeftInt16x32 + OpShiftLeftAndFillUpperFromInt16x32 + OpShiftRightInt16x32 + OpShiftRightAndFillUpperFromInt16x32 + OpShiftRightSignExtendedInt16x32 OpSubInt16x32 OpAbsoluteInt16x8 OpAddInt16x8 @@ -4533,6 +4760,11 @@ const ( OpMaskedPopCountInt16x8 OpMaskedSaturatedAddInt16x8 OpMaskedSaturatedSubInt16x8 + OpMaskedShiftLeftInt16x8 + OpMaskedShiftLeftAndFillUpperFromInt16x8 + OpMaskedShiftRightInt16x8 + OpMaskedShiftRightAndFillUpperFromInt16x8 + OpMaskedShiftRightSignExtendedInt16x8 OpMaskedSubInt16x8 OpMaxInt16x8 OpMinInt16x8 @@ -4548,6 +4780,14 @@ const ( OpSaturatedPairwiseAddInt16x8 OpSaturatedPairwiseSubInt16x8 OpSaturatedSubInt16x8 + OpShiftAllLeftInt16x8 + OpShiftAllRightInt16x8 + OpShiftAllRightSignExtendedInt16x8 + OpShiftLeftInt16x8 + OpShiftLeftAndFillUpperFromInt16x8 + OpShiftRightInt16x8 + OpShiftRightAndFillUpperFromInt16x8 + OpShiftRightSignExtendedInt16x8 OpSignInt16x8 OpSubInt16x8 OpXorInt16x8 @@ -4576,8 +4816,15 @@ const ( OpMaskedOrInt32x16 OpMaskedPairDotProdAccumulateInt32x16 OpMaskedPopCountInt32x16 + OpMaskedRotateLeftInt32x16 + OpMaskedRotateRightInt32x16 OpMaskedSaturatedPairDotProdAccumulateInt32x16 OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16 + OpMaskedShiftLeftInt32x16 + OpMaskedShiftLeftAndFillUpperFromInt32x16 + OpMaskedShiftRightInt32x16 + OpMaskedShiftRightAndFillUpperFromInt32x16 + OpMaskedShiftRightSignExtendedInt32x16 OpMaskedSubInt32x16 OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x16 OpMaskedXorInt32x16 @@ -4588,8 +4835,15 @@ const ( OpOrInt32x16 OpPairDotProdAccumulateInt32x16 OpPopCountInt32x16 + OpRotateLeftInt32x16 + OpRotateRightInt32x16 OpSaturatedPairDotProdAccumulateInt32x16 OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16 + OpShiftLeftInt32x16 + OpShiftLeftAndFillUpperFromInt32x16 + OpShiftRightInt32x16 + OpShiftRightAndFillUpperFromInt32x16 + OpShiftRightSignExtendedInt32x16 OpSubInt32x16 OpUnsignedSignedQuadDotProdAccumulateInt32x16 OpXorInt32x16 @@ -4618,8 +4872,15 @@ const ( OpMaskedOrInt32x4 OpMaskedPairDotProdAccumulateInt32x4 OpMaskedPopCountInt32x4 + OpMaskedRotateLeftInt32x4 + OpMaskedRotateRightInt32x4 OpMaskedSaturatedPairDotProdAccumulateInt32x4 OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4 + OpMaskedShiftLeftInt32x4 + OpMaskedShiftLeftAndFillUpperFromInt32x4 + OpMaskedShiftRightInt32x4 + OpMaskedShiftRightAndFillUpperFromInt32x4 + OpMaskedShiftRightSignExtendedInt32x4 OpMaskedSubInt32x4 OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x4 OpMaskedXorInt32x4 @@ -4633,8 +4894,18 @@ const ( OpPairwiseAddInt32x4 
OpPairwiseSubInt32x4 OpPopCountInt32x4 + OpRotateLeftInt32x4 + OpRotateRightInt32x4 OpSaturatedPairDotProdAccumulateInt32x4 OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4 + OpShiftAllLeftInt32x4 + OpShiftAllRightInt32x4 + OpShiftAllRightSignExtendedInt32x4 + OpShiftLeftInt32x4 + OpShiftLeftAndFillUpperFromInt32x4 + OpShiftRightInt32x4 + OpShiftRightAndFillUpperFromInt32x4 + OpShiftRightSignExtendedInt32x4 OpSignInt32x4 OpSubInt32x4 OpUnsignedSignedQuadDotProdAccumulateInt32x4 @@ -4664,8 +4935,15 @@ const ( OpMaskedOrInt32x8 OpMaskedPairDotProdAccumulateInt32x8 OpMaskedPopCountInt32x8 + OpMaskedRotateLeftInt32x8 + OpMaskedRotateRightInt32x8 OpMaskedSaturatedPairDotProdAccumulateInt32x8 OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8 + OpMaskedShiftLeftInt32x8 + OpMaskedShiftLeftAndFillUpperFromInt32x8 + OpMaskedShiftRightInt32x8 + OpMaskedShiftRightAndFillUpperFromInt32x8 + OpMaskedShiftRightSignExtendedInt32x8 OpMaskedSubInt32x8 OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x8 OpMaskedXorInt32x8 @@ -4679,8 +4957,18 @@ const ( OpPairwiseAddInt32x8 OpPairwiseSubInt32x8 OpPopCountInt32x8 + OpRotateLeftInt32x8 + OpRotateRightInt32x8 OpSaturatedPairDotProdAccumulateInt32x8 OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8 + OpShiftAllLeftInt32x8 + OpShiftAllRightInt32x8 + OpShiftAllRightSignExtendedInt32x8 + OpShiftLeftInt32x8 + OpShiftLeftAndFillUpperFromInt32x8 + OpShiftRightInt32x8 + OpShiftRightAndFillUpperFromInt32x8 + OpShiftRightSignExtendedInt32x8 OpSignInt32x8 OpSubInt32x8 OpUnsignedSignedQuadDotProdAccumulateInt32x8 @@ -4710,6 +4998,16 @@ const ( OpMaskedNotEqualInt64x2 OpMaskedOrInt64x2 OpMaskedPopCountInt64x2 + OpMaskedRotateLeftInt64x2 + OpMaskedRotateRightInt64x2 + OpMaskedShiftAllLeftInt64x2 + OpMaskedShiftAllRightInt64x2 + OpMaskedShiftAllRightSignExtendedInt64x2 + OpMaskedShiftLeftInt64x2 + OpMaskedShiftLeftAndFillUpperFromInt64x2 + OpMaskedShiftRightInt64x2 + OpMaskedShiftRightAndFillUpperFromInt64x2 + OpMaskedShiftRightSignExtendedInt64x2 OpMaskedSubInt64x2 OpMaskedXorInt64x2 OpMaxInt64x2 @@ -4719,6 +5017,16 @@ const ( OpNotEqualInt64x2 OpOrInt64x2 OpPopCountInt64x2 + OpRotateLeftInt64x2 + OpRotateRightInt64x2 + OpShiftAllLeftInt64x2 + OpShiftAllRightInt64x2 + OpShiftAllRightSignExtendedInt64x2 + OpShiftLeftInt64x2 + OpShiftLeftAndFillUpperFromInt64x2 + OpShiftRightInt64x2 + OpShiftRightAndFillUpperFromInt64x2 + OpShiftRightSignExtendedInt64x2 OpSubInt64x2 OpXorInt64x2 OpAbsoluteInt64x4 @@ -4746,6 +5054,16 @@ const ( OpMaskedNotEqualInt64x4 OpMaskedOrInt64x4 OpMaskedPopCountInt64x4 + OpMaskedRotateLeftInt64x4 + OpMaskedRotateRightInt64x4 + OpMaskedShiftAllLeftInt64x4 + OpMaskedShiftAllRightInt64x4 + OpMaskedShiftAllRightSignExtendedInt64x4 + OpMaskedShiftLeftInt64x4 + OpMaskedShiftLeftAndFillUpperFromInt64x4 + OpMaskedShiftRightInt64x4 + OpMaskedShiftRightAndFillUpperFromInt64x4 + OpMaskedShiftRightSignExtendedInt64x4 OpMaskedSubInt64x4 OpMaskedXorInt64x4 OpMaxInt64x4 @@ -4755,6 +5073,16 @@ const ( OpNotEqualInt64x4 OpOrInt64x4 OpPopCountInt64x4 + OpRotateLeftInt64x4 + OpRotateRightInt64x4 + OpShiftAllLeftInt64x4 + OpShiftAllRightInt64x4 + OpShiftAllRightSignExtendedInt64x4 + OpShiftLeftInt64x4 + OpShiftLeftAndFillUpperFromInt64x4 + OpShiftRightInt64x4 + OpShiftRightAndFillUpperFromInt64x4 + OpShiftRightSignExtendedInt64x4 OpSubInt64x4 OpXorInt64x4 OpAbsoluteInt64x8 @@ -4782,6 +5110,16 @@ const ( OpMaskedNotEqualInt64x8 OpMaskedOrInt64x8 OpMaskedPopCountInt64x8 + OpMaskedRotateLeftInt64x8 + OpMaskedRotateRightInt64x8 + OpMaskedShiftAllLeftInt64x8 + 
OpMaskedShiftAllRightInt64x8 + OpMaskedShiftAllRightSignExtendedInt64x8 + OpMaskedShiftLeftInt64x8 + OpMaskedShiftLeftAndFillUpperFromInt64x8 + OpMaskedShiftRightInt64x8 + OpMaskedShiftRightAndFillUpperFromInt64x8 + OpMaskedShiftRightSignExtendedInt64x8 OpMaskedSubInt64x8 OpMaskedXorInt64x8 OpMaxInt64x8 @@ -4791,6 +5129,16 @@ const ( OpNotEqualInt64x8 OpOrInt64x8 OpPopCountInt64x8 + OpRotateLeftInt64x8 + OpRotateRightInt64x8 + OpShiftAllLeftInt64x8 + OpShiftAllRightInt64x8 + OpShiftAllRightSignExtendedInt64x8 + OpShiftLeftInt64x8 + OpShiftLeftAndFillUpperFromInt64x8 + OpShiftRightInt64x8 + OpShiftRightAndFillUpperFromInt64x8 + OpShiftRightSignExtendedInt64x8 OpSubInt64x8 OpXorInt64x8 OpAbsoluteInt8x16 @@ -4910,6 +5258,11 @@ const ( OpMaskedPopCountUint16x16 OpMaskedSaturatedAddUint16x16 OpMaskedSaturatedSubUint16x16 + OpMaskedShiftLeftUint16x16 + OpMaskedShiftLeftAndFillUpperFromUint16x16 + OpMaskedShiftRightUint16x16 + OpMaskedShiftRightAndFillUpperFromUint16x16 + OpMaskedShiftRightSignExtendedUint16x16 OpMaskedSubUint16x16 OpMaxUint16x16 OpMinUint16x16 @@ -4921,6 +5274,13 @@ const ( OpPopCountUint16x16 OpSaturatedAddUint16x16 OpSaturatedSubUint16x16 + OpShiftAllLeftUint16x16 + OpShiftAllRightUint16x16 + OpShiftLeftUint16x16 + OpShiftLeftAndFillUpperFromUint16x16 + OpShiftRightUint16x16 + OpShiftRightAndFillUpperFromUint16x16 + OpShiftRightSignExtendedUint16x16 OpSubUint16x16 OpXorUint16x16 OpAddUint16x32 @@ -4944,6 +5304,11 @@ const ( OpMaskedPopCountUint16x32 OpMaskedSaturatedAddUint16x32 OpMaskedSaturatedSubUint16x32 + OpMaskedShiftLeftUint16x32 + OpMaskedShiftLeftAndFillUpperFromUint16x32 + OpMaskedShiftRightUint16x32 + OpMaskedShiftRightAndFillUpperFromUint16x32 + OpMaskedShiftRightSignExtendedUint16x32 OpMaskedSubUint16x32 OpMaxUint16x32 OpMinUint16x32 @@ -4952,6 +5317,11 @@ const ( OpPopCountUint16x32 OpSaturatedAddUint16x32 OpSaturatedSubUint16x32 + OpShiftLeftUint16x32 + OpShiftLeftAndFillUpperFromUint16x32 + OpShiftRightUint16x32 + OpShiftRightAndFillUpperFromUint16x32 + OpShiftRightSignExtendedUint16x32 OpSubUint16x32 OpAddUint16x8 OpAndUint16x8 @@ -4976,6 +5346,11 @@ const ( OpMaskedPopCountUint16x8 OpMaskedSaturatedAddUint16x8 OpMaskedSaturatedSubUint16x8 + OpMaskedShiftLeftUint16x8 + OpMaskedShiftLeftAndFillUpperFromUint16x8 + OpMaskedShiftRightUint16x8 + OpMaskedShiftRightAndFillUpperFromUint16x8 + OpMaskedShiftRightSignExtendedUint16x8 OpMaskedSubUint16x8 OpMaxUint16x8 OpMinUint16x8 @@ -4987,6 +5362,13 @@ const ( OpPopCountUint16x8 OpSaturatedAddUint16x8 OpSaturatedSubUint16x8 + OpShiftAllLeftUint16x8 + OpShiftAllRightUint16x8 + OpShiftLeftUint16x8 + OpShiftLeftAndFillUpperFromUint16x8 + OpShiftRightUint16x8 + OpShiftRightAndFillUpperFromUint16x8 + OpShiftRightSignExtendedUint16x8 OpSubUint16x8 OpXorUint16x8 OpAddUint32x16 @@ -5010,7 +5392,14 @@ const ( OpMaskedNotEqualUint32x16 OpMaskedOrUint32x16 OpMaskedPopCountUint32x16 + OpMaskedRotateLeftUint32x16 + OpMaskedRotateRightUint32x16 OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16 + OpMaskedShiftLeftUint32x16 + OpMaskedShiftLeftAndFillUpperFromUint32x16 + OpMaskedShiftRightUint32x16 + OpMaskedShiftRightAndFillUpperFromUint32x16 + OpMaskedShiftRightSignExtendedUint32x16 OpMaskedSubUint32x16 OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x16 OpMaskedXorUint32x16 @@ -5019,7 +5408,14 @@ const ( OpNotEqualUint32x16 OpOrUint32x16 OpPopCountUint32x16 + OpRotateLeftUint32x16 + OpRotateRightUint32x16 OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16 + OpShiftLeftUint32x16 + OpShiftLeftAndFillUpperFromUint32x16 
+ OpShiftRightUint32x16 + OpShiftRightAndFillUpperFromUint32x16 + OpShiftRightSignExtendedUint32x16 OpSubUint32x16 OpUnsignedSignedQuadDotProdAccumulateUint32x16 OpXorUint32x16 @@ -5044,7 +5440,14 @@ const ( OpMaskedNotEqualUint32x4 OpMaskedOrUint32x4 OpMaskedPopCountUint32x4 + OpMaskedRotateLeftUint32x4 + OpMaskedRotateRightUint32x4 OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4 + OpMaskedShiftLeftUint32x4 + OpMaskedShiftLeftAndFillUpperFromUint32x4 + OpMaskedShiftRightUint32x4 + OpMaskedShiftRightAndFillUpperFromUint32x4 + OpMaskedShiftRightSignExtendedUint32x4 OpMaskedSubUint32x4 OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x4 OpMaskedXorUint32x4 @@ -5056,7 +5459,16 @@ const ( OpPairwiseAddUint32x4 OpPairwiseSubUint32x4 OpPopCountUint32x4 + OpRotateLeftUint32x4 + OpRotateRightUint32x4 OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4 + OpShiftAllLeftUint32x4 + OpShiftAllRightUint32x4 + OpShiftLeftUint32x4 + OpShiftLeftAndFillUpperFromUint32x4 + OpShiftRightUint32x4 + OpShiftRightAndFillUpperFromUint32x4 + OpShiftRightSignExtendedUint32x4 OpSubUint32x4 OpUnsignedSignedQuadDotProdAccumulateUint32x4 OpXorUint32x4 @@ -5081,7 +5493,14 @@ const ( OpMaskedNotEqualUint32x8 OpMaskedOrUint32x8 OpMaskedPopCountUint32x8 + OpMaskedRotateLeftUint32x8 + OpMaskedRotateRightUint32x8 OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8 + OpMaskedShiftLeftUint32x8 + OpMaskedShiftLeftAndFillUpperFromUint32x8 + OpMaskedShiftRightUint32x8 + OpMaskedShiftRightAndFillUpperFromUint32x8 + OpMaskedShiftRightSignExtendedUint32x8 OpMaskedSubUint32x8 OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x8 OpMaskedXorUint32x8 @@ -5093,7 +5512,16 @@ const ( OpPairwiseAddUint32x8 OpPairwiseSubUint32x8 OpPopCountUint32x8 + OpRotateLeftUint32x8 + OpRotateRightUint32x8 OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8 + OpShiftAllLeftUint32x8 + OpShiftAllRightUint32x8 + OpShiftLeftUint32x8 + OpShiftLeftAndFillUpperFromUint32x8 + OpShiftRightUint32x8 + OpShiftRightAndFillUpperFromUint32x8 + OpShiftRightSignExtendedUint32x8 OpSubUint32x8 OpUnsignedSignedQuadDotProdAccumulateUint32x8 OpXorUint32x8 @@ -5119,6 +5547,15 @@ const ( OpMaskedNotEqualUint64x2 OpMaskedOrUint64x2 OpMaskedPopCountUint64x2 + OpMaskedRotateLeftUint64x2 + OpMaskedRotateRightUint64x2 + OpMaskedShiftAllLeftUint64x2 + OpMaskedShiftAllRightUint64x2 + OpMaskedShiftLeftUint64x2 + OpMaskedShiftLeftAndFillUpperFromUint64x2 + OpMaskedShiftRightUint64x2 + OpMaskedShiftRightAndFillUpperFromUint64x2 + OpMaskedShiftRightSignExtendedUint64x2 OpMaskedSubUint64x2 OpMaskedXorUint64x2 OpMaxUint64x2 @@ -5127,6 +5564,15 @@ const ( OpNotEqualUint64x2 OpOrUint64x2 OpPopCountUint64x2 + OpRotateLeftUint64x2 + OpRotateRightUint64x2 + OpShiftAllLeftUint64x2 + OpShiftAllRightUint64x2 + OpShiftLeftUint64x2 + OpShiftLeftAndFillUpperFromUint64x2 + OpShiftRightUint64x2 + OpShiftRightAndFillUpperFromUint64x2 + OpShiftRightSignExtendedUint64x2 OpSubUint64x2 OpXorUint64x2 OpAddUint64x4 @@ -5151,6 +5597,15 @@ const ( OpMaskedNotEqualUint64x4 OpMaskedOrUint64x4 OpMaskedPopCountUint64x4 + OpMaskedRotateLeftUint64x4 + OpMaskedRotateRightUint64x4 + OpMaskedShiftAllLeftUint64x4 + OpMaskedShiftAllRightUint64x4 + OpMaskedShiftLeftUint64x4 + OpMaskedShiftLeftAndFillUpperFromUint64x4 + OpMaskedShiftRightUint64x4 + OpMaskedShiftRightAndFillUpperFromUint64x4 + OpMaskedShiftRightSignExtendedUint64x4 OpMaskedSubUint64x4 OpMaskedXorUint64x4 OpMaxUint64x4 @@ -5159,6 +5614,15 @@ const ( OpNotEqualUint64x4 OpOrUint64x4 OpPopCountUint64x4 + OpRotateLeftUint64x4 + 
OpRotateRightUint64x4 + OpShiftAllLeftUint64x4 + OpShiftAllRightUint64x4 + OpShiftLeftUint64x4 + OpShiftLeftAndFillUpperFromUint64x4 + OpShiftRightUint64x4 + OpShiftRightAndFillUpperFromUint64x4 + OpShiftRightSignExtendedUint64x4 OpSubUint64x4 OpXorUint64x4 OpAddUint64x8 @@ -5183,6 +5647,15 @@ const ( OpMaskedNotEqualUint64x8 OpMaskedOrUint64x8 OpMaskedPopCountUint64x8 + OpMaskedRotateLeftUint64x8 + OpMaskedRotateRightUint64x8 + OpMaskedShiftAllLeftUint64x8 + OpMaskedShiftAllRightUint64x8 + OpMaskedShiftLeftUint64x8 + OpMaskedShiftLeftAndFillUpperFromUint64x8 + OpMaskedShiftRightUint64x8 + OpMaskedShiftRightAndFillUpperFromUint64x8 + OpMaskedShiftRightSignExtendedUint64x8 OpMaskedSubUint64x8 OpMaskedXorUint64x8 OpMaxUint64x8 @@ -5191,6 +5664,15 @@ const ( OpNotEqualUint64x8 OpOrUint64x8 OpPopCountUint64x8 + OpRotateLeftUint64x8 + OpRotateRightUint64x8 + OpShiftAllLeftUint64x8 + OpShiftAllRightUint64x8 + OpShiftLeftUint64x8 + OpShiftLeftAndFillUpperFromUint64x8 + OpShiftRightUint64x8 + OpShiftRightAndFillUpperFromUint64x8 + OpShiftRightSignExtendedUint64x8 OpSubUint64x8 OpXorUint64x8 OpAddUint8x16 @@ -5483,20 +5965,140 @@ const ( OpRoundWithPrecisionFloat64x8 OpTruncSuppressExceptionWithPrecisionFloat64x8 OpTruncWithPrecisionFloat64x8 + OpMaskedShiftAllLeftAndFillUpperFromInt16x16 + OpMaskedShiftAllRightAndFillUpperFromInt16x16 + OpShiftAllLeftAndFillUpperFromInt16x16 + OpShiftAllRightAndFillUpperFromInt16x16 + OpMaskedShiftAllLeftAndFillUpperFromInt16x32 + OpMaskedShiftAllRightAndFillUpperFromInt16x32 + OpShiftAllLeftAndFillUpperFromInt16x32 + OpShiftAllRightAndFillUpperFromInt16x32 OpGetElemInt16x8 + OpMaskedShiftAllLeftAndFillUpperFromInt16x8 + OpMaskedShiftAllRightAndFillUpperFromInt16x8 OpSetElemInt16x8 + OpShiftAllLeftAndFillUpperFromInt16x8 + OpShiftAllRightAndFillUpperFromInt16x8 + OpMaskedRotateAllLeftInt32x16 + OpMaskedRotateAllRightInt32x16 + OpMaskedShiftAllLeftAndFillUpperFromInt32x16 + OpMaskedShiftAllRightAndFillUpperFromInt32x16 + OpRotateAllLeftInt32x16 + OpRotateAllRightInt32x16 + OpShiftAllLeftAndFillUpperFromInt32x16 + OpShiftAllRightAndFillUpperFromInt32x16 OpGetElemInt32x4 + OpMaskedRotateAllLeftInt32x4 + OpMaskedRotateAllRightInt32x4 + OpMaskedShiftAllLeftAndFillUpperFromInt32x4 + OpMaskedShiftAllRightAndFillUpperFromInt32x4 + OpRotateAllLeftInt32x4 + OpRotateAllRightInt32x4 OpSetElemInt32x4 + OpShiftAllLeftAndFillUpperFromInt32x4 + OpShiftAllRightAndFillUpperFromInt32x4 + OpMaskedRotateAllLeftInt32x8 + OpMaskedRotateAllRightInt32x8 + OpMaskedShiftAllLeftAndFillUpperFromInt32x8 + OpMaskedShiftAllRightAndFillUpperFromInt32x8 + OpRotateAllLeftInt32x8 + OpRotateAllRightInt32x8 + OpShiftAllLeftAndFillUpperFromInt32x8 + OpShiftAllRightAndFillUpperFromInt32x8 OpGetElemInt64x2 + OpMaskedRotateAllLeftInt64x2 + OpMaskedRotateAllRightInt64x2 + OpMaskedShiftAllLeftAndFillUpperFromInt64x2 + OpMaskedShiftAllRightAndFillUpperFromInt64x2 + OpRotateAllLeftInt64x2 + OpRotateAllRightInt64x2 OpSetElemInt64x2 + OpShiftAllLeftAndFillUpperFromInt64x2 + OpShiftAllRightAndFillUpperFromInt64x2 + OpMaskedRotateAllLeftInt64x4 + OpMaskedRotateAllRightInt64x4 + OpMaskedShiftAllLeftAndFillUpperFromInt64x4 + OpMaskedShiftAllRightAndFillUpperFromInt64x4 + OpRotateAllLeftInt64x4 + OpRotateAllRightInt64x4 + OpShiftAllLeftAndFillUpperFromInt64x4 + OpShiftAllRightAndFillUpperFromInt64x4 + OpMaskedRotateAllLeftInt64x8 + OpMaskedRotateAllRightInt64x8 + OpMaskedShiftAllLeftAndFillUpperFromInt64x8 + OpMaskedShiftAllRightAndFillUpperFromInt64x8 + OpRotateAllLeftInt64x8 + OpRotateAllRightInt64x8 + 
OpShiftAllLeftAndFillUpperFromInt64x8 + OpShiftAllRightAndFillUpperFromInt64x8 OpGetElemInt8x16 OpSetElemInt8x16 + OpMaskedShiftAllLeftAndFillUpperFromUint16x16 + OpMaskedShiftAllRightAndFillUpperFromUint16x16 + OpShiftAllLeftAndFillUpperFromUint16x16 + OpShiftAllRightAndFillUpperFromUint16x16 + OpMaskedShiftAllLeftAndFillUpperFromUint16x32 + OpMaskedShiftAllRightAndFillUpperFromUint16x32 + OpShiftAllLeftAndFillUpperFromUint16x32 + OpShiftAllRightAndFillUpperFromUint16x32 OpGetElemUint16x8 + OpMaskedShiftAllLeftAndFillUpperFromUint16x8 + OpMaskedShiftAllRightAndFillUpperFromUint16x8 OpSetElemUint16x8 + OpShiftAllLeftAndFillUpperFromUint16x8 + OpShiftAllRightAndFillUpperFromUint16x8 + OpMaskedRotateAllLeftUint32x16 + OpMaskedRotateAllRightUint32x16 + OpMaskedShiftAllLeftAndFillUpperFromUint32x16 + OpMaskedShiftAllRightAndFillUpperFromUint32x16 + OpRotateAllLeftUint32x16 + OpRotateAllRightUint32x16 + OpShiftAllLeftAndFillUpperFromUint32x16 + OpShiftAllRightAndFillUpperFromUint32x16 OpGetElemUint32x4 + OpMaskedRotateAllLeftUint32x4 + OpMaskedRotateAllRightUint32x4 + OpMaskedShiftAllLeftAndFillUpperFromUint32x4 + OpMaskedShiftAllRightAndFillUpperFromUint32x4 + OpRotateAllLeftUint32x4 + OpRotateAllRightUint32x4 OpSetElemUint32x4 + OpShiftAllLeftAndFillUpperFromUint32x4 + OpShiftAllRightAndFillUpperFromUint32x4 + OpMaskedRotateAllLeftUint32x8 + OpMaskedRotateAllRightUint32x8 + OpMaskedShiftAllLeftAndFillUpperFromUint32x8 + OpMaskedShiftAllRightAndFillUpperFromUint32x8 + OpRotateAllLeftUint32x8 + OpRotateAllRightUint32x8 + OpShiftAllLeftAndFillUpperFromUint32x8 + OpShiftAllRightAndFillUpperFromUint32x8 OpGetElemUint64x2 + OpMaskedRotateAllLeftUint64x2 + OpMaskedRotateAllRightUint64x2 + OpMaskedShiftAllLeftAndFillUpperFromUint64x2 + OpMaskedShiftAllRightAndFillUpperFromUint64x2 + OpRotateAllLeftUint64x2 + OpRotateAllRightUint64x2 OpSetElemUint64x2 + OpShiftAllLeftAndFillUpperFromUint64x2 + OpShiftAllRightAndFillUpperFromUint64x2 + OpMaskedRotateAllLeftUint64x4 + OpMaskedRotateAllRightUint64x4 + OpMaskedShiftAllLeftAndFillUpperFromUint64x4 + OpMaskedShiftAllRightAndFillUpperFromUint64x4 + OpRotateAllLeftUint64x4 + OpRotateAllRightUint64x4 + OpShiftAllLeftAndFillUpperFromUint64x4 + OpShiftAllRightAndFillUpperFromUint64x4 + OpMaskedRotateAllLeftUint64x8 + OpMaskedRotateAllRightUint64x8 + OpMaskedShiftAllLeftAndFillUpperFromUint64x8 + OpMaskedShiftAllRightAndFillUpperFromUint64x8 + OpRotateAllLeftUint64x8 + OpRotateAllRightUint64x8 + OpShiftAllLeftAndFillUpperFromUint64x8 + OpShiftAllRightAndFillUpperFromUint64x8 OpGetElemUint8x16 OpSetElemUint8x16 ) @@ -21551,6 +22153,85 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSLLVWMasked256", + argLen: 3, + asm: x86.AVPSLLVW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDVWMasked256", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHLDVW, + reg: regInfo{ + inputs: []inputInfo{ + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 
+ }, + }, + }, + { + name: "VPSRLVWMasked256", + argLen: 3, + asm: x86.AVPSRLVW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDVWMasked256", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHRDVW, + reg: regInfo{ + inputs: []inputInfo{ + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAVWMasked256", + argLen: 3, + asm: x86.AVPSRAVW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPSUBWMasked256", argLen: 3, @@ -21738,6 +22419,122 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSLLW256", + argLen: 2, + asm: x86.AVPSLLW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLW256", + argLen: 2, + asm: x86.AVPSRLW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAW256", + argLen: 2, + asm: x86.AVPSRAW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSLLVW256", + argLen: 2, + asm: x86.AVPSLLVW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDVW256", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHLDVW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLVW256", + argLen: 2, + asm: x86.AVPSRLVW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 
X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDVW256", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHRDVW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAVW256", + argLen: 2, + asm: x86.AVPSRAVW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPSIGNW256", argLen: 2, @@ -21948,6 +22745,85 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSLLVWMasked512", + argLen: 3, + asm: x86.AVPSLLVW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDVWMasked512", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHLDVW, + reg: regInfo{ + inputs: []inputInfo{ + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLVWMasked512", + argLen: 3, + asm: x86.AVPSRLVW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDVWMasked512", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHRDVW, + reg: regInfo{ + inputs: []inputInfo{ + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAVWMasked512", + argLen: 3, + asm: x86.AVPSRAVW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPSUBWMasked512", argLen: 3, @@ -22079,6 +22955,80 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSLLVW512", + argLen: 2, + asm: x86.AVPSLLVW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 
X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDVW512", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHLDVW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLVW512", + argLen: 2, + asm: x86.AVPSRLVW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDVW512", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHRDVW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAVW512", + argLen: 2, + asm: x86.AVPSRAVW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPSUBW512", argLen: 2, @@ -22304,6 +23254,85 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSLLVWMasked128", + argLen: 3, + asm: x86.AVPSLLVW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDVWMasked128", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHLDVW, + reg: regInfo{ + inputs: []inputInfo{ + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLVWMasked128", + argLen: 3, + asm: x86.AVPSRLVW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDVWMasked128", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHRDVW, + reg: regInfo{ + inputs: []inputInfo{ + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 
X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAVWMasked128", + argLen: 3, + asm: x86.AVPSRAVW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPSUBWMasked128", argLen: 3, @@ -22491,6 +23520,122 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSLLW128", + argLen: 2, + asm: x86.AVPSLLW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLW128", + argLen: 2, + asm: x86.AVPSRLW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAW128", + argLen: 2, + asm: x86.AVPSRAW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSLLVW128", + argLen: 2, + asm: x86.AVPSLLVW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDVW128", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHLDVW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLVW128", + argLen: 2, + asm: x86.AVPSRLVW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDVW128", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHRDVW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAVW128", + argLen: 2, + asm: x86.AVPSRAVW, + reg: regInfo{ + inputs: 
[]inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPSIGNW128", argLen: 2, @@ -22732,6 +23877,36 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPROLVDMasked512", + argLen: 3, + asm: x86.AVPROLVD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPRORVDMasked512", + argLen: 3, + asm: x86.AVPRORVD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPDPWSSDSMasked512", argLen: 4, @@ -22766,6 +23941,85 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSLLVDMasked512", + argLen: 3, + asm: x86.AVPSLLVD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDVDMasked512", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHLDVD, + reg: regInfo{ + inputs: []inputInfo{ + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLVDMasked512", + argLen: 3, + asm: x86.AVPSRLVD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDVDMasked512", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHRDVD, + reg: regInfo{ + inputs: []inputInfo{ + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAVDMasked512", + argLen: 3, + asm: x86.AVPSRAVD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, 
{ name: "VPSUBDMasked512", argLen: 3, @@ -22903,6 +24157,34 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPROLVD512", + argLen: 2, + asm: x86.AVPROLVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPRORVD512", + argLen: 2, + asm: x86.AVPRORVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPDPWSSDS512", argLen: 3, @@ -22935,6 +24217,80 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSLLVD512", + argLen: 2, + asm: x86.AVPSLLVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDVD512", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHLDVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLVD512", + argLen: 2, + asm: x86.AVPSRLVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDVD512", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHRDVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAVD512", + argLen: 2, + asm: x86.AVPSRAVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPSUBD512", argLen: 2, @@ -23193,6 +24549,36 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPROLVDMasked128", + argLen: 3, + asm: x86.AVPROLVD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPRORVDMasked128", + argLen: 3, + asm: x86.AVPRORVD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // 
K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPDPWSSDSMasked128", argLen: 4, @@ -23227,6 +24613,85 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSLLVDMasked128", + argLen: 3, + asm: x86.AVPSLLVD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDVDMasked128", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHLDVD, + reg: regInfo{ + inputs: []inputInfo{ + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLVDMasked128", + argLen: 3, + asm: x86.AVPSRLVD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDVDMasked128", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHRDVD, + reg: regInfo{ + inputs: []inputInfo{ + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAVDMasked128", + argLen: 3, + asm: x86.AVPSRAVD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPSUBDMasked128", argLen: 3, @@ -23392,6 +24857,34 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPROLVD128", + argLen: 2, + asm: x86.AVPROLVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPRORVD128", + argLen: 2, + asm: x86.AVPRORVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPDPWSSDS128", argLen: 3, @@ -23424,6 +24917,122 @@ var opcodeTable = [...]opInfo{ 
}, }, }, + { + name: "VPSLLD128", + argLen: 2, + asm: x86.AVPSLLD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLD128", + argLen: 2, + asm: x86.AVPSRLD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAD128", + argLen: 2, + asm: x86.AVPSRAD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSLLVD128", + argLen: 2, + asm: x86.AVPSLLVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDVD128", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHLDVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLVD128", + argLen: 2, + asm: x86.AVPSRLVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDVD128", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHRDVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAVD128", + argLen: 2, + asm: x86.AVPSRAVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPSIGND128", argLen: 2, @@ -23681,6 +25290,36 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPROLVDMasked256", + argLen: 3, + asm: x86.AVPROLVD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 
X11 X12 X13 X14 + }, + }, + }, + { + name: "VPRORVDMasked256", + argLen: 3, + asm: x86.AVPRORVD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPDPWSSDSMasked256", argLen: 4, @@ -23715,6 +25354,85 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSLLVDMasked256", + argLen: 3, + asm: x86.AVPSLLVD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDVDMasked256", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHLDVD, + reg: regInfo{ + inputs: []inputInfo{ + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLVDMasked256", + argLen: 3, + asm: x86.AVPSRLVD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDVDMasked256", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHRDVD, + reg: regInfo{ + inputs: []inputInfo{ + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAVDMasked256", + argLen: 3, + asm: x86.AVPSRAVD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPSUBDMasked256", argLen: 3, @@ -23880,6 +25598,34 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPROLVD256", + argLen: 2, + asm: x86.AVPROLVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPRORVD256", + argLen: 2, + asm: x86.AVPRORVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 
2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPDPWSSDS256", argLen: 3, @@ -23912,6 +25658,122 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSLLD256", + argLen: 2, + asm: x86.AVPSLLD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLD256", + argLen: 2, + asm: x86.AVPSRLD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAD256", + argLen: 2, + asm: x86.AVPSRAD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSLLVD256", + argLen: 2, + asm: x86.AVPSLLVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDVD256", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHLDVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLVD256", + argLen: 2, + asm: x86.AVPSRLVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDVD256", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHRDVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAVD256", + argLen: 2, + asm: x86.AVPSRAVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPSIGND256", argLen: 2, @@ -24154,6 +26016,160 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPROLVQMasked128", + argLen: 3, + asm: x86.AVPROLVQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 
X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPRORVQMasked128", + argLen: 3, + asm: x86.AVPRORVQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSLLQMasked128", + argLen: 3, + asm: x86.AVPSLLQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLQMasked128", + argLen: 3, + asm: x86.AVPSRLQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAQMasked128", + argLen: 3, + asm: x86.AVPSRAQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSLLVQMasked128", + argLen: 3, + asm: x86.AVPSLLVQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDVQMasked128", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHLDVQ, + reg: regInfo{ + inputs: []inputInfo{ + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLVQMasked128", + argLen: 3, + asm: x86.AVPSRLVQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDVQMasked128", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHRDVQ, + reg: regInfo{ + inputs: []inputInfo{ + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 
X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAVQMasked128", + argLen: 3, + asm: x86.AVPSRAVQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPSUBQMasked128", argLen: 3, @@ -24243,6 +26259,150 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPROLVQ128", + argLen: 2, + asm: x86.AVPROLVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPRORVQ128", + argLen: 2, + asm: x86.AVPRORVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSLLQ128", + argLen: 2, + asm: x86.AVPSLLQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLQ128", + argLen: 2, + asm: x86.AVPSRLQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAQ128", + argLen: 2, + asm: x86.AVPSRAQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSLLVQ128", + argLen: 2, + asm: x86.AVPSLLVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDVQ128", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHLDVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLVQ128", + argLen: 2, + asm: x86.AVPSRLVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // 
X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDVQ128", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHRDVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAVQ128", + argLen: 2, + asm: x86.AVPSRAVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPSUBQ128", argLen: 2, @@ -24469,6 +26629,160 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPROLVQMasked256", + argLen: 3, + asm: x86.AVPROLVQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPRORVQMasked256", + argLen: 3, + asm: x86.AVPRORVQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSLLQMasked256", + argLen: 3, + asm: x86.AVPSLLQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLQMasked256", + argLen: 3, + asm: x86.AVPSRLQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAQMasked256", + argLen: 3, + asm: x86.AVPSRAQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSLLVQMasked256", + argLen: 3, + asm: x86.AVPSLLVQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDVQMasked256", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHLDVQ, + reg: regInfo{ + 
inputs: []inputInfo{ + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLVQMasked256", + argLen: 3, + asm: x86.AVPSRLVQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDVQMasked256", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHRDVQ, + reg: regInfo{ + inputs: []inputInfo{ + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAVQMasked256", + argLen: 3, + asm: x86.AVPSRAVQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPSUBQMasked256", argLen: 3, @@ -24558,6 +26872,150 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPROLVQ256", + argLen: 2, + asm: x86.AVPROLVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPRORVQ256", + argLen: 2, + asm: x86.AVPRORVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSLLQ256", + argLen: 2, + asm: x86.AVPSLLQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLQ256", + argLen: 2, + asm: x86.AVPSRLQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAQ256", + argLen: 2, + asm: x86.AVPSRAQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 
2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSLLVQ256", + argLen: 2, + asm: x86.AVPSLLVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDVQ256", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHLDVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLVQ256", + argLen: 2, + asm: x86.AVPSRLVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDVQ256", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHRDVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAVQ256", + argLen: 2, + asm: x86.AVPSRAVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPSUBQ256", argLen: 2, @@ -24784,6 +27242,160 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPROLVQMasked512", + argLen: 3, + asm: x86.AVPROLVQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPRORVQMasked512", + argLen: 3, + asm: x86.AVPRORVQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSLLQMasked512", + argLen: 3, + asm: x86.AVPSLLQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLQMasked512", + argLen: 3, + asm: x86.AVPSRLQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAQMasked512", + argLen: 3, + asm: x86.AVPSRAQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSLLVQMasked512", + argLen: 3, + asm: x86.AVPSLLVQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDVQMasked512", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHLDVQ, + reg: regInfo{ + inputs: []inputInfo{ + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLVQMasked512", + argLen: 3, + asm: x86.AVPSRLVQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDVQMasked512", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHRDVQ, + reg: regInfo{ + inputs: []inputInfo{ + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAVQMasked512", + argLen: 3, + asm: x86.AVPSRAVQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPSUBQMasked512", argLen: 3, @@ -24903,6 +27515,150 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPROLVQ512", + argLen: 2, + asm: x86.AVPROLVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPRORVQ512", + argLen: 2, + asm: x86.AVPRORVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 
X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSLLQ512", + argLen: 2, + asm: x86.AVPSLLQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLQ512", + argLen: 2, + asm: x86.AVPSRLQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAQ512", + argLen: 2, + asm: x86.AVPSRAQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSLLVQ512", + argLen: 2, + asm: x86.AVPSLLVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDVQ512", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHLDVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLVQ512", + argLen: 2, + asm: x86.AVPSRLVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDVQ512", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHRDVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAVQ512", + argLen: 2, + asm: x86.AVPSRAVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPSUBQ512", argLen: 2, @@ -27697,6 +30453,68 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSHLDWMasked256", + auxType: auxInt8, + argLen: 3, + asm: x86.AVPSHLDW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 
X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDWMasked256", + auxType: auxInt8, + argLen: 3, + asm: x86.AVPSHRDW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDW256", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSHLDW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDW256", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSHRDW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPCMPW512", auxType: auxInt8, @@ -27730,6 +30548,68 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSHLDWMasked512", + auxType: auxInt8, + argLen: 3, + asm: x86.AVPSHLDW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDWMasked512", + auxType: auxInt8, + argLen: 3, + asm: x86.AVPSHRDW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDW512", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSHLDW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDW512", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSHRDW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPEXTRW128", auxType: auxInt8, @@ -27776,6 +30656,38 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSHLDWMasked128", + auxType: auxInt8, + argLen: 3, + asm: x86.AVPSHLDW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 
X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDWMasked128", + auxType: auxInt8, + argLen: 3, + asm: x86.AVPSHRDW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPINSRW128", auxType: auxInt8, @@ -27791,6 +30703,36 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSHLDW128", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSHLDW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDW128", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSHRDW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPCMPD512", auxType: auxInt8, @@ -27824,6 +30766,126 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPROLDMasked512", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPROLD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPRORDMasked512", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPRORD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDDMasked512", + auxType: auxInt8, + argLen: 3, + asm: x86.AVPSHLDD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDDMasked512", + auxType: auxInt8, + argLen: 3, + asm: x86.AVPSHRDD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPROLD512", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPROLD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPRORD512", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPRORD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 
2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDD512", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSHLDD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDD512", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSHRDD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPEXTRD128", auxType: auxInt8, @@ -27870,6 +30932,96 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPROLDMasked128", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPROLD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPRORDMasked128", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPRORD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDDMasked128", + auxType: auxInt8, + argLen: 3, + asm: x86.AVPSHLDD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDDMasked128", + auxType: auxInt8, + argLen: 3, + asm: x86.AVPSHRDD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPROLD128", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPROLD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPRORD128", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPRORD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPINSRD128", auxType: auxInt8, @@ -27885,6 +31037,36 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSHLDD128", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSHLDD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, 
// X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDD128", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSHRDD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPCMPD256", auxType: auxInt8, @@ -27917,6 +31099,126 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPROLDMasked256", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPROLD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPRORDMasked256", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPRORD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDDMasked256", + auxType: auxInt8, + argLen: 3, + asm: x86.AVPSHLDD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDDMasked256", + auxType: auxInt8, + argLen: 3, + asm: x86.AVPSHRDD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPROLD256", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPROLD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPRORD256", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPRORD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDD256", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSHLDD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDD256", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSHRDD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 
X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPEXTRQ128", auxType: auxInt8, @@ -27963,6 +31265,96 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPROLQMasked128", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPROLQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPRORQMasked128", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPRORQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDQMasked128", + auxType: auxInt8, + argLen: 3, + asm: x86.AVPSHLDQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDQMasked128", + auxType: auxInt8, + argLen: 3, + asm: x86.AVPSHRDQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPROLQ128", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPROLQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPRORQ128", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPRORQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPINSRQ128", auxType: auxInt8, @@ -27978,6 +31370,36 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSHLDQ128", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSHLDQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDQ128", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSHRDQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPCMPQ256", auxType: auxInt8, @@ -28010,6 +31432,126 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPROLQMasked256", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPROLQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 
X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPRORQMasked256", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPRORQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDQMasked256", + auxType: auxInt8, + argLen: 3, + asm: x86.AVPSHLDQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDQMasked256", + auxType: auxInt8, + argLen: 3, + asm: x86.AVPSHRDQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPROLQ256", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPROLQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPRORQ256", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPRORQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDQ256", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSHLDQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDQ256", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSHRDQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPCMPQ512", auxType: auxInt8, @@ -28043,6 +31585,126 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPROLQMasked512", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPROLQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPRORQMasked512", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPRORQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: 
"VPSHLDQMasked512", + auxType: auxInt8, + argLen: 3, + asm: x86.AVPSHLDQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDQMasked512", + auxType: auxInt8, + argLen: 3, + asm: x86.AVPSHRDQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPROLQ512", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPROLQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPRORQ512", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPRORQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDQ512", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSHLDQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDQ512", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSHRDQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPEXTRB128", auxType: auxInt8, @@ -57559,6 +61221,31 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, + { + name: "MaskedShiftLeftInt16x16", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftLeftAndFillUpperFromInt16x16", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightInt16x16", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftRightAndFillUpperFromInt16x16", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightSignExtendedInt16x16", + argLen: 3, + generic: true, + }, { name: "MaskedSubInt16x16", argLen: 3, @@ -57641,6 +61328,46 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "ShiftAllLeftInt16x16", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightInt16x16", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightSignExtendedInt16x16", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftInt16x16", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftAndFillUpperFromInt16x16", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightInt16x16", + argLen: 2, + generic: true, + }, + { + name: "ShiftRightAndFillUpperFromInt16x16", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightSignExtendedInt16x16", + argLen: 2, + generic: true, + }, { name: "SignInt16x16", argLen: 2, @@ -57782,6 
+61509,31 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, + { + name: "MaskedShiftLeftInt16x32", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftLeftAndFillUpperFromInt16x32", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightInt16x32", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftRightAndFillUpperFromInt16x32", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightSignExtendedInt16x32", + argLen: 3, + generic: true, + }, { name: "MaskedSubInt16x32", argLen: 3, @@ -57838,6 +61590,31 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "ShiftLeftInt16x32", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftAndFillUpperFromInt16x32", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightInt16x32", + argLen: 2, + generic: true, + }, + { + name: "ShiftRightAndFillUpperFromInt16x32", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightSignExtendedInt16x32", + argLen: 2, + generic: true, + }, { name: "SubInt16x32", argLen: 2, @@ -57979,6 +61756,31 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, + { + name: "MaskedShiftLeftInt16x8", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftLeftAndFillUpperFromInt16x8", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightInt16x8", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftRightAndFillUpperFromInt16x8", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightSignExtendedInt16x8", + argLen: 3, + generic: true, + }, { name: "MaskedSubInt16x8", argLen: 3, @@ -58061,6 +61863,46 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "ShiftAllLeftInt16x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightInt16x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightSignExtendedInt16x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftInt16x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftAndFillUpperFromInt16x8", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightInt16x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftRightAndFillUpperFromInt16x8", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightSignExtendedInt16x8", + argLen: 2, + generic: true, + }, { name: "SignInt16x8", argLen: 2, @@ -58213,6 +62055,16 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "MaskedRotateLeftInt32x16", + argLen: 3, + generic: true, + }, + { + name: "MaskedRotateRightInt32x16", + argLen: 3, + generic: true, + }, { name: "MaskedSaturatedPairDotProdAccumulateInt32x16", argLen: 4, @@ -58223,6 +62075,31 @@ var opcodeTable = [...]opInfo{ argLen: 4, generic: true, }, + { + name: "MaskedShiftLeftInt32x16", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftLeftAndFillUpperFromInt32x16", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightInt32x16", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftRightAndFillUpperFromInt32x16", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightSignExtendedInt32x16", + argLen: 3, + generic: true, + }, { name: "MaskedSubInt32x16", argLen: 3, @@ -58279,6 +62156,16 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "RotateLeftInt32x16", + argLen: 2, + generic: true, + }, + { + name: "RotateRightInt32x16", + argLen: 2, + generic: true, + }, { name: "SaturatedPairDotProdAccumulateInt32x16", argLen: 3, @@ -58289,6 +62176,31 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, + { + name: "ShiftLeftInt32x16", + argLen: 
2, + generic: true, + }, + { + name: "ShiftLeftAndFillUpperFromInt32x16", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightInt32x16", + argLen: 2, + generic: true, + }, + { + name: "ShiftRightAndFillUpperFromInt32x16", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightSignExtendedInt32x16", + argLen: 2, + generic: true, + }, { name: "SubInt32x16", argLen: 2, @@ -58441,6 +62353,16 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "MaskedRotateLeftInt32x4", + argLen: 3, + generic: true, + }, + { + name: "MaskedRotateRightInt32x4", + argLen: 3, + generic: true, + }, { name: "MaskedSaturatedPairDotProdAccumulateInt32x4", argLen: 4, @@ -58451,6 +62373,31 @@ var opcodeTable = [...]opInfo{ argLen: 4, generic: true, }, + { + name: "MaskedShiftLeftInt32x4", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftLeftAndFillUpperFromInt32x4", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightInt32x4", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftRightAndFillUpperFromInt32x4", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightSignExtendedInt32x4", + argLen: 3, + generic: true, + }, { name: "MaskedSubInt32x4", argLen: 3, @@ -58523,6 +62470,16 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "RotateLeftInt32x4", + argLen: 2, + generic: true, + }, + { + name: "RotateRightInt32x4", + argLen: 2, + generic: true, + }, { name: "SaturatedPairDotProdAccumulateInt32x4", argLen: 3, @@ -58533,6 +62490,46 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, + { + name: "ShiftAllLeftInt32x4", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightInt32x4", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightSignExtendedInt32x4", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftInt32x4", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftAndFillUpperFromInt32x4", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightInt32x4", + argLen: 2, + generic: true, + }, + { + name: "ShiftRightAndFillUpperFromInt32x4", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightSignExtendedInt32x4", + argLen: 2, + generic: true, + }, { name: "SignInt32x4", argLen: 2, @@ -58690,6 +62687,16 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "MaskedRotateLeftInt32x8", + argLen: 3, + generic: true, + }, + { + name: "MaskedRotateRightInt32x8", + argLen: 3, + generic: true, + }, { name: "MaskedSaturatedPairDotProdAccumulateInt32x8", argLen: 4, @@ -58700,6 +62707,31 @@ var opcodeTable = [...]opInfo{ argLen: 4, generic: true, }, + { + name: "MaskedShiftLeftInt32x8", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftLeftAndFillUpperFromInt32x8", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightInt32x8", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftRightAndFillUpperFromInt32x8", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightSignExtendedInt32x8", + argLen: 3, + generic: true, + }, { name: "MaskedSubInt32x8", argLen: 3, @@ -58772,6 +62804,16 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "RotateLeftInt32x8", + argLen: 2, + generic: true, + }, + { + name: "RotateRightInt32x8", + argLen: 2, + generic: true, + }, { name: "SaturatedPairDotProdAccumulateInt32x8", argLen: 3, @@ -58782,6 +62824,46 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, + { + name: "ShiftAllLeftInt32x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightInt32x8", + argLen: 2, + generic: 
true, + }, + { + name: "ShiftAllRightSignExtendedInt32x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftInt32x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftAndFillUpperFromInt32x8", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightInt32x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftRightAndFillUpperFromInt32x8", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightSignExtendedInt32x8", + argLen: 2, + generic: true, + }, { name: "SignInt32x8", argLen: 2, @@ -58940,6 +63022,56 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "MaskedRotateLeftInt64x2", + argLen: 3, + generic: true, + }, + { + name: "MaskedRotateRightInt64x2", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllLeftInt64x2", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightInt64x2", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightSignExtendedInt64x2", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftLeftInt64x2", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftLeftAndFillUpperFromInt64x2", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightInt64x2", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftRightAndFillUpperFromInt64x2", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightSignExtendedInt64x2", + argLen: 3, + generic: true, + }, { name: "MaskedSubInt64x2", argLen: 3, @@ -58992,6 +63124,56 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "RotateLeftInt64x2", + argLen: 2, + generic: true, + }, + { + name: "RotateRightInt64x2", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllLeftInt64x2", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightInt64x2", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightSignExtendedInt64x2", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftInt64x2", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftAndFillUpperFromInt64x2", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightInt64x2", + argLen: 2, + generic: true, + }, + { + name: "ShiftRightAndFillUpperFromInt64x2", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightSignExtendedInt64x2", + argLen: 2, + generic: true, + }, { name: "SubInt64x2", argLen: 2, @@ -59140,6 +63322,56 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "MaskedRotateLeftInt64x4", + argLen: 3, + generic: true, + }, + { + name: "MaskedRotateRightInt64x4", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllLeftInt64x4", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightInt64x4", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightSignExtendedInt64x4", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftLeftInt64x4", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftLeftAndFillUpperFromInt64x4", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightInt64x4", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftRightAndFillUpperFromInt64x4", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightSignExtendedInt64x4", + argLen: 3, + generic: true, + }, { name: "MaskedSubInt64x4", argLen: 3, @@ -59192,6 +63424,56 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "RotateLeftInt64x4", + argLen: 2, + generic: true, + }, + { + name: "RotateRightInt64x4", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllLeftInt64x4", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightInt64x4", + 
argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightSignExtendedInt64x4", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftInt64x4", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftAndFillUpperFromInt64x4", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightInt64x4", + argLen: 2, + generic: true, + }, + { + name: "ShiftRightAndFillUpperFromInt64x4", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightSignExtendedInt64x4", + argLen: 2, + generic: true, + }, { name: "SubInt64x4", argLen: 2, @@ -59340,6 +63622,56 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "MaskedRotateLeftInt64x8", + argLen: 3, + generic: true, + }, + { + name: "MaskedRotateRightInt64x8", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllLeftInt64x8", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightInt64x8", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightSignExtendedInt64x8", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftLeftInt64x8", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftLeftAndFillUpperFromInt64x8", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightInt64x8", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftRightAndFillUpperFromInt64x8", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightSignExtendedInt64x8", + argLen: 3, + generic: true, + }, { name: "MaskedSubInt64x8", argLen: 3, @@ -59392,6 +63724,56 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "RotateLeftInt64x8", + argLen: 2, + generic: true, + }, + { + name: "RotateRightInt64x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllLeftInt64x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightInt64x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightSignExtendedInt64x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftInt64x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftAndFillUpperFromInt64x8", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightInt64x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftRightAndFillUpperFromInt64x8", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightSignExtendedInt64x8", + argLen: 2, + generic: true, + }, { name: "SubInt64x8", argLen: 2, @@ -60042,6 +64424,31 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, + { + name: "MaskedShiftLeftUint16x16", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftLeftAndFillUpperFromUint16x16", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightUint16x16", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftRightAndFillUpperFromUint16x16", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightSignExtendedUint16x16", + argLen: 3, + generic: true, + }, { name: "MaskedSubUint16x16", argLen: 3, @@ -60103,6 +64510,41 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "ShiftAllLeftUint16x16", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightUint16x16", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftUint16x16", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftAndFillUpperFromUint16x16", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightUint16x16", + argLen: 2, + generic: true, + }, + { + name: "ShiftRightAndFillUpperFromUint16x16", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightSignExtendedUint16x16", + argLen: 2, + generic: true, + }, { name: "SubUint16x16", argLen: 2, @@ -60230,6 +64672,31 @@ var 
opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, + { + name: "MaskedShiftLeftUint16x32", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftLeftAndFillUpperFromUint16x32", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightUint16x32", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftRightAndFillUpperFromUint16x32", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightSignExtendedUint16x32", + argLen: 3, + generic: true, + }, { name: "MaskedSubUint16x32", argLen: 3, @@ -60275,6 +64742,31 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "ShiftLeftUint16x32", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftAndFillUpperFromUint16x32", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightUint16x32", + argLen: 2, + generic: true, + }, + { + name: "ShiftRightAndFillUpperFromUint16x32", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightSignExtendedUint16x32", + argLen: 2, + generic: true, + }, { name: "SubUint16x32", argLen: 2, @@ -60407,6 +64899,31 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, + { + name: "MaskedShiftLeftUint16x8", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftLeftAndFillUpperFromUint16x8", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightUint16x8", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftRightAndFillUpperFromUint16x8", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightSignExtendedUint16x8", + argLen: 3, + generic: true, + }, { name: "MaskedSubUint16x8", argLen: 3, @@ -60468,6 +64985,41 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "ShiftAllLeftUint16x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightUint16x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftUint16x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftAndFillUpperFromUint16x8", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightUint16x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftRightAndFillUpperFromUint16x8", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightSignExtendedUint16x8", + argLen: 2, + generic: true, + }, { name: "SubUint16x8", argLen: 2, @@ -60594,11 +65146,46 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "MaskedRotateLeftUint32x16", + argLen: 3, + generic: true, + }, + { + name: "MaskedRotateRightUint32x16", + argLen: 3, + generic: true, + }, { name: "MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16", argLen: 4, generic: true, }, + { + name: "MaskedShiftLeftUint32x16", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftLeftAndFillUpperFromUint32x16", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightUint32x16", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftRightAndFillUpperFromUint32x16", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightSignExtendedUint32x16", + argLen: 3, + generic: true, + }, { name: "MaskedSubUint32x16", argLen: 3, @@ -60644,11 +65231,46 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "RotateLeftUint32x16", + argLen: 2, + generic: true, + }, + { + name: "RotateRightUint32x16", + argLen: 2, + generic: true, + }, { name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x16", argLen: 3, generic: true, }, + { + name: "ShiftLeftUint32x16", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftAndFillUpperFromUint32x16", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightUint32x16", + argLen: 2, + 
generic: true, + }, + { + name: "ShiftRightAndFillUpperFromUint32x16", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightSignExtendedUint32x16", + argLen: 2, + generic: true, + }, { name: "SubUint32x16", argLen: 2, @@ -60780,11 +65402,46 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "MaskedRotateLeftUint32x4", + argLen: 3, + generic: true, + }, + { + name: "MaskedRotateRightUint32x4", + argLen: 3, + generic: true, + }, { name: "MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4", argLen: 4, generic: true, }, + { + name: "MaskedShiftLeftUint32x4", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftLeftAndFillUpperFromUint32x4", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightUint32x4", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftRightAndFillUpperFromUint32x4", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightSignExtendedUint32x4", + argLen: 3, + generic: true, + }, { name: "MaskedSubUint32x4", argLen: 3, @@ -60846,11 +65503,56 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "RotateLeftUint32x4", + argLen: 2, + generic: true, + }, + { + name: "RotateRightUint32x4", + argLen: 2, + generic: true, + }, { name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x4", argLen: 3, generic: true, }, + { + name: "ShiftAllLeftUint32x4", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightUint32x4", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftUint32x4", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftAndFillUpperFromUint32x4", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightUint32x4", + argLen: 2, + generic: true, + }, + { + name: "ShiftRightAndFillUpperFromUint32x4", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightSignExtendedUint32x4", + argLen: 2, + generic: true, + }, { name: "SubUint32x4", argLen: 2, @@ -60982,11 +65684,46 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "MaskedRotateLeftUint32x8", + argLen: 3, + generic: true, + }, + { + name: "MaskedRotateRightUint32x8", + argLen: 3, + generic: true, + }, { name: "MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8", argLen: 4, generic: true, }, + { + name: "MaskedShiftLeftUint32x8", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftLeftAndFillUpperFromUint32x8", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightUint32x8", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftRightAndFillUpperFromUint32x8", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightSignExtendedUint32x8", + argLen: 3, + generic: true, + }, { name: "MaskedSubUint32x8", argLen: 3, @@ -61048,11 +65785,56 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "RotateLeftUint32x8", + argLen: 2, + generic: true, + }, + { + name: "RotateRightUint32x8", + argLen: 2, + generic: true, + }, { name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x8", argLen: 3, generic: true, }, + { + name: "ShiftAllLeftUint32x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightUint32x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftUint32x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftAndFillUpperFromUint32x8", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightUint32x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftRightAndFillUpperFromUint32x8", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightSignExtendedUint32x8", + argLen: 2, + generic: true, + }, { name: 
"SubUint32x8", argLen: 2, @@ -61190,6 +65972,51 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "MaskedRotateLeftUint64x2", + argLen: 3, + generic: true, + }, + { + name: "MaskedRotateRightUint64x2", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllLeftUint64x2", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightUint64x2", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftLeftUint64x2", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftLeftAndFillUpperFromUint64x2", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightUint64x2", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftRightAndFillUpperFromUint64x2", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightSignExtendedUint64x2", + argLen: 3, + generic: true, + }, { name: "MaskedSubUint64x2", argLen: 3, @@ -61236,6 +66063,51 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "RotateLeftUint64x2", + argLen: 2, + generic: true, + }, + { + name: "RotateRightUint64x2", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllLeftUint64x2", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightUint64x2", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftUint64x2", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftAndFillUpperFromUint64x2", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightUint64x2", + argLen: 2, + generic: true, + }, + { + name: "ShiftRightAndFillUpperFromUint64x2", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightSignExtendedUint64x2", + argLen: 2, + generic: true, + }, { name: "SubUint64x2", argLen: 2, @@ -61368,6 +66240,51 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "MaskedRotateLeftUint64x4", + argLen: 3, + generic: true, + }, + { + name: "MaskedRotateRightUint64x4", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllLeftUint64x4", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightUint64x4", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftLeftUint64x4", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftLeftAndFillUpperFromUint64x4", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightUint64x4", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftRightAndFillUpperFromUint64x4", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightSignExtendedUint64x4", + argLen: 3, + generic: true, + }, { name: "MaskedSubUint64x4", argLen: 3, @@ -61414,6 +66331,51 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "RotateLeftUint64x4", + argLen: 2, + generic: true, + }, + { + name: "RotateRightUint64x4", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllLeftUint64x4", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightUint64x4", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftUint64x4", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftAndFillUpperFromUint64x4", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightUint64x4", + argLen: 2, + generic: true, + }, + { + name: "ShiftRightAndFillUpperFromUint64x4", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightSignExtendedUint64x4", + argLen: 2, + generic: true, + }, { name: "SubUint64x4", argLen: 2, @@ -61546,6 +66508,51 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "MaskedRotateLeftUint64x8", + argLen: 3, + generic: true, + }, + { + name: "MaskedRotateRightUint64x8", + argLen: 3, + generic: true, + }, + { + name: 
"MaskedShiftAllLeftUint64x8", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightUint64x8", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftLeftUint64x8", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftLeftAndFillUpperFromUint64x8", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightUint64x8", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftRightAndFillUpperFromUint64x8", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightSignExtendedUint64x8", + argLen: 3, + generic: true, + }, { name: "MaskedSubUint64x8", argLen: 3, @@ -61592,6 +66599,51 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "RotateLeftUint64x8", + argLen: 2, + generic: true, + }, + { + name: "RotateRightUint64x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllLeftUint64x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightUint64x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftUint64x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftAndFillUpperFromUint64x8", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightUint64x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftRightAndFillUpperFromUint64x8", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightSignExtendedUint64x8", + argLen: 2, + generic: true, + }, { name: "SubUint64x8", argLen: 2, @@ -63293,42 +68345,402 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "MaskedShiftAllLeftAndFillUpperFromInt16x16", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightAndFillUpperFromInt16x16", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "ShiftAllLeftAndFillUpperFromInt16x16", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightAndFillUpperFromInt16x16", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedShiftAllLeftAndFillUpperFromInt16x32", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightAndFillUpperFromInt16x32", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "ShiftAllLeftAndFillUpperFromInt16x32", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightAndFillUpperFromInt16x32", + auxType: auxInt8, + argLen: 2, + generic: true, + }, { name: "GetElemInt16x8", auxType: auxInt8, argLen: 1, generic: true, }, + { + name: "MaskedShiftAllLeftAndFillUpperFromInt16x8", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightAndFillUpperFromInt16x8", + auxType: auxInt8, + argLen: 3, + generic: true, + }, { name: "SetElemInt16x8", auxType: auxInt8, argLen: 2, generic: true, }, + { + name: "ShiftAllLeftAndFillUpperFromInt16x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightAndFillUpperFromInt16x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRotateAllLeftInt32x16", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRotateAllRightInt32x16", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedShiftAllLeftAndFillUpperFromInt32x16", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightAndFillUpperFromInt32x16", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "RotateAllLeftInt32x16", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "RotateAllRightInt32x16", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + 
{ + name: "ShiftAllLeftAndFillUpperFromInt32x16", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightAndFillUpperFromInt32x16", + auxType: auxInt8, + argLen: 2, + generic: true, + }, { name: "GetElemInt32x4", auxType: auxInt8, argLen: 1, generic: true, }, + { + name: "MaskedRotateAllLeftInt32x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRotateAllRightInt32x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedShiftAllLeftAndFillUpperFromInt32x4", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightAndFillUpperFromInt32x4", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "RotateAllLeftInt32x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "RotateAllRightInt32x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, { name: "SetElemInt32x4", auxType: auxInt8, argLen: 2, generic: true, }, + { + name: "ShiftAllLeftAndFillUpperFromInt32x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightAndFillUpperFromInt32x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRotateAllLeftInt32x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRotateAllRightInt32x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedShiftAllLeftAndFillUpperFromInt32x8", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightAndFillUpperFromInt32x8", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "RotateAllLeftInt32x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "RotateAllRightInt32x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "ShiftAllLeftAndFillUpperFromInt32x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightAndFillUpperFromInt32x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, { name: "GetElemInt64x2", auxType: auxInt8, argLen: 1, generic: true, }, + { + name: "MaskedRotateAllLeftInt64x2", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRotateAllRightInt64x2", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedShiftAllLeftAndFillUpperFromInt64x2", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightAndFillUpperFromInt64x2", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "RotateAllLeftInt64x2", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "RotateAllRightInt64x2", + auxType: auxInt8, + argLen: 1, + generic: true, + }, { name: "SetElemInt64x2", auxType: auxInt8, argLen: 2, generic: true, }, + { + name: "ShiftAllLeftAndFillUpperFromInt64x2", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightAndFillUpperFromInt64x2", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRotateAllLeftInt64x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRotateAllRightInt64x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedShiftAllLeftAndFillUpperFromInt64x4", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightAndFillUpperFromInt64x4", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "RotateAllLeftInt64x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "RotateAllRightInt64x4", + auxType: auxInt8, + argLen: 1, + generic: true, + 
}, + { + name: "ShiftAllLeftAndFillUpperFromInt64x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightAndFillUpperFromInt64x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRotateAllLeftInt64x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRotateAllRightInt64x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedShiftAllLeftAndFillUpperFromInt64x8", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightAndFillUpperFromInt64x8", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "RotateAllLeftInt64x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "RotateAllRightInt64x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "ShiftAllLeftAndFillUpperFromInt64x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightAndFillUpperFromInt64x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, { name: "GetElemInt8x16", auxType: auxInt8, @@ -63341,42 +68753,402 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "MaskedShiftAllLeftAndFillUpperFromUint16x16", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightAndFillUpperFromUint16x16", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "ShiftAllLeftAndFillUpperFromUint16x16", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightAndFillUpperFromUint16x16", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedShiftAllLeftAndFillUpperFromUint16x32", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightAndFillUpperFromUint16x32", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "ShiftAllLeftAndFillUpperFromUint16x32", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightAndFillUpperFromUint16x32", + auxType: auxInt8, + argLen: 2, + generic: true, + }, { name: "GetElemUint16x8", auxType: auxInt8, argLen: 1, generic: true, }, + { + name: "MaskedShiftAllLeftAndFillUpperFromUint16x8", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightAndFillUpperFromUint16x8", + auxType: auxInt8, + argLen: 3, + generic: true, + }, { name: "SetElemUint16x8", auxType: auxInt8, argLen: 2, generic: true, }, + { + name: "ShiftAllLeftAndFillUpperFromUint16x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightAndFillUpperFromUint16x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRotateAllLeftUint32x16", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRotateAllRightUint32x16", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedShiftAllLeftAndFillUpperFromUint32x16", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightAndFillUpperFromUint32x16", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "RotateAllLeftUint32x16", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "RotateAllRightUint32x16", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "ShiftAllLeftAndFillUpperFromUint32x16", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightAndFillUpperFromUint32x16", + auxType: auxInt8, + argLen: 2, + generic: true, + }, { name: "GetElemUint32x4", auxType: auxInt8, argLen: 1, generic: true, }, + { 
+ name: "MaskedRotateAllLeftUint32x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRotateAllRightUint32x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedShiftAllLeftAndFillUpperFromUint32x4", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightAndFillUpperFromUint32x4", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "RotateAllLeftUint32x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "RotateAllRightUint32x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, { name: "SetElemUint32x4", auxType: auxInt8, argLen: 2, generic: true, }, + { + name: "ShiftAllLeftAndFillUpperFromUint32x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightAndFillUpperFromUint32x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRotateAllLeftUint32x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRotateAllRightUint32x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedShiftAllLeftAndFillUpperFromUint32x8", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightAndFillUpperFromUint32x8", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "RotateAllLeftUint32x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "RotateAllRightUint32x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "ShiftAllLeftAndFillUpperFromUint32x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightAndFillUpperFromUint32x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, { name: "GetElemUint64x2", auxType: auxInt8, argLen: 1, generic: true, }, + { + name: "MaskedRotateAllLeftUint64x2", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRotateAllRightUint64x2", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedShiftAllLeftAndFillUpperFromUint64x2", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightAndFillUpperFromUint64x2", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "RotateAllLeftUint64x2", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "RotateAllRightUint64x2", + auxType: auxInt8, + argLen: 1, + generic: true, + }, { name: "SetElemUint64x2", auxType: auxInt8, argLen: 2, generic: true, }, + { + name: "ShiftAllLeftAndFillUpperFromUint64x2", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightAndFillUpperFromUint64x2", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRotateAllLeftUint64x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRotateAllRightUint64x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedShiftAllLeftAndFillUpperFromUint64x4", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightAndFillUpperFromUint64x4", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "RotateAllLeftUint64x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "RotateAllRightUint64x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "ShiftAllLeftAndFillUpperFromUint64x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightAndFillUpperFromUint64x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: 
"MaskedRotateAllLeftUint64x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRotateAllRightUint64x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedShiftAllLeftAndFillUpperFromUint64x8", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightAndFillUpperFromUint64x8", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "RotateAllLeftUint64x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "RotateAllRightUint64x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "ShiftAllLeftAndFillUpperFromUint64x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightAndFillUpperFromUint64x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, { name: "GetElemUint8x16", auxType: auxInt8, diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index 668024a00fb..d7aa0339e7c 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -2862,6 +2862,102 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpMaskedPopCountUint8x32(v) case OpMaskedPopCountUint8x64: return rewriteValueAMD64_OpMaskedPopCountUint8x64(v) + case OpMaskedRotateAllLeftInt32x16: + return rewriteValueAMD64_OpMaskedRotateAllLeftInt32x16(v) + case OpMaskedRotateAllLeftInt32x4: + return rewriteValueAMD64_OpMaskedRotateAllLeftInt32x4(v) + case OpMaskedRotateAllLeftInt32x8: + return rewriteValueAMD64_OpMaskedRotateAllLeftInt32x8(v) + case OpMaskedRotateAllLeftInt64x2: + return rewriteValueAMD64_OpMaskedRotateAllLeftInt64x2(v) + case OpMaskedRotateAllLeftInt64x4: + return rewriteValueAMD64_OpMaskedRotateAllLeftInt64x4(v) + case OpMaskedRotateAllLeftInt64x8: + return rewriteValueAMD64_OpMaskedRotateAllLeftInt64x8(v) + case OpMaskedRotateAllLeftUint32x16: + return rewriteValueAMD64_OpMaskedRotateAllLeftUint32x16(v) + case OpMaskedRotateAllLeftUint32x4: + return rewriteValueAMD64_OpMaskedRotateAllLeftUint32x4(v) + case OpMaskedRotateAllLeftUint32x8: + return rewriteValueAMD64_OpMaskedRotateAllLeftUint32x8(v) + case OpMaskedRotateAllLeftUint64x2: + return rewriteValueAMD64_OpMaskedRotateAllLeftUint64x2(v) + case OpMaskedRotateAllLeftUint64x4: + return rewriteValueAMD64_OpMaskedRotateAllLeftUint64x4(v) + case OpMaskedRotateAllLeftUint64x8: + return rewriteValueAMD64_OpMaskedRotateAllLeftUint64x8(v) + case OpMaskedRotateAllRightInt32x16: + return rewriteValueAMD64_OpMaskedRotateAllRightInt32x16(v) + case OpMaskedRotateAllRightInt32x4: + return rewriteValueAMD64_OpMaskedRotateAllRightInt32x4(v) + case OpMaskedRotateAllRightInt32x8: + return rewriteValueAMD64_OpMaskedRotateAllRightInt32x8(v) + case OpMaskedRotateAllRightInt64x2: + return rewriteValueAMD64_OpMaskedRotateAllRightInt64x2(v) + case OpMaskedRotateAllRightInt64x4: + return rewriteValueAMD64_OpMaskedRotateAllRightInt64x4(v) + case OpMaskedRotateAllRightInt64x8: + return rewriteValueAMD64_OpMaskedRotateAllRightInt64x8(v) + case OpMaskedRotateAllRightUint32x16: + return rewriteValueAMD64_OpMaskedRotateAllRightUint32x16(v) + case OpMaskedRotateAllRightUint32x4: + return rewriteValueAMD64_OpMaskedRotateAllRightUint32x4(v) + case OpMaskedRotateAllRightUint32x8: + return rewriteValueAMD64_OpMaskedRotateAllRightUint32x8(v) + case OpMaskedRotateAllRightUint64x2: + return rewriteValueAMD64_OpMaskedRotateAllRightUint64x2(v) + case OpMaskedRotateAllRightUint64x4: + return 
rewriteValueAMD64_OpMaskedRotateAllRightUint64x4(v) + case OpMaskedRotateAllRightUint64x8: + return rewriteValueAMD64_OpMaskedRotateAllRightUint64x8(v) + case OpMaskedRotateLeftInt32x16: + return rewriteValueAMD64_OpMaskedRotateLeftInt32x16(v) + case OpMaskedRotateLeftInt32x4: + return rewriteValueAMD64_OpMaskedRotateLeftInt32x4(v) + case OpMaskedRotateLeftInt32x8: + return rewriteValueAMD64_OpMaskedRotateLeftInt32x8(v) + case OpMaskedRotateLeftInt64x2: + return rewriteValueAMD64_OpMaskedRotateLeftInt64x2(v) + case OpMaskedRotateLeftInt64x4: + return rewriteValueAMD64_OpMaskedRotateLeftInt64x4(v) + case OpMaskedRotateLeftInt64x8: + return rewriteValueAMD64_OpMaskedRotateLeftInt64x8(v) + case OpMaskedRotateLeftUint32x16: + return rewriteValueAMD64_OpMaskedRotateLeftUint32x16(v) + case OpMaskedRotateLeftUint32x4: + return rewriteValueAMD64_OpMaskedRotateLeftUint32x4(v) + case OpMaskedRotateLeftUint32x8: + return rewriteValueAMD64_OpMaskedRotateLeftUint32x8(v) + case OpMaskedRotateLeftUint64x2: + return rewriteValueAMD64_OpMaskedRotateLeftUint64x2(v) + case OpMaskedRotateLeftUint64x4: + return rewriteValueAMD64_OpMaskedRotateLeftUint64x4(v) + case OpMaskedRotateLeftUint64x8: + return rewriteValueAMD64_OpMaskedRotateLeftUint64x8(v) + case OpMaskedRotateRightInt32x16: + return rewriteValueAMD64_OpMaskedRotateRightInt32x16(v) + case OpMaskedRotateRightInt32x4: + return rewriteValueAMD64_OpMaskedRotateRightInt32x4(v) + case OpMaskedRotateRightInt32x8: + return rewriteValueAMD64_OpMaskedRotateRightInt32x8(v) + case OpMaskedRotateRightInt64x2: + return rewriteValueAMD64_OpMaskedRotateRightInt64x2(v) + case OpMaskedRotateRightInt64x4: + return rewriteValueAMD64_OpMaskedRotateRightInt64x4(v) + case OpMaskedRotateRightInt64x8: + return rewriteValueAMD64_OpMaskedRotateRightInt64x8(v) + case OpMaskedRotateRightUint32x16: + return rewriteValueAMD64_OpMaskedRotateRightUint32x16(v) + case OpMaskedRotateRightUint32x4: + return rewriteValueAMD64_OpMaskedRotateRightUint32x4(v) + case OpMaskedRotateRightUint32x8: + return rewriteValueAMD64_OpMaskedRotateRightUint32x8(v) + case OpMaskedRotateRightUint64x2: + return rewriteValueAMD64_OpMaskedRotateRightUint64x2(v) + case OpMaskedRotateRightUint64x4: + return rewriteValueAMD64_OpMaskedRotateRightUint64x4(v) + case OpMaskedRotateRightUint64x8: + return rewriteValueAMD64_OpMaskedRotateRightUint64x8(v) case OpMaskedRoundSuppressExceptionWithPrecisionFloat32x16: return rewriteValueAMD64_OpMaskedRoundSuppressExceptionWithPrecisionFloat32x16(v) case OpMaskedRoundSuppressExceptionWithPrecisionFloat32x4: @@ -2958,6 +3054,288 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4(v) case OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8: return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8(v) + case OpMaskedShiftAllLeftAndFillUpperFromInt16x16: + return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt16x16(v) + case OpMaskedShiftAllLeftAndFillUpperFromInt16x32: + return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt16x32(v) + case OpMaskedShiftAllLeftAndFillUpperFromInt16x8: + return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt16x8(v) + case OpMaskedShiftAllLeftAndFillUpperFromInt32x16: + return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt32x16(v) + case OpMaskedShiftAllLeftAndFillUpperFromInt32x4: + return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt32x4(v) + case 
OpMaskedShiftAllLeftAndFillUpperFromInt32x8: + return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt32x8(v) + case OpMaskedShiftAllLeftAndFillUpperFromInt64x2: + return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt64x2(v) + case OpMaskedShiftAllLeftAndFillUpperFromInt64x4: + return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt64x4(v) + case OpMaskedShiftAllLeftAndFillUpperFromInt64x8: + return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt64x8(v) + case OpMaskedShiftAllLeftAndFillUpperFromUint16x16: + return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint16x16(v) + case OpMaskedShiftAllLeftAndFillUpperFromUint16x32: + return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint16x32(v) + case OpMaskedShiftAllLeftAndFillUpperFromUint16x8: + return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint16x8(v) + case OpMaskedShiftAllLeftAndFillUpperFromUint32x16: + return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint32x16(v) + case OpMaskedShiftAllLeftAndFillUpperFromUint32x4: + return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint32x4(v) + case OpMaskedShiftAllLeftAndFillUpperFromUint32x8: + return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint32x8(v) + case OpMaskedShiftAllLeftAndFillUpperFromUint64x2: + return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint64x2(v) + case OpMaskedShiftAllLeftAndFillUpperFromUint64x4: + return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint64x4(v) + case OpMaskedShiftAllLeftAndFillUpperFromUint64x8: + return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint64x8(v) + case OpMaskedShiftAllLeftInt64x2: + return rewriteValueAMD64_OpMaskedShiftAllLeftInt64x2(v) + case OpMaskedShiftAllLeftInt64x4: + return rewriteValueAMD64_OpMaskedShiftAllLeftInt64x4(v) + case OpMaskedShiftAllLeftInt64x8: + return rewriteValueAMD64_OpMaskedShiftAllLeftInt64x8(v) + case OpMaskedShiftAllLeftUint64x2: + return rewriteValueAMD64_OpMaskedShiftAllLeftUint64x2(v) + case OpMaskedShiftAllLeftUint64x4: + return rewriteValueAMD64_OpMaskedShiftAllLeftUint64x4(v) + case OpMaskedShiftAllLeftUint64x8: + return rewriteValueAMD64_OpMaskedShiftAllLeftUint64x8(v) + case OpMaskedShiftAllRightAndFillUpperFromInt16x16: + return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt16x16(v) + case OpMaskedShiftAllRightAndFillUpperFromInt16x32: + return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt16x32(v) + case OpMaskedShiftAllRightAndFillUpperFromInt16x8: + return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt16x8(v) + case OpMaskedShiftAllRightAndFillUpperFromInt32x16: + return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt32x16(v) + case OpMaskedShiftAllRightAndFillUpperFromInt32x4: + return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt32x4(v) + case OpMaskedShiftAllRightAndFillUpperFromInt32x8: + return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt32x8(v) + case OpMaskedShiftAllRightAndFillUpperFromInt64x2: + return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt64x2(v) + case OpMaskedShiftAllRightAndFillUpperFromInt64x4: + return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt64x4(v) + case OpMaskedShiftAllRightAndFillUpperFromInt64x8: + return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt64x8(v) + case OpMaskedShiftAllRightAndFillUpperFromUint16x16: + return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint16x16(v) + case 
OpMaskedShiftAllRightAndFillUpperFromUint16x32: + return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint16x32(v) + case OpMaskedShiftAllRightAndFillUpperFromUint16x8: + return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint16x8(v) + case OpMaskedShiftAllRightAndFillUpperFromUint32x16: + return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint32x16(v) + case OpMaskedShiftAllRightAndFillUpperFromUint32x4: + return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint32x4(v) + case OpMaskedShiftAllRightAndFillUpperFromUint32x8: + return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint32x8(v) + case OpMaskedShiftAllRightAndFillUpperFromUint64x2: + return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint64x2(v) + case OpMaskedShiftAllRightAndFillUpperFromUint64x4: + return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint64x4(v) + case OpMaskedShiftAllRightAndFillUpperFromUint64x8: + return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint64x8(v) + case OpMaskedShiftAllRightInt64x2: + return rewriteValueAMD64_OpMaskedShiftAllRightInt64x2(v) + case OpMaskedShiftAllRightInt64x4: + return rewriteValueAMD64_OpMaskedShiftAllRightInt64x4(v) + case OpMaskedShiftAllRightInt64x8: + return rewriteValueAMD64_OpMaskedShiftAllRightInt64x8(v) + case OpMaskedShiftAllRightSignExtendedInt64x2: + return rewriteValueAMD64_OpMaskedShiftAllRightSignExtendedInt64x2(v) + case OpMaskedShiftAllRightSignExtendedInt64x4: + return rewriteValueAMD64_OpMaskedShiftAllRightSignExtendedInt64x4(v) + case OpMaskedShiftAllRightSignExtendedInt64x8: + return rewriteValueAMD64_OpMaskedShiftAllRightSignExtendedInt64x8(v) + case OpMaskedShiftAllRightUint64x2: + return rewriteValueAMD64_OpMaskedShiftAllRightUint64x2(v) + case OpMaskedShiftAllRightUint64x4: + return rewriteValueAMD64_OpMaskedShiftAllRightUint64x4(v) + case OpMaskedShiftAllRightUint64x8: + return rewriteValueAMD64_OpMaskedShiftAllRightUint64x8(v) + case OpMaskedShiftLeftAndFillUpperFromInt16x16: + return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt16x16(v) + case OpMaskedShiftLeftAndFillUpperFromInt16x32: + return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt16x32(v) + case OpMaskedShiftLeftAndFillUpperFromInt16x8: + return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt16x8(v) + case OpMaskedShiftLeftAndFillUpperFromInt32x16: + return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt32x16(v) + case OpMaskedShiftLeftAndFillUpperFromInt32x4: + return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt32x4(v) + case OpMaskedShiftLeftAndFillUpperFromInt32x8: + return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt32x8(v) + case OpMaskedShiftLeftAndFillUpperFromInt64x2: + return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt64x2(v) + case OpMaskedShiftLeftAndFillUpperFromInt64x4: + return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt64x4(v) + case OpMaskedShiftLeftAndFillUpperFromInt64x8: + return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt64x8(v) + case OpMaskedShiftLeftAndFillUpperFromUint16x16: + return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint16x16(v) + case OpMaskedShiftLeftAndFillUpperFromUint16x32: + return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint16x32(v) + case OpMaskedShiftLeftAndFillUpperFromUint16x8: + return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint16x8(v) + case OpMaskedShiftLeftAndFillUpperFromUint32x16: + return 
rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint32x16(v) + case OpMaskedShiftLeftAndFillUpperFromUint32x4: + return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint32x4(v) + case OpMaskedShiftLeftAndFillUpperFromUint32x8: + return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint32x8(v) + case OpMaskedShiftLeftAndFillUpperFromUint64x2: + return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint64x2(v) + case OpMaskedShiftLeftAndFillUpperFromUint64x4: + return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint64x4(v) + case OpMaskedShiftLeftAndFillUpperFromUint64x8: + return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint64x8(v) + case OpMaskedShiftLeftInt16x16: + return rewriteValueAMD64_OpMaskedShiftLeftInt16x16(v) + case OpMaskedShiftLeftInt16x32: + return rewriteValueAMD64_OpMaskedShiftLeftInt16x32(v) + case OpMaskedShiftLeftInt16x8: + return rewriteValueAMD64_OpMaskedShiftLeftInt16x8(v) + case OpMaskedShiftLeftInt32x16: + return rewriteValueAMD64_OpMaskedShiftLeftInt32x16(v) + case OpMaskedShiftLeftInt32x4: + return rewriteValueAMD64_OpMaskedShiftLeftInt32x4(v) + case OpMaskedShiftLeftInt32x8: + return rewriteValueAMD64_OpMaskedShiftLeftInt32x8(v) + case OpMaskedShiftLeftInt64x2: + return rewriteValueAMD64_OpMaskedShiftLeftInt64x2(v) + case OpMaskedShiftLeftInt64x4: + return rewriteValueAMD64_OpMaskedShiftLeftInt64x4(v) + case OpMaskedShiftLeftInt64x8: + return rewriteValueAMD64_OpMaskedShiftLeftInt64x8(v) + case OpMaskedShiftLeftUint16x16: + return rewriteValueAMD64_OpMaskedShiftLeftUint16x16(v) + case OpMaskedShiftLeftUint16x32: + return rewriteValueAMD64_OpMaskedShiftLeftUint16x32(v) + case OpMaskedShiftLeftUint16x8: + return rewriteValueAMD64_OpMaskedShiftLeftUint16x8(v) + case OpMaskedShiftLeftUint32x16: + return rewriteValueAMD64_OpMaskedShiftLeftUint32x16(v) + case OpMaskedShiftLeftUint32x4: + return rewriteValueAMD64_OpMaskedShiftLeftUint32x4(v) + case OpMaskedShiftLeftUint32x8: + return rewriteValueAMD64_OpMaskedShiftLeftUint32x8(v) + case OpMaskedShiftLeftUint64x2: + return rewriteValueAMD64_OpMaskedShiftLeftUint64x2(v) + case OpMaskedShiftLeftUint64x4: + return rewriteValueAMD64_OpMaskedShiftLeftUint64x4(v) + case OpMaskedShiftLeftUint64x8: + return rewriteValueAMD64_OpMaskedShiftLeftUint64x8(v) + case OpMaskedShiftRightAndFillUpperFromInt16x16: + return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt16x16(v) + case OpMaskedShiftRightAndFillUpperFromInt16x32: + return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt16x32(v) + case OpMaskedShiftRightAndFillUpperFromInt16x8: + return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt16x8(v) + case OpMaskedShiftRightAndFillUpperFromInt32x16: + return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt32x16(v) + case OpMaskedShiftRightAndFillUpperFromInt32x4: + return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt32x4(v) + case OpMaskedShiftRightAndFillUpperFromInt32x8: + return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt32x8(v) + case OpMaskedShiftRightAndFillUpperFromInt64x2: + return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt64x2(v) + case OpMaskedShiftRightAndFillUpperFromInt64x4: + return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt64x4(v) + case OpMaskedShiftRightAndFillUpperFromInt64x8: + return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt64x8(v) + case OpMaskedShiftRightAndFillUpperFromUint16x16: + return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint16x16(v) + case 
OpMaskedShiftRightAndFillUpperFromUint16x32: + return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint16x32(v) + case OpMaskedShiftRightAndFillUpperFromUint16x8: + return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint16x8(v) + case OpMaskedShiftRightAndFillUpperFromUint32x16: + return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint32x16(v) + case OpMaskedShiftRightAndFillUpperFromUint32x4: + return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint32x4(v) + case OpMaskedShiftRightAndFillUpperFromUint32x8: + return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint32x8(v) + case OpMaskedShiftRightAndFillUpperFromUint64x2: + return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint64x2(v) + case OpMaskedShiftRightAndFillUpperFromUint64x4: + return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint64x4(v) + case OpMaskedShiftRightAndFillUpperFromUint64x8: + return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint64x8(v) + case OpMaskedShiftRightInt16x16: + return rewriteValueAMD64_OpMaskedShiftRightInt16x16(v) + case OpMaskedShiftRightInt16x32: + return rewriteValueAMD64_OpMaskedShiftRightInt16x32(v) + case OpMaskedShiftRightInt16x8: + return rewriteValueAMD64_OpMaskedShiftRightInt16x8(v) + case OpMaskedShiftRightInt32x16: + return rewriteValueAMD64_OpMaskedShiftRightInt32x16(v) + case OpMaskedShiftRightInt32x4: + return rewriteValueAMD64_OpMaskedShiftRightInt32x4(v) + case OpMaskedShiftRightInt32x8: + return rewriteValueAMD64_OpMaskedShiftRightInt32x8(v) + case OpMaskedShiftRightInt64x2: + return rewriteValueAMD64_OpMaskedShiftRightInt64x2(v) + case OpMaskedShiftRightInt64x4: + return rewriteValueAMD64_OpMaskedShiftRightInt64x4(v) + case OpMaskedShiftRightInt64x8: + return rewriteValueAMD64_OpMaskedShiftRightInt64x8(v) + case OpMaskedShiftRightSignExtendedInt16x16: + return rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt16x16(v) + case OpMaskedShiftRightSignExtendedInt16x32: + return rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt16x32(v) + case OpMaskedShiftRightSignExtendedInt16x8: + return rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt16x8(v) + case OpMaskedShiftRightSignExtendedInt32x16: + return rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt32x16(v) + case OpMaskedShiftRightSignExtendedInt32x4: + return rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt32x4(v) + case OpMaskedShiftRightSignExtendedInt32x8: + return rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt32x8(v) + case OpMaskedShiftRightSignExtendedInt64x2: + return rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt64x2(v) + case OpMaskedShiftRightSignExtendedInt64x4: + return rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt64x4(v) + case OpMaskedShiftRightSignExtendedInt64x8: + return rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt64x8(v) + case OpMaskedShiftRightSignExtendedUint16x16: + return rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint16x16(v) + case OpMaskedShiftRightSignExtendedUint16x32: + return rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint16x32(v) + case OpMaskedShiftRightSignExtendedUint16x8: + return rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint16x8(v) + case OpMaskedShiftRightSignExtendedUint32x16: + return rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint32x16(v) + case OpMaskedShiftRightSignExtendedUint32x4: + return rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint32x4(v) + case OpMaskedShiftRightSignExtendedUint32x8: + return rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint32x8(v) + case 
OpMaskedShiftRightSignExtendedUint64x2: + return rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint64x2(v) + case OpMaskedShiftRightSignExtendedUint64x4: + return rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint64x4(v) + case OpMaskedShiftRightSignExtendedUint64x8: + return rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint64x8(v) + case OpMaskedShiftRightUint16x16: + return rewriteValueAMD64_OpMaskedShiftRightUint16x16(v) + case OpMaskedShiftRightUint16x32: + return rewriteValueAMD64_OpMaskedShiftRightUint16x32(v) + case OpMaskedShiftRightUint16x8: + return rewriteValueAMD64_OpMaskedShiftRightUint16x8(v) + case OpMaskedShiftRightUint32x16: + return rewriteValueAMD64_OpMaskedShiftRightUint32x16(v) + case OpMaskedShiftRightUint32x4: + return rewriteValueAMD64_OpMaskedShiftRightUint32x4(v) + case OpMaskedShiftRightUint32x8: + return rewriteValueAMD64_OpMaskedShiftRightUint32x8(v) + case OpMaskedShiftRightUint64x2: + return rewriteValueAMD64_OpMaskedShiftRightUint64x2(v) + case OpMaskedShiftRightUint64x4: + return rewriteValueAMD64_OpMaskedShiftRightUint64x4(v) + case OpMaskedShiftRightUint64x8: + return rewriteValueAMD64_OpMaskedShiftRightUint64x8(v) case OpMaskedSqrtFloat32x16: return rewriteValueAMD64_OpMaskedSqrtFloat32x16(v) case OpMaskedSqrtFloat32x4: @@ -3812,6 +4190,54 @@ func rewriteValueAMD64(v *Value) bool { case OpPrefetchCacheStreamed: v.Op = OpAMD64PrefetchNTA return true + case OpRotateAllLeftInt32x16: + return rewriteValueAMD64_OpRotateAllLeftInt32x16(v) + case OpRotateAllLeftInt32x4: + return rewriteValueAMD64_OpRotateAllLeftInt32x4(v) + case OpRotateAllLeftInt32x8: + return rewriteValueAMD64_OpRotateAllLeftInt32x8(v) + case OpRotateAllLeftInt64x2: + return rewriteValueAMD64_OpRotateAllLeftInt64x2(v) + case OpRotateAllLeftInt64x4: + return rewriteValueAMD64_OpRotateAllLeftInt64x4(v) + case OpRotateAllLeftInt64x8: + return rewriteValueAMD64_OpRotateAllLeftInt64x8(v) + case OpRotateAllLeftUint32x16: + return rewriteValueAMD64_OpRotateAllLeftUint32x16(v) + case OpRotateAllLeftUint32x4: + return rewriteValueAMD64_OpRotateAllLeftUint32x4(v) + case OpRotateAllLeftUint32x8: + return rewriteValueAMD64_OpRotateAllLeftUint32x8(v) + case OpRotateAllLeftUint64x2: + return rewriteValueAMD64_OpRotateAllLeftUint64x2(v) + case OpRotateAllLeftUint64x4: + return rewriteValueAMD64_OpRotateAllLeftUint64x4(v) + case OpRotateAllLeftUint64x8: + return rewriteValueAMD64_OpRotateAllLeftUint64x8(v) + case OpRotateAllRightInt32x16: + return rewriteValueAMD64_OpRotateAllRightInt32x16(v) + case OpRotateAllRightInt32x4: + return rewriteValueAMD64_OpRotateAllRightInt32x4(v) + case OpRotateAllRightInt32x8: + return rewriteValueAMD64_OpRotateAllRightInt32x8(v) + case OpRotateAllRightInt64x2: + return rewriteValueAMD64_OpRotateAllRightInt64x2(v) + case OpRotateAllRightInt64x4: + return rewriteValueAMD64_OpRotateAllRightInt64x4(v) + case OpRotateAllRightInt64x8: + return rewriteValueAMD64_OpRotateAllRightInt64x8(v) + case OpRotateAllRightUint32x16: + return rewriteValueAMD64_OpRotateAllRightUint32x16(v) + case OpRotateAllRightUint32x4: + return rewriteValueAMD64_OpRotateAllRightUint32x4(v) + case OpRotateAllRightUint32x8: + return rewriteValueAMD64_OpRotateAllRightUint32x8(v) + case OpRotateAllRightUint64x2: + return rewriteValueAMD64_OpRotateAllRightUint64x2(v) + case OpRotateAllRightUint64x4: + return rewriteValueAMD64_OpRotateAllRightUint64x4(v) + case OpRotateAllRightUint64x8: + return rewriteValueAMD64_OpRotateAllRightUint64x8(v) case OpRotateLeft16: v.Op = OpAMD64ROLW return true @@ -3824,6 
+4250,78 @@ func rewriteValueAMD64(v *Value) bool { case OpRotateLeft8: v.Op = OpAMD64ROLB return true + case OpRotateLeftInt32x16: + v.Op = OpAMD64VPROLVD512 + return true + case OpRotateLeftInt32x4: + v.Op = OpAMD64VPROLVD128 + return true + case OpRotateLeftInt32x8: + v.Op = OpAMD64VPROLVD256 + return true + case OpRotateLeftInt64x2: + v.Op = OpAMD64VPROLVQ128 + return true + case OpRotateLeftInt64x4: + v.Op = OpAMD64VPROLVQ256 + return true + case OpRotateLeftInt64x8: + v.Op = OpAMD64VPROLVQ512 + return true + case OpRotateLeftUint32x16: + v.Op = OpAMD64VPROLVD512 + return true + case OpRotateLeftUint32x4: + v.Op = OpAMD64VPROLVD128 + return true + case OpRotateLeftUint32x8: + v.Op = OpAMD64VPROLVD256 + return true + case OpRotateLeftUint64x2: + v.Op = OpAMD64VPROLVQ128 + return true + case OpRotateLeftUint64x4: + v.Op = OpAMD64VPROLVQ256 + return true + case OpRotateLeftUint64x8: + v.Op = OpAMD64VPROLVQ512 + return true + case OpRotateRightInt32x16: + v.Op = OpAMD64VPRORVD512 + return true + case OpRotateRightInt32x4: + v.Op = OpAMD64VPRORVD128 + return true + case OpRotateRightInt32x8: + v.Op = OpAMD64VPRORVD256 + return true + case OpRotateRightInt64x2: + v.Op = OpAMD64VPRORVQ128 + return true + case OpRotateRightInt64x4: + v.Op = OpAMD64VPRORVQ256 + return true + case OpRotateRightInt64x8: + v.Op = OpAMD64VPRORVQ512 + return true + case OpRotateRightUint32x16: + v.Op = OpAMD64VPRORVD512 + return true + case OpRotateRightUint32x4: + v.Op = OpAMD64VPRORVD128 + return true + case OpRotateRightUint32x8: + v.Op = OpAMD64VPRORVD256 + return true + case OpRotateRightUint64x2: + v.Op = OpAMD64VPRORVQ128 + return true + case OpRotateRightUint64x4: + v.Op = OpAMD64VPRORVQ256 + return true + case OpRotateRightUint64x8: + v.Op = OpAMD64VPRORVQ512 + return true case OpRound32F: v.Op = OpAMD64LoweredRound32F return true @@ -4070,6 +4568,453 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpSetElemUint64x2(v) case OpSetElemUint8x16: return rewriteValueAMD64_OpSetElemUint8x16(v) + case OpShiftAllLeftAndFillUpperFromInt16x16: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt16x16(v) + case OpShiftAllLeftAndFillUpperFromInt16x32: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt16x32(v) + case OpShiftAllLeftAndFillUpperFromInt16x8: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt16x8(v) + case OpShiftAllLeftAndFillUpperFromInt32x16: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt32x16(v) + case OpShiftAllLeftAndFillUpperFromInt32x4: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt32x4(v) + case OpShiftAllLeftAndFillUpperFromInt32x8: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt32x8(v) + case OpShiftAllLeftAndFillUpperFromInt64x2: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt64x2(v) + case OpShiftAllLeftAndFillUpperFromInt64x4: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt64x4(v) + case OpShiftAllLeftAndFillUpperFromInt64x8: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt64x8(v) + case OpShiftAllLeftAndFillUpperFromUint16x16: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint16x16(v) + case OpShiftAllLeftAndFillUpperFromUint16x32: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint16x32(v) + case OpShiftAllLeftAndFillUpperFromUint16x8: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint16x8(v) + case OpShiftAllLeftAndFillUpperFromUint32x16: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint32x16(v) + case 
OpShiftAllLeftAndFillUpperFromUint32x4: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint32x4(v) + case OpShiftAllLeftAndFillUpperFromUint32x8: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint32x8(v) + case OpShiftAllLeftAndFillUpperFromUint64x2: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint64x2(v) + case OpShiftAllLeftAndFillUpperFromUint64x4: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint64x4(v) + case OpShiftAllLeftAndFillUpperFromUint64x8: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint64x8(v) + case OpShiftAllLeftInt16x16: + v.Op = OpAMD64VPSLLW256 + return true + case OpShiftAllLeftInt16x8: + v.Op = OpAMD64VPSLLW128 + return true + case OpShiftAllLeftInt32x4: + v.Op = OpAMD64VPSLLD128 + return true + case OpShiftAllLeftInt32x8: + v.Op = OpAMD64VPSLLD256 + return true + case OpShiftAllLeftInt64x2: + v.Op = OpAMD64VPSLLQ128 + return true + case OpShiftAllLeftInt64x4: + v.Op = OpAMD64VPSLLQ256 + return true + case OpShiftAllLeftInt64x8: + v.Op = OpAMD64VPSLLQ512 + return true + case OpShiftAllLeftUint16x16: + v.Op = OpAMD64VPSLLW256 + return true + case OpShiftAllLeftUint16x8: + v.Op = OpAMD64VPSLLW128 + return true + case OpShiftAllLeftUint32x4: + v.Op = OpAMD64VPSLLD128 + return true + case OpShiftAllLeftUint32x8: + v.Op = OpAMD64VPSLLD256 + return true + case OpShiftAllLeftUint64x2: + v.Op = OpAMD64VPSLLQ128 + return true + case OpShiftAllLeftUint64x4: + v.Op = OpAMD64VPSLLQ256 + return true + case OpShiftAllLeftUint64x8: + v.Op = OpAMD64VPSLLQ512 + return true + case OpShiftAllRightAndFillUpperFromInt16x16: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt16x16(v) + case OpShiftAllRightAndFillUpperFromInt16x32: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt16x32(v) + case OpShiftAllRightAndFillUpperFromInt16x8: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt16x8(v) + case OpShiftAllRightAndFillUpperFromInt32x16: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt32x16(v) + case OpShiftAllRightAndFillUpperFromInt32x4: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt32x4(v) + case OpShiftAllRightAndFillUpperFromInt32x8: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt32x8(v) + case OpShiftAllRightAndFillUpperFromInt64x2: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt64x2(v) + case OpShiftAllRightAndFillUpperFromInt64x4: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt64x4(v) + case OpShiftAllRightAndFillUpperFromInt64x8: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt64x8(v) + case OpShiftAllRightAndFillUpperFromUint16x16: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint16x16(v) + case OpShiftAllRightAndFillUpperFromUint16x32: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint16x32(v) + case OpShiftAllRightAndFillUpperFromUint16x8: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint16x8(v) + case OpShiftAllRightAndFillUpperFromUint32x16: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint32x16(v) + case OpShiftAllRightAndFillUpperFromUint32x4: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint32x4(v) + case OpShiftAllRightAndFillUpperFromUint32x8: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint32x8(v) + case OpShiftAllRightAndFillUpperFromUint64x2: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint64x2(v) + case OpShiftAllRightAndFillUpperFromUint64x4: + return 
rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint64x4(v) + case OpShiftAllRightAndFillUpperFromUint64x8: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint64x8(v) + case OpShiftAllRightInt16x16: + v.Op = OpAMD64VPSRLW256 + return true + case OpShiftAllRightInt16x8: + v.Op = OpAMD64VPSRLW128 + return true + case OpShiftAllRightInt32x4: + v.Op = OpAMD64VPSRLD128 + return true + case OpShiftAllRightInt32x8: + v.Op = OpAMD64VPSRLD256 + return true + case OpShiftAllRightInt64x2: + v.Op = OpAMD64VPSRLQ128 + return true + case OpShiftAllRightInt64x4: + v.Op = OpAMD64VPSRLQ256 + return true + case OpShiftAllRightInt64x8: + v.Op = OpAMD64VPSRLQ512 + return true + case OpShiftAllRightSignExtendedInt16x16: + v.Op = OpAMD64VPSRAW256 + return true + case OpShiftAllRightSignExtendedInt16x8: + v.Op = OpAMD64VPSRAW128 + return true + case OpShiftAllRightSignExtendedInt32x4: + v.Op = OpAMD64VPSRAD128 + return true + case OpShiftAllRightSignExtendedInt32x8: + v.Op = OpAMD64VPSRAD256 + return true + case OpShiftAllRightSignExtendedInt64x2: + v.Op = OpAMD64VPSRAQ128 + return true + case OpShiftAllRightSignExtendedInt64x4: + v.Op = OpAMD64VPSRAQ256 + return true + case OpShiftAllRightSignExtendedInt64x8: + v.Op = OpAMD64VPSRAQ512 + return true + case OpShiftAllRightUint16x16: + v.Op = OpAMD64VPSRLW256 + return true + case OpShiftAllRightUint16x8: + v.Op = OpAMD64VPSRLW128 + return true + case OpShiftAllRightUint32x4: + v.Op = OpAMD64VPSRLD128 + return true + case OpShiftAllRightUint32x8: + v.Op = OpAMD64VPSRLD256 + return true + case OpShiftAllRightUint64x2: + v.Op = OpAMD64VPSRLQ128 + return true + case OpShiftAllRightUint64x4: + v.Op = OpAMD64VPSRLQ256 + return true + case OpShiftAllRightUint64x8: + v.Op = OpAMD64VPSRLQ512 + return true + case OpShiftLeftAndFillUpperFromInt16x16: + v.Op = OpAMD64VPSHLDVW256 + return true + case OpShiftLeftAndFillUpperFromInt16x32: + v.Op = OpAMD64VPSHLDVW512 + return true + case OpShiftLeftAndFillUpperFromInt16x8: + v.Op = OpAMD64VPSHLDVW128 + return true + case OpShiftLeftAndFillUpperFromInt32x16: + v.Op = OpAMD64VPSHLDVD512 + return true + case OpShiftLeftAndFillUpperFromInt32x4: + v.Op = OpAMD64VPSHLDVD128 + return true + case OpShiftLeftAndFillUpperFromInt32x8: + v.Op = OpAMD64VPSHLDVD256 + return true + case OpShiftLeftAndFillUpperFromInt64x2: + v.Op = OpAMD64VPSHLDVQ128 + return true + case OpShiftLeftAndFillUpperFromInt64x4: + v.Op = OpAMD64VPSHLDVQ256 + return true + case OpShiftLeftAndFillUpperFromInt64x8: + v.Op = OpAMD64VPSHLDVQ512 + return true + case OpShiftLeftAndFillUpperFromUint16x16: + v.Op = OpAMD64VPSHLDVW256 + return true + case OpShiftLeftAndFillUpperFromUint16x32: + v.Op = OpAMD64VPSHLDVW512 + return true + case OpShiftLeftAndFillUpperFromUint16x8: + v.Op = OpAMD64VPSHLDVW128 + return true + case OpShiftLeftAndFillUpperFromUint32x16: + v.Op = OpAMD64VPSHLDVD512 + return true + case OpShiftLeftAndFillUpperFromUint32x4: + v.Op = OpAMD64VPSHLDVD128 + return true + case OpShiftLeftAndFillUpperFromUint32x8: + v.Op = OpAMD64VPSHLDVD256 + return true + case OpShiftLeftAndFillUpperFromUint64x2: + v.Op = OpAMD64VPSHLDVQ128 + return true + case OpShiftLeftAndFillUpperFromUint64x4: + v.Op = OpAMD64VPSHLDVQ256 + return true + case OpShiftLeftAndFillUpperFromUint64x8: + v.Op = OpAMD64VPSHLDVQ512 + return true + case OpShiftLeftInt16x16: + v.Op = OpAMD64VPSLLVW256 + return true + case OpShiftLeftInt16x32: + v.Op = OpAMD64VPSLLVW512 + return true + case OpShiftLeftInt16x8: + v.Op = OpAMD64VPSLLVW128 + return true + case OpShiftLeftInt32x16: + v.Op = 
OpAMD64VPSLLVD512 + return true + case OpShiftLeftInt32x4: + v.Op = OpAMD64VPSLLVD128 + return true + case OpShiftLeftInt32x8: + v.Op = OpAMD64VPSLLVD256 + return true + case OpShiftLeftInt64x2: + v.Op = OpAMD64VPSLLVQ128 + return true + case OpShiftLeftInt64x4: + v.Op = OpAMD64VPSLLVQ256 + return true + case OpShiftLeftInt64x8: + v.Op = OpAMD64VPSLLVQ512 + return true + case OpShiftLeftUint16x16: + v.Op = OpAMD64VPSLLVW256 + return true + case OpShiftLeftUint16x32: + v.Op = OpAMD64VPSLLVW512 + return true + case OpShiftLeftUint16x8: + v.Op = OpAMD64VPSLLVW128 + return true + case OpShiftLeftUint32x16: + v.Op = OpAMD64VPSLLVD512 + return true + case OpShiftLeftUint32x4: + v.Op = OpAMD64VPSLLVD128 + return true + case OpShiftLeftUint32x8: + v.Op = OpAMD64VPSLLVD256 + return true + case OpShiftLeftUint64x2: + v.Op = OpAMD64VPSLLVQ128 + return true + case OpShiftLeftUint64x4: + v.Op = OpAMD64VPSLLVQ256 + return true + case OpShiftLeftUint64x8: + v.Op = OpAMD64VPSLLVQ512 + return true + case OpShiftRightAndFillUpperFromInt16x16: + v.Op = OpAMD64VPSHRDVW256 + return true + case OpShiftRightAndFillUpperFromInt16x32: + v.Op = OpAMD64VPSHRDVW512 + return true + case OpShiftRightAndFillUpperFromInt16x8: + v.Op = OpAMD64VPSHRDVW128 + return true + case OpShiftRightAndFillUpperFromInt32x16: + v.Op = OpAMD64VPSHRDVD512 + return true + case OpShiftRightAndFillUpperFromInt32x4: + v.Op = OpAMD64VPSHRDVD128 + return true + case OpShiftRightAndFillUpperFromInt32x8: + v.Op = OpAMD64VPSHRDVD256 + return true + case OpShiftRightAndFillUpperFromInt64x2: + v.Op = OpAMD64VPSHRDVQ128 + return true + case OpShiftRightAndFillUpperFromInt64x4: + v.Op = OpAMD64VPSHRDVQ256 + return true + case OpShiftRightAndFillUpperFromInt64x8: + v.Op = OpAMD64VPSHRDVQ512 + return true + case OpShiftRightAndFillUpperFromUint16x16: + v.Op = OpAMD64VPSHRDVW256 + return true + case OpShiftRightAndFillUpperFromUint16x32: + v.Op = OpAMD64VPSHRDVW512 + return true + case OpShiftRightAndFillUpperFromUint16x8: + v.Op = OpAMD64VPSHRDVW128 + return true + case OpShiftRightAndFillUpperFromUint32x16: + v.Op = OpAMD64VPSHRDVD512 + return true + case OpShiftRightAndFillUpperFromUint32x4: + v.Op = OpAMD64VPSHRDVD128 + return true + case OpShiftRightAndFillUpperFromUint32x8: + v.Op = OpAMD64VPSHRDVD256 + return true + case OpShiftRightAndFillUpperFromUint64x2: + v.Op = OpAMD64VPSHRDVQ128 + return true + case OpShiftRightAndFillUpperFromUint64x4: + v.Op = OpAMD64VPSHRDVQ256 + return true + case OpShiftRightAndFillUpperFromUint64x8: + v.Op = OpAMD64VPSHRDVQ512 + return true + case OpShiftRightInt16x16: + v.Op = OpAMD64VPSRLVW256 + return true + case OpShiftRightInt16x32: + v.Op = OpAMD64VPSRLVW512 + return true + case OpShiftRightInt16x8: + v.Op = OpAMD64VPSRLVW128 + return true + case OpShiftRightInt32x16: + v.Op = OpAMD64VPSRLVD512 + return true + case OpShiftRightInt32x4: + v.Op = OpAMD64VPSRLVD128 + return true + case OpShiftRightInt32x8: + v.Op = OpAMD64VPSRLVD256 + return true + case OpShiftRightInt64x2: + v.Op = OpAMD64VPSRLVQ128 + return true + case OpShiftRightInt64x4: + v.Op = OpAMD64VPSRLVQ256 + return true + case OpShiftRightInt64x8: + v.Op = OpAMD64VPSRLVQ512 + return true + case OpShiftRightSignExtendedInt16x16: + v.Op = OpAMD64VPSRAVW256 + return true + case OpShiftRightSignExtendedInt16x32: + v.Op = OpAMD64VPSRAVW512 + return true + case OpShiftRightSignExtendedInt16x8: + v.Op = OpAMD64VPSRAVW128 + return true + case OpShiftRightSignExtendedInt32x16: + v.Op = OpAMD64VPSRAVD512 + return true + case OpShiftRightSignExtendedInt32x4: + 
v.Op = OpAMD64VPSRAVD128 + return true + case OpShiftRightSignExtendedInt32x8: + v.Op = OpAMD64VPSRAVD256 + return true + case OpShiftRightSignExtendedInt64x2: + v.Op = OpAMD64VPSRAVQ128 + return true + case OpShiftRightSignExtendedInt64x4: + v.Op = OpAMD64VPSRAVQ256 + return true + case OpShiftRightSignExtendedInt64x8: + v.Op = OpAMD64VPSRAVQ512 + return true + case OpShiftRightSignExtendedUint16x16: + v.Op = OpAMD64VPSRAVW256 + return true + case OpShiftRightSignExtendedUint16x32: + v.Op = OpAMD64VPSRAVW512 + return true + case OpShiftRightSignExtendedUint16x8: + v.Op = OpAMD64VPSRAVW128 + return true + case OpShiftRightSignExtendedUint32x16: + v.Op = OpAMD64VPSRAVD512 + return true + case OpShiftRightSignExtendedUint32x4: + v.Op = OpAMD64VPSRAVD128 + return true + case OpShiftRightSignExtendedUint32x8: + v.Op = OpAMD64VPSRAVD256 + return true + case OpShiftRightSignExtendedUint64x2: + v.Op = OpAMD64VPSRAVQ128 + return true + case OpShiftRightSignExtendedUint64x4: + v.Op = OpAMD64VPSRAVQ256 + return true + case OpShiftRightSignExtendedUint64x8: + v.Op = OpAMD64VPSRAVQ512 + return true + case OpShiftRightUint16x16: + v.Op = OpAMD64VPSRLVW256 + return true + case OpShiftRightUint16x32: + v.Op = OpAMD64VPSRLVW512 + return true + case OpShiftRightUint16x8: + v.Op = OpAMD64VPSRLVW128 + return true + case OpShiftRightUint32x16: + v.Op = OpAMD64VPSRLVD512 + return true + case OpShiftRightUint32x4: + v.Op = OpAMD64VPSRLVD128 + return true + case OpShiftRightUint32x8: + v.Op = OpAMD64VPSRLVD256 + return true + case OpShiftRightUint64x2: + v.Op = OpAMD64VPSRLVQ128 + return true + case OpShiftRightUint64x4: + v.Op = OpAMD64VPSRLVQ256 + return true + case OpShiftRightUint64x8: + v.Op = OpAMD64VPSRLVQ512 + return true case OpSignExt16to32: v.Op = OpAMD64MOVWQSX return true @@ -43973,6 +44918,870 @@ func rewriteValueAMD64_OpMaskedPopCountUint8x64(v *Value) bool { return true } } +func rewriteValueAMD64_OpMaskedRotateAllLeftInt32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateAllLeftInt32x16 [a] x mask) + // result: (VPROLDMasked512 [a] x (VPMOVVec32x16ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPROLDMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateAllLeftInt32x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateAllLeftInt32x4 [a] x mask) + // result: (VPROLDMasked128 [a] x (VPMOVVec32x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPROLDMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateAllLeftInt32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateAllLeftInt32x8 [a] x mask) + // result: (VPROLDMasked256 [a] x (VPMOVVec32x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPROLDMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateAllLeftInt64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateAllLeftInt64x2 [a] x mask) + // result: 
(VPROLQMasked128 [a] x (VPMOVVec64x2ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPROLQMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateAllLeftInt64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateAllLeftInt64x4 [a] x mask) + // result: (VPROLQMasked256 [a] x (VPMOVVec64x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPROLQMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateAllLeftInt64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateAllLeftInt64x8 [a] x mask) + // result: (VPROLQMasked512 [a] x (VPMOVVec64x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPROLQMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateAllLeftUint32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateAllLeftUint32x16 [a] x mask) + // result: (VPROLDMasked512 [a] x (VPMOVVec32x16ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPROLDMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateAllLeftUint32x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateAllLeftUint32x4 [a] x mask) + // result: (VPROLDMasked128 [a] x (VPMOVVec32x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPROLDMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateAllLeftUint32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateAllLeftUint32x8 [a] x mask) + // result: (VPROLDMasked256 [a] x (VPMOVVec32x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPROLDMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateAllLeftUint64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateAllLeftUint64x2 [a] x mask) + // result: (VPROLQMasked128 [a] x (VPMOVVec64x2ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPROLQMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateAllLeftUint64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateAllLeftUint64x4 [a] x mask) + // result: (VPROLQMasked256 [a] x (VPMOVVec64x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + 
v.reset(OpAMD64VPROLQMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateAllLeftUint64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateAllLeftUint64x8 [a] x mask) + // result: (VPROLQMasked512 [a] x (VPMOVVec64x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPROLQMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateAllRightInt32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateAllRightInt32x16 [a] x mask) + // result: (VPRORDMasked512 [a] x (VPMOVVec32x16ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPRORDMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateAllRightInt32x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateAllRightInt32x4 [a] x mask) + // result: (VPRORDMasked128 [a] x (VPMOVVec32x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPRORDMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateAllRightInt32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateAllRightInt32x8 [a] x mask) + // result: (VPRORDMasked256 [a] x (VPMOVVec32x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPRORDMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateAllRightInt64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateAllRightInt64x2 [a] x mask) + // result: (VPRORQMasked128 [a] x (VPMOVVec64x2ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPRORQMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateAllRightInt64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateAllRightInt64x4 [a] x mask) + // result: (VPRORQMasked256 [a] x (VPMOVVec64x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPRORQMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateAllRightInt64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateAllRightInt64x8 [a] x mask) + // result: (VPRORQMasked512 [a] x (VPMOVVec64x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPRORQMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + 
v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateAllRightUint32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateAllRightUint32x16 [a] x mask) + // result: (VPRORDMasked512 [a] x (VPMOVVec32x16ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPRORDMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateAllRightUint32x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateAllRightUint32x4 [a] x mask) + // result: (VPRORDMasked128 [a] x (VPMOVVec32x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPRORDMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateAllRightUint32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateAllRightUint32x8 [a] x mask) + // result: (VPRORDMasked256 [a] x (VPMOVVec32x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPRORDMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateAllRightUint64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateAllRightUint64x2 [a] x mask) + // result: (VPRORQMasked128 [a] x (VPMOVVec64x2ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPRORQMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateAllRightUint64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateAllRightUint64x4 [a] x mask) + // result: (VPRORQMasked256 [a] x (VPMOVVec64x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPRORQMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateAllRightUint64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateAllRightUint64x8 [a] x mask) + // result: (VPRORQMasked512 [a] x (VPMOVVec64x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPRORQMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateLeftInt32x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateLeftInt32x16 x y mask) + // result: (VPROLVDMasked512 x y (VPMOVVec32x16ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPROLVDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateLeftInt32x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := 
v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateLeftInt32x4 x y mask) + // result: (VPROLVDMasked128 x y (VPMOVVec32x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPROLVDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateLeftInt32x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateLeftInt32x8 x y mask) + // result: (VPROLVDMasked256 x y (VPMOVVec32x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPROLVDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateLeftInt64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateLeftInt64x2 x y mask) + // result: (VPROLVQMasked128 x y (VPMOVVec64x2ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPROLVQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateLeftInt64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateLeftInt64x4 x y mask) + // result: (VPROLVQMasked256 x y (VPMOVVec64x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPROLVQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateLeftInt64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateLeftInt64x8 x y mask) + // result: (VPROLVQMasked512 x y (VPMOVVec64x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPROLVQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateLeftUint32x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateLeftUint32x16 x y mask) + // result: (VPROLVDMasked512 x y (VPMOVVec32x16ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPROLVDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateLeftUint32x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateLeftUint32x4 x y mask) + // result: (VPROLVDMasked128 x y (VPMOVVec32x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPROLVDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateLeftUint32x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateLeftUint32x8 x y mask) + // result: (VPROLVDMasked256 x y (VPMOVVec32x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPROLVDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func 
rewriteValueAMD64_OpMaskedRotateLeftUint64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateLeftUint64x2 x y mask) + // result: (VPROLVQMasked128 x y (VPMOVVec64x2ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPROLVQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateLeftUint64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateLeftUint64x4 x y mask) + // result: (VPROLVQMasked256 x y (VPMOVVec64x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPROLVQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateLeftUint64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateLeftUint64x8 x y mask) + // result: (VPROLVQMasked512 x y (VPMOVVec64x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPROLVQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateRightInt32x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateRightInt32x16 x y mask) + // result: (VPRORVDMasked512 x y (VPMOVVec32x16ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPRORVDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateRightInt32x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateRightInt32x4 x y mask) + // result: (VPRORVDMasked128 x y (VPMOVVec32x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPRORVDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateRightInt32x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateRightInt32x8 x y mask) + // result: (VPRORVDMasked256 x y (VPMOVVec32x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPRORVDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateRightInt64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateRightInt64x2 x y mask) + // result: (VPRORVQMasked128 x y (VPMOVVec64x2ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPRORVQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateRightInt64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateRightInt64x4 x y mask) + // result: (VPRORVQMasked256 x y (VPMOVVec64x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPRORVQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, 
types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateRightInt64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateRightInt64x8 x y mask) + // result: (VPRORVQMasked512 x y (VPMOVVec64x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPRORVQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateRightUint32x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateRightUint32x16 x y mask) + // result: (VPRORVDMasked512 x y (VPMOVVec32x16ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPRORVDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateRightUint32x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateRightUint32x4 x y mask) + // result: (VPRORVDMasked128 x y (VPMOVVec32x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPRORVDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateRightUint32x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateRightUint32x8 x y mask) + // result: (VPRORVDMasked256 x y (VPMOVVec32x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPRORVDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateRightUint64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateRightUint64x2 x y mask) + // result: (VPRORVQMasked128 x y (VPMOVVec64x2ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPRORVQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateRightUint64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateRightUint64x4 x y mask) + // result: (VPRORVQMasked256 x y (VPMOVVec64x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPRORVQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateRightUint64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateRightUint64x8 x y mask) + // result: (VPRORVQMasked512 x y (VPMOVVec64x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPRORVQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} func rewriteValueAMD64_OpMaskedRoundSuppressExceptionWithPrecisionFloat32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -44855,6 +46664,2688 @@ func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint3 return true } } +func 
rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt16x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllLeftAndFillUpperFromInt16x16 [a] x y mask) + // result: (VPSHLDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDWMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt16x32(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllLeftAndFillUpperFromInt16x32 [a] x y mask) + // result: (VPSHLDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDWMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt16x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllLeftAndFillUpperFromInt16x8 [a] x y mask) + // result: (VPSHLDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDWMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt32x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllLeftAndFillUpperFromInt32x16 [a] x y mask) + // result: (VPSHLDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDDMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt32x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllLeftAndFillUpperFromInt32x4 [a] x y mask) + // result: (VPSHLDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDDMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt32x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllLeftAndFillUpperFromInt32x8 [a] x y mask) + // result: (VPSHLDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDDMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllLeftAndFillUpperFromInt64x2 [a] x 
y mask) + // result: (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDQMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllLeftAndFillUpperFromInt64x4 [a] x y mask) + // result: (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDQMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllLeftAndFillUpperFromInt64x8 [a] x y mask) + // result: (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDQMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint16x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllLeftAndFillUpperFromUint16x16 [a] x y mask) + // result: (VPSHLDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDWMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint16x32(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllLeftAndFillUpperFromUint16x32 [a] x y mask) + // result: (VPSHLDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDWMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint16x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllLeftAndFillUpperFromUint16x8 [a] x y mask) + // result: (VPSHLDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDWMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint32x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllLeftAndFillUpperFromUint32x16 [a] x y mask) + // result: (VPSHLDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDDMasked512) + v.AuxInt = 
int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint32x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllLeftAndFillUpperFromUint32x4 [a] x y mask) + // result: (VPSHLDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDDMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint32x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllLeftAndFillUpperFromUint32x8 [a] x y mask) + // result: (VPSHLDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDDMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllLeftAndFillUpperFromUint64x2 [a] x y mask) + // result: (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDQMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllLeftAndFillUpperFromUint64x4 [a] x y mask) + // result: (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDQMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllLeftAndFillUpperFromUint64x8 [a] x y mask) + // result: (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDQMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllLeftInt64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllLeftInt64x2 x y mask) + // result: (VPSLLQMasked128 x y (VPMOVVec64x2ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllLeftInt64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: 
(MaskedShiftAllLeftInt64x4 x y mask) + // result: (VPSLLQMasked256 x y (VPMOVVec64x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllLeftInt64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllLeftInt64x8 x y mask) + // result: (VPSLLQMasked512 x y (VPMOVVec64x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllLeftUint64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllLeftUint64x2 x y mask) + // result: (VPSLLQMasked128 x y (VPMOVVec64x2ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllLeftUint64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllLeftUint64x4 x y mask) + // result: (VPSLLQMasked256 x y (VPMOVVec64x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllLeftUint64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllLeftUint64x8 x y mask) + // result: (VPSLLQMasked512 x y (VPMOVVec64x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt16x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightAndFillUpperFromInt16x16 [a] x y mask) + // result: (VPSHRDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHRDWMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt16x32(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightAndFillUpperFromInt16x32 [a] x y mask) + // result: (VPSHRDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHRDWMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt16x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightAndFillUpperFromInt16x8 [a] x y mask) + // result: (VPSHRDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) + for { + a := 
auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHRDWMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt32x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightAndFillUpperFromInt32x16 [a] x y mask) + // result: (VPSHRDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHRDDMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt32x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightAndFillUpperFromInt32x4 [a] x y mask) + // result: (VPSHRDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHRDDMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt32x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightAndFillUpperFromInt32x8 [a] x y mask) + // result: (VPSHRDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHRDDMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightAndFillUpperFromInt64x2 [a] x y mask) + // result: (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHRDQMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightAndFillUpperFromInt64x4 [a] x y mask) + // result: (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHRDQMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightAndFillUpperFromInt64x8 [a] x y mask) + // result: (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHRDQMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) 
+ v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint16x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightAndFillUpperFromUint16x16 [a] x y mask) + // result: (VPSHRDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHRDWMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint16x32(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightAndFillUpperFromUint16x32 [a] x y mask) + // result: (VPSHRDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHRDWMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint16x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightAndFillUpperFromUint16x8 [a] x y mask) + // result: (VPSHRDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHRDWMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint32x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightAndFillUpperFromUint32x16 [a] x y mask) + // result: (VPSHRDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHRDDMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint32x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightAndFillUpperFromUint32x4 [a] x y mask) + // result: (VPSHRDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHRDDMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint32x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightAndFillUpperFromUint32x8 [a] x y mask) + // result: (VPSHRDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHRDDMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + 
b := v.Block + // match: (MaskedShiftAllRightAndFillUpperFromUint64x2 [a] x y mask) + // result: (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHRDQMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightAndFillUpperFromUint64x4 [a] x y mask) + // result: (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHRDQMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightAndFillUpperFromUint64x8 [a] x y mask) + // result: (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHRDQMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightInt64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightInt64x2 x y mask) + // result: (VPSRLQMasked128 x y (VPMOVVec64x2ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightInt64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightInt64x4 x y mask) + // result: (VPSRLQMasked256 x y (VPMOVVec64x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightInt64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightInt64x8 x y mask) + // result: (VPSRLQMasked512 x y (VPMOVVec64x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightSignExtendedInt64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightSignExtendedInt64x2 x y mask) + // result: (VPSRAQMasked128 x y (VPMOVVec64x2ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightSignExtendedInt64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: 
(MaskedShiftAllRightSignExtendedInt64x4 x y mask) + // result: (VPSRAQMasked256 x y (VPMOVVec64x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightSignExtendedInt64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightSignExtendedInt64x8 x y mask) + // result: (VPSRAQMasked512 x y (VPMOVVec64x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightUint64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightUint64x2 x y mask) + // result: (VPSRLQMasked128 x y (VPMOVVec64x2ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightUint64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightUint64x4 x y mask) + // result: (VPSRLQMasked256 x y (VPMOVVec64x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightUint64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightUint64x8 x y mask) + // result: (VPSRLQMasked512 x y (VPMOVVec64x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt16x16(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftAndFillUpperFromInt16x16 x y z mask) + // result: (VPSHLDVWMasked256 x y z (VPMOVVec16x16ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt16x32(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftAndFillUpperFromInt16x32 x y z mask) + // result: (VPSHLDVWMasked512 x y z (VPMOVVec16x32ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt16x8(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftAndFillUpperFromInt16x8 x y z mask) + // result: (VPSHLDVWMasked128 x y z (VPMOVVec16x8ToM mask)) + for { + x := v_0 + y := v_1 + z := 
v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt32x16(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftAndFillUpperFromInt32x16 x y z mask) + // result: (VPSHLDVDMasked512 x y z (VPMOVVec32x16ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt32x4(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftAndFillUpperFromInt32x4 x y z mask) + // result: (VPSHLDVDMasked128 x y z (VPMOVVec32x4ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt32x8(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftAndFillUpperFromInt32x8 x y z mask) + // result: (VPSHLDVDMasked256 x y z (VPMOVVec32x8ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt64x2(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftAndFillUpperFromInt64x2 x y z mask) + // result: (VPSHLDVQMasked128 x y z (VPMOVVec64x2ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt64x4(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftAndFillUpperFromInt64x4 x y z mask) + // result: (VPSHLDVQMasked256 x y z (VPMOVVec64x4ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt64x8(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftAndFillUpperFromInt64x8 x y z mask) + // result: (VPSHLDVQMasked512 x y z (VPMOVVec64x8ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint16x16(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftAndFillUpperFromUint16x16 x y z 
mask) + // result: (VPSHLDVWMasked256 x y z (VPMOVVec16x16ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint16x32(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftAndFillUpperFromUint16x32 x y z mask) + // result: (VPSHLDVWMasked512 x y z (VPMOVVec16x32ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint16x8(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftAndFillUpperFromUint16x8 x y z mask) + // result: (VPSHLDVWMasked128 x y z (VPMOVVec16x8ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint32x16(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftAndFillUpperFromUint32x16 x y z mask) + // result: (VPSHLDVDMasked512 x y z (VPMOVVec32x16ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint32x4(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftAndFillUpperFromUint32x4 x y z mask) + // result: (VPSHLDVDMasked128 x y z (VPMOVVec32x4ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint32x8(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftAndFillUpperFromUint32x8 x y z mask) + // result: (VPSHLDVDMasked256 x y z (VPMOVVec32x8ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint64x2(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftAndFillUpperFromUint64x2 x y z mask) + // result: (VPSHLDVQMasked128 x y z (VPMOVVec64x2ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint64x4(v *Value) bool { + v_3 := v.Args[3] + v_2 := 
v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftAndFillUpperFromUint64x4 x y z mask) + // result: (VPSHLDVQMasked256 x y z (VPMOVVec64x4ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint64x8(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftAndFillUpperFromUint64x8 x y z mask) + // result: (VPSHLDVQMasked512 x y z (VPMOVVec64x8ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftInt16x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftInt16x16 x y mask) + // result: (VPSLLVWMasked256 x y (VPMOVVec16x16ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLVWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftInt16x32(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftInt16x32 x y mask) + // result: (VPSLLVWMasked512 x y (VPMOVVec16x32ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLVWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftInt16x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftInt16x8 x y mask) + // result: (VPSLLVWMasked128 x y (VPMOVVec16x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLVWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftInt32x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftInt32x16 x y mask) + // result: (VPSLLVDMasked512 x y (VPMOVVec32x16ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLVDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftInt32x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftInt32x4 x y mask) + // result: (VPSLLVDMasked128 x y (VPMOVVec32x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLVDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftInt32x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftInt32x8 x y mask) + // result: (VPSLLVDMasked256 x y (VPMOVVec32x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLVDMasked256) + v0 := b.NewValue0(v.Pos, 
OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftInt64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftInt64x2 x y mask) + // result: (VPSLLVQMasked128 x y (VPMOVVec64x2ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLVQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftInt64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftInt64x4 x y mask) + // result: (VPSLLVQMasked256 x y (VPMOVVec64x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLVQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftInt64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftInt64x8 x y mask) + // result: (VPSLLVQMasked512 x y (VPMOVVec64x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLVQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftUint16x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftUint16x16 x y mask) + // result: (VPSLLVWMasked256 x y (VPMOVVec16x16ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLVWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftUint16x32(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftUint16x32 x y mask) + // result: (VPSLLVWMasked512 x y (VPMOVVec16x32ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLVWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftUint16x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftUint16x8 x y mask) + // result: (VPSLLVWMasked128 x y (VPMOVVec16x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLVWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftUint32x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftUint32x16 x y mask) + // result: (VPSLLVDMasked512 x y (VPMOVVec32x16ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLVDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftUint32x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftUint32x4 x y mask) + // result: (VPSLLVDMasked128 x y (VPMOVVec32x4ToM mask)) + for { + x := v_0 + y := v_1 + mask 
:= v_2 + v.reset(OpAMD64VPSLLVDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftUint32x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftUint32x8 x y mask) + // result: (VPSLLVDMasked256 x y (VPMOVVec32x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLVDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftUint64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftUint64x2 x y mask) + // result: (VPSLLVQMasked128 x y (VPMOVVec64x2ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLVQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftUint64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftUint64x4 x y mask) + // result: (VPSLLVQMasked256 x y (VPMOVVec64x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLVQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftUint64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftUint64x8 x y mask) + // result: (VPSLLVQMasked512 x y (VPMOVVec64x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLVQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt16x16(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightAndFillUpperFromInt16x16 x y z mask) + // result: (VPSHRDVWMasked256 x y z (VPMOVVec16x16ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt16x32(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightAndFillUpperFromInt16x32 x y z mask) + // result: (VPSHRDVWMasked512 x y z (VPMOVVec16x32ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt16x8(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightAndFillUpperFromInt16x8 x y z mask) + // result: (VPSHRDVWMasked128 x y z (VPMOVVec16x8ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} 
+func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt32x16(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightAndFillUpperFromInt32x16 x y z mask) + // result: (VPSHRDVDMasked512 x y z (VPMOVVec32x16ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt32x4(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightAndFillUpperFromInt32x4 x y z mask) + // result: (VPSHRDVDMasked128 x y z (VPMOVVec32x4ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt32x8(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightAndFillUpperFromInt32x8 x y z mask) + // result: (VPSHRDVDMasked256 x y z (VPMOVVec32x8ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt64x2(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightAndFillUpperFromInt64x2 x y z mask) + // result: (VPSHRDVQMasked128 x y z (VPMOVVec64x2ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt64x4(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightAndFillUpperFromInt64x4 x y z mask) + // result: (VPSHRDVQMasked256 x y z (VPMOVVec64x4ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt64x8(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightAndFillUpperFromInt64x8 x y z mask) + // result: (VPSHRDVQMasked512 x y z (VPMOVVec64x8ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint16x16(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightAndFillUpperFromUint16x16 x y z mask) + // result: (VPSHRDVWMasked256 x y z (VPMOVVec16x16ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVWMasked256) + v0 := 
b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint16x32(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightAndFillUpperFromUint16x32 x y z mask) + // result: (VPSHRDVWMasked512 x y z (VPMOVVec16x32ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint16x8(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightAndFillUpperFromUint16x8 x y z mask) + // result: (VPSHRDVWMasked128 x y z (VPMOVVec16x8ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint32x16(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightAndFillUpperFromUint32x16 x y z mask) + // result: (VPSHRDVDMasked512 x y z (VPMOVVec32x16ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint32x4(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightAndFillUpperFromUint32x4 x y z mask) + // result: (VPSHRDVDMasked128 x y z (VPMOVVec32x4ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint32x8(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightAndFillUpperFromUint32x8 x y z mask) + // result: (VPSHRDVDMasked256 x y z (VPMOVVec32x8ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint64x2(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightAndFillUpperFromUint64x2 x y z mask) + // result: (VPSHRDVQMasked128 x y z (VPMOVVec64x2ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint64x4(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightAndFillUpperFromUint64x4 x y z mask) + // result: (VPSHRDVQMasked256 
x y z (VPMOVVec64x4ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint64x8(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightAndFillUpperFromUint64x8 x y z mask) + // result: (VPSHRDVQMasked512 x y z (VPMOVVec64x8ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightInt16x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightInt16x16 x y mask) + // result: (VPSRLVWMasked256 x y (VPMOVVec16x16ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLVWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightInt16x32(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightInt16x32 x y mask) + // result: (VPSRLVWMasked512 x y (VPMOVVec16x32ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLVWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightInt16x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightInt16x8 x y mask) + // result: (VPSRLVWMasked128 x y (VPMOVVec16x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLVWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightInt32x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightInt32x16 x y mask) + // result: (VPSRLVDMasked512 x y (VPMOVVec32x16ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLVDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightInt32x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightInt32x4 x y mask) + // result: (VPSRLVDMasked128 x y (VPMOVVec32x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLVDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightInt32x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightInt32x8 x y mask) + // result: (VPSRLVDMasked256 x y (VPMOVVec32x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLVDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightInt64x2(v 
*Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightInt64x2 x y mask) + // result: (VPSRLVQMasked128 x y (VPMOVVec64x2ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLVQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightInt64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightInt64x4 x y mask) + // result: (VPSRLVQMasked256 x y (VPMOVVec64x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLVQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightInt64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightInt64x8 x y mask) + // result: (VPSRLVQMasked512 x y (VPMOVVec64x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLVQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt16x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightSignExtendedInt16x16 x y mask) + // result: (VPSRAVWMasked256 x y (VPMOVVec16x16ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAVWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt16x32(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightSignExtendedInt16x32 x y mask) + // result: (VPSRAVWMasked512 x y (VPMOVVec16x32ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAVWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt16x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightSignExtendedInt16x8 x y mask) + // result: (VPSRAVWMasked128 x y (VPMOVVec16x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAVWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt32x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightSignExtendedInt32x16 x y mask) + // result: (VPSRAVDMasked512 x y (VPMOVVec32x16ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAVDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt32x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightSignExtendedInt32x4 x y mask) + // result: (VPSRAVDMasked128 x y (VPMOVVec32x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + 
v.reset(OpAMD64VPSRAVDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt32x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightSignExtendedInt32x8 x y mask) + // result: (VPSRAVDMasked256 x y (VPMOVVec32x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAVDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightSignExtendedInt64x2 x y mask) + // result: (VPSRAVQMasked128 x y (VPMOVVec64x2ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAVQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightSignExtendedInt64x4 x y mask) + // result: (VPSRAVQMasked256 x y (VPMOVVec64x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAVQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightSignExtendedInt64x8 x y mask) + // result: (VPSRAVQMasked512 x y (VPMOVVec64x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAVQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint16x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightSignExtendedUint16x16 x y mask) + // result: (VPSRAVWMasked256 x y (VPMOVVec16x16ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAVWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint16x32(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightSignExtendedUint16x32 x y mask) + // result: (VPSRAVWMasked512 x y (VPMOVVec16x32ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAVWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint16x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightSignExtendedUint16x8 x y mask) + // result: (VPSRAVWMasked128 x y (VPMOVVec16x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAVWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func 
rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint32x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightSignExtendedUint32x16 x y mask) + // result: (VPSRAVDMasked512 x y (VPMOVVec32x16ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAVDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint32x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightSignExtendedUint32x4 x y mask) + // result: (VPSRAVDMasked128 x y (VPMOVVec32x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAVDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint32x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightSignExtendedUint32x8 x y mask) + // result: (VPSRAVDMasked256 x y (VPMOVVec32x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAVDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightSignExtendedUint64x2 x y mask) + // result: (VPSRAVQMasked128 x y (VPMOVVec64x2ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAVQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightSignExtendedUint64x4 x y mask) + // result: (VPSRAVQMasked256 x y (VPMOVVec64x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAVQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightSignExtendedUint64x8 x y mask) + // result: (VPSRAVQMasked512 x y (VPMOVVec64x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAVQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightUint16x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightUint16x16 x y mask) + // result: (VPSRLVWMasked256 x y (VPMOVVec16x16ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLVWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightUint16x32(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightUint16x32 x y mask) + // result: (VPSRLVWMasked512 x y 
(VPMOVVec16x32ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLVWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightUint16x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightUint16x8 x y mask) + // result: (VPSRLVWMasked128 x y (VPMOVVec16x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLVWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightUint32x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightUint32x16 x y mask) + // result: (VPSRLVDMasked512 x y (VPMOVVec32x16ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLVDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightUint32x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightUint32x4 x y mask) + // result: (VPSRLVDMasked128 x y (VPMOVVec32x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLVDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightUint32x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightUint32x8 x y mask) + // result: (VPSRLVDMasked256 x y (VPMOVVec32x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLVDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightUint64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightUint64x2 x y mask) + // result: (VPSRLVQMasked128 x y (VPMOVVec64x2ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLVQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightUint64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightUint64x4 x y mask) + // result: (VPSRLVQMasked256 x y (VPMOVVec64x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLVQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightUint64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightUint64x8 x y mask) + // result: (VPSRLVQMasked512 x y (VPMOVVec64x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLVQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} func rewriteValueAMD64_OpMaskedSqrtFloat32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -47629,6 +52120,318 @@ func 
rewriteValueAMD64_OpPopCount8(v *Value) bool { return true } } +func rewriteValueAMD64_OpRotateAllLeftInt32x16(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllLeftInt32x16 [a] x) + // result: (VPROLD512 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPROLD512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRotateAllLeftInt32x4(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllLeftInt32x4 [a] x) + // result: (VPROLD128 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPROLD128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRotateAllLeftInt32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllLeftInt32x8 [a] x) + // result: (VPROLD256 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPROLD256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRotateAllLeftInt64x2(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllLeftInt64x2 [a] x) + // result: (VPROLQ128 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPROLQ128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRotateAllLeftInt64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllLeftInt64x4 [a] x) + // result: (VPROLQ256 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPROLQ256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRotateAllLeftInt64x8(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllLeftInt64x8 [a] x) + // result: (VPROLQ512 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPROLQ512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRotateAllLeftUint32x16(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllLeftUint32x16 [a] x) + // result: (VPROLD512 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPROLD512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRotateAllLeftUint32x4(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllLeftUint32x4 [a] x) + // result: (VPROLD128 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPROLD128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRotateAllLeftUint32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllLeftUint32x8 [a] x) + // result: (VPROLD256 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPROLD256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRotateAllLeftUint64x2(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllLeftUint64x2 [a] x) + // result: (VPROLQ128 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPROLQ128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRotateAllLeftUint64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllLeftUint64x4 [a] x) + // result: (VPROLQ256 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPROLQ256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRotateAllLeftUint64x8(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllLeftUint64x8 [a] x) + // result: (VPROLQ512 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + 
v.reset(OpAMD64VPROLQ512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRotateAllRightInt32x16(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllRightInt32x16 [a] x) + // result: (VPRORD512 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPRORD512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRotateAllRightInt32x4(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllRightInt32x4 [a] x) + // result: (VPRORD128 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPRORD128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRotateAllRightInt32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllRightInt32x8 [a] x) + // result: (VPRORD256 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPRORD256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRotateAllRightInt64x2(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllRightInt64x2 [a] x) + // result: (VPRORQ128 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPRORQ128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRotateAllRightInt64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllRightInt64x4 [a] x) + // result: (VPRORQ256 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPRORQ256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRotateAllRightInt64x8(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllRightInt64x8 [a] x) + // result: (VPRORQ512 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPRORQ512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRotateAllRightUint32x16(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllRightUint32x16 [a] x) + // result: (VPRORD512 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPRORD512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRotateAllRightUint32x4(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllRightUint32x4 [a] x) + // result: (VPRORD128 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPRORD128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRotateAllRightUint32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllRightUint32x8 [a] x) + // result: (VPRORD256 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPRORD256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRotateAllRightUint64x2(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllRightUint64x2 [a] x) + // result: (VPRORQ128 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPRORQ128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRotateAllRightUint64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllRightUint64x4 [a] x) + // result: (VPRORQ256 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPRORQ256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRotateAllRightUint64x8(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllRightUint64x8 [a] x) + // result: (VPRORQ512 [a] x) + for { + a 
:= auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPRORQ512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} func rewriteValueAMD64_OpRoundFloat32x4(v *Value) bool { v_0 := v.Args[0] // match: (RoundFloat32x4 x) @@ -49718,6 +54521,546 @@ func rewriteValueAMD64_OpSetElemUint8x16(v *Value) bool { return true } } +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt16x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftAndFillUpperFromInt16x16 [a] x y) + // result: (VPSHLDW256 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHLDW256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt16x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftAndFillUpperFromInt16x32 [a] x y) + // result: (VPSHLDW512 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHLDW512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt16x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftAndFillUpperFromInt16x8 [a] x y) + // result: (VPSHLDW128 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHLDW128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftAndFillUpperFromInt32x16 [a] x y) + // result: (VPSHLDD512 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHLDD512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt32x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftAndFillUpperFromInt32x4 [a] x y) + // result: (VPSHLDD128 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHLDD128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftAndFillUpperFromInt32x8 [a] x y) + // result: (VPSHLDD256 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHLDD256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftAndFillUpperFromInt64x2 [a] x y) + // result: (VPSHLDQ128 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHLDQ128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftAndFillUpperFromInt64x4 [a] x y) + // result: (VPSHLDQ256 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHLDQ256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftAndFillUpperFromInt64x8 [a] x y) + // result: (VPSHLDQ512 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + 
v.reset(OpAMD64VPSHLDQ512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint16x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftAndFillUpperFromUint16x16 [a] x y) + // result: (VPSHLDW256 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHLDW256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint16x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftAndFillUpperFromUint16x32 [a] x y) + // result: (VPSHLDW512 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHLDW512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint16x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftAndFillUpperFromUint16x8 [a] x y) + // result: (VPSHLDW128 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHLDW128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftAndFillUpperFromUint32x16 [a] x y) + // result: (VPSHLDD512 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHLDD512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint32x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftAndFillUpperFromUint32x4 [a] x y) + // result: (VPSHLDD128 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHLDD128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftAndFillUpperFromUint32x8 [a] x y) + // result: (VPSHLDD256 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHLDD256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftAndFillUpperFromUint64x2 [a] x y) + // result: (VPSHLDQ128 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHLDQ128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftAndFillUpperFromUint64x4 [a] x y) + // result: (VPSHLDQ256 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHLDQ256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftAndFillUpperFromUint64x8 [a] x y) + // result: (VPSHLDQ512 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHLDQ512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt16x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: 
(ShiftAllRightAndFillUpperFromInt16x16 [a] x y) + // result: (VPSHRDW256 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHRDW256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt16x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightAndFillUpperFromInt16x32 [a] x y) + // result: (VPSHRDW512 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHRDW512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt16x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightAndFillUpperFromInt16x8 [a] x y) + // result: (VPSHRDW128 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHRDW128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightAndFillUpperFromInt32x16 [a] x y) + // result: (VPSHRDD512 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHRDD512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt32x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightAndFillUpperFromInt32x4 [a] x y) + // result: (VPSHRDD128 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHRDD128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightAndFillUpperFromInt32x8 [a] x y) + // result: (VPSHRDD256 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHRDD256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightAndFillUpperFromInt64x2 [a] x y) + // result: (VPSHRDQ128 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHRDQ128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightAndFillUpperFromInt64x4 [a] x y) + // result: (VPSHRDQ256 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHRDQ256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightAndFillUpperFromInt64x8 [a] x y) + // result: (VPSHRDQ512 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHRDQ512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint16x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightAndFillUpperFromUint16x16 [a] x y) + // result: (VPSHRDW256 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHRDW256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + 
} +} +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint16x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightAndFillUpperFromUint16x32 [a] x y) + // result: (VPSHRDW512 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHRDW512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint16x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightAndFillUpperFromUint16x8 [a] x y) + // result: (VPSHRDW128 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHRDW128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightAndFillUpperFromUint32x16 [a] x y) + // result: (VPSHRDD512 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHRDD512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint32x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightAndFillUpperFromUint32x4 [a] x y) + // result: (VPSHRDD128 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHRDD128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightAndFillUpperFromUint32x8 [a] x y) + // result: (VPSHRDD256 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHRDD256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightAndFillUpperFromUint64x2 [a] x y) + // result: (VPSHRDQ128 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHRDQ128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightAndFillUpperFromUint64x4 [a] x y) + // result: (VPSHRDQ256 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHRDQ256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightAndFillUpperFromUint64x8 [a] x y) + // result: (VPSHRDQ512 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHRDQ512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} func rewriteValueAMD64_OpSlicemask(v *Value) bool { v_0 := v.Args[0] b := v.Block diff --git a/src/cmd/compile/internal/ssagen/simdintrinsics.go b/src/cmd/compile/internal/ssagen/simdintrinsics.go index 5d6ae7e3c06..d20c9392936 100644 --- a/src/cmd/compile/internal/ssagen/simdintrinsics.go +++ b/src/cmd/compile/internal/ssagen/simdintrinsics.go @@ -915,6 +915,54 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint64x2.MaskedPopCount", opLen2(ssa.OpMaskedPopCountUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.MaskedPopCount", opLen2(ssa.OpMaskedPopCountUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.MaskedPopCount", opLen2(ssa.OpMaskedPopCountUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.MaskedRotateAllLeft", opLen2Imm8(ssa.OpMaskedRotateAllLeftInt32x4, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int32x8.MaskedRotateAllLeft", opLen2Imm8(ssa.OpMaskedRotateAllLeftInt32x8, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int32x16.MaskedRotateAllLeft", opLen2Imm8(ssa.OpMaskedRotateAllLeftInt32x16, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int64x2.MaskedRotateAllLeft", opLen2Imm8(ssa.OpMaskedRotateAllLeftInt64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int64x4.MaskedRotateAllLeft", opLen2Imm8(ssa.OpMaskedRotateAllLeftInt64x4, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int64x8.MaskedRotateAllLeft", opLen2Imm8(ssa.OpMaskedRotateAllLeftInt64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint32x4.MaskedRotateAllLeft", opLen2Imm8(ssa.OpMaskedRotateAllLeftUint32x4, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint32x8.MaskedRotateAllLeft", opLen2Imm8(ssa.OpMaskedRotateAllLeftUint32x8, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint32x16.MaskedRotateAllLeft", opLen2Imm8(ssa.OpMaskedRotateAllLeftUint32x16, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint64x2.MaskedRotateAllLeft", opLen2Imm8(ssa.OpMaskedRotateAllLeftUint64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint64x4.MaskedRotateAllLeft", opLen2Imm8(ssa.OpMaskedRotateAllLeftUint64x4, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint64x8.MaskedRotateAllLeft", opLen2Imm8(ssa.OpMaskedRotateAllLeftUint64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int32x4.MaskedRotateAllRight", opLen2Imm8(ssa.OpMaskedRotateAllRightInt32x4, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int32x8.MaskedRotateAllRight", opLen2Imm8(ssa.OpMaskedRotateAllRightInt32x8, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int32x16.MaskedRotateAllRight", opLen2Imm8(ssa.OpMaskedRotateAllRightInt32x16, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int64x2.MaskedRotateAllRight", opLen2Imm8(ssa.OpMaskedRotateAllRightInt64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int64x4.MaskedRotateAllRight", opLen2Imm8(ssa.OpMaskedRotateAllRightInt64x4, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int64x8.MaskedRotateAllRight", opLen2Imm8(ssa.OpMaskedRotateAllRightInt64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint32x4.MaskedRotateAllRight", opLen2Imm8(ssa.OpMaskedRotateAllRightUint32x4, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint32x8.MaskedRotateAllRight", opLen2Imm8(ssa.OpMaskedRotateAllRightUint32x8, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint32x16.MaskedRotateAllRight", opLen2Imm8(ssa.OpMaskedRotateAllRightUint32x16, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint64x2.MaskedRotateAllRight", opLen2Imm8(ssa.OpMaskedRotateAllRightUint64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint64x4.MaskedRotateAllRight", opLen2Imm8(ssa.OpMaskedRotateAllRightUint64x4, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint64x8.MaskedRotateAllRight", opLen2Imm8(ssa.OpMaskedRotateAllRightUint64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int32x4.MaskedRotateLeft", 
opLen3(ssa.OpMaskedRotateLeftInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.MaskedRotateLeft", opLen3(ssa.OpMaskedRotateLeftInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.MaskedRotateLeft", opLen3(ssa.OpMaskedRotateLeftInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.MaskedRotateLeft", opLen3(ssa.OpMaskedRotateLeftInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.MaskedRotateLeft", opLen3(ssa.OpMaskedRotateLeftInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.MaskedRotateLeft", opLen3(ssa.OpMaskedRotateLeftInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.MaskedRotateLeft", opLen3(ssa.OpMaskedRotateLeftUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.MaskedRotateLeft", opLen3(ssa.OpMaskedRotateLeftUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.MaskedRotateLeft", opLen3(ssa.OpMaskedRotateLeftUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.MaskedRotateLeft", opLen3(ssa.OpMaskedRotateLeftUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.MaskedRotateLeft", opLen3(ssa.OpMaskedRotateLeftUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.MaskedRotateLeft", opLen3(ssa.OpMaskedRotateLeftUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.MaskedRotateRight", opLen3(ssa.OpMaskedRotateRightInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.MaskedRotateRight", opLen3(ssa.OpMaskedRotateRightInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.MaskedRotateRight", opLen3(ssa.OpMaskedRotateRightInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.MaskedRotateRight", opLen3(ssa.OpMaskedRotateRightInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.MaskedRotateRight", opLen3(ssa.OpMaskedRotateRightInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.MaskedRotateRight", opLen3(ssa.OpMaskedRotateRightInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.MaskedRotateRight", opLen3(ssa.OpMaskedRotateRightUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.MaskedRotateRight", opLen3(ssa.OpMaskedRotateRightUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.MaskedRotateRight", opLen3(ssa.OpMaskedRotateRightUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.MaskedRotateRight", opLen3(ssa.OpMaskedRotateRightUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.MaskedRotateRight", opLen3(ssa.OpMaskedRotateRightUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.MaskedRotateRight", opLen3(ssa.OpMaskedRotateRightUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.MaskedRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedRoundSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x8.MaskedRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedRoundSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float32x16.MaskedRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedRoundSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) @@ -963,6 +1011,147 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint32x4.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint32x8.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint32x16.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.MaskedShiftAllLeft", opLen3(ssa.OpMaskedShiftAllLeftInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.MaskedShiftAllLeft", opLen3(ssa.OpMaskedShiftAllLeftInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.MaskedShiftAllLeft", opLen3(ssa.OpMaskedShiftAllLeftInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.MaskedShiftAllLeft", opLen3(ssa.OpMaskedShiftAllLeftUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.MaskedShiftAllLeft", opLen3(ssa.OpMaskedShiftAllLeftUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.MaskedShiftAllLeft", opLen3(ssa.OpMaskedShiftAllLeftUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromInt16x8, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int16x16.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromInt16x16, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int16x32.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromInt16x32, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int32x4.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromInt32x4, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int32x8.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromInt32x8, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int32x16.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromInt32x16, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int64x2.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromInt64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int64x4.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromInt64x4, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int64x8.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromInt64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint16x8.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromUint16x8, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint16x16.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromUint16x16, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint16x32.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromUint16x32, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint32x4.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromUint32x4, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint32x8.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromUint32x8, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, 
"Uint32x16.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromUint32x16, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint64x2.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromUint64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint64x4.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromUint64x4, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint64x8.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromUint64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int64x2.MaskedShiftAllRight", opLen3(ssa.OpMaskedShiftAllRightInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.MaskedShiftAllRight", opLen3(ssa.OpMaskedShiftAllRightInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.MaskedShiftAllRight", opLen3(ssa.OpMaskedShiftAllRightInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.MaskedShiftAllRight", opLen3(ssa.OpMaskedShiftAllRightUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.MaskedShiftAllRight", opLen3(ssa.OpMaskedShiftAllRightUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.MaskedShiftAllRight", opLen3(ssa.OpMaskedShiftAllRightUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromInt16x8, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int16x16.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromInt16x16, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int16x32.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromInt16x32, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int32x4.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromInt32x4, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int32x8.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromInt32x8, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int32x16.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromInt32x16, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int64x2.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromInt64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int64x4.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromInt64x4, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int64x8.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromInt64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint16x8.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromUint16x8, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint16x16.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromUint16x16, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint16x32.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromUint16x32, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint32x4.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromUint32x4, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint32x8.MaskedShiftAllRightAndFillUpperFrom", 
opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromUint32x8, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint32x16.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromUint32x16, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint64x2.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromUint64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint64x4.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromUint64x4, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint64x8.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromUint64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int64x2.MaskedShiftAllRightSignExtended", opLen3(ssa.OpMaskedShiftAllRightSignExtendedInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.MaskedShiftAllRightSignExtended", opLen3(ssa.OpMaskedShiftAllRightSignExtendedInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.MaskedShiftAllRightSignExtended", opLen3(ssa.OpMaskedShiftAllRightSignExtendedInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftInt16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftUint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftUint16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromInt16x16, types.TypeVec256), 
sys.AMD64) + addF(simdPackage, "Int16x32.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromInt16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromUint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromUint16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightInt16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightUint16x8, types.TypeVec128), sys.AMD64) + 
addF(simdPackage, "Uint16x16.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightUint16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromInt16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromUint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromUint16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, 
"Uint64x8.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedInt16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedUint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedUint16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.MaskedSqrt", opLen2(ssa.OpMaskedSqrtFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.MaskedSqrt", opLen2(ssa.OpMaskedSqrtFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.MaskedSqrt", opLen2(ssa.OpMaskedSqrtFloat32x16, types.TypeVec512), sys.AMD64) @@ -1242,6 +1431,54 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint64x2.PopCount", opLen1(ssa.OpPopCountUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.PopCount", opLen1(ssa.OpPopCountUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.PopCount", opLen1(ssa.OpPopCountUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftInt32x4, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int32x8.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftInt32x8, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int32x16.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftInt32x16, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int64x2.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftInt64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int64x4.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftInt64x4, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int64x8.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftInt64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint32x4.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftUint32x4, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint32x8.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftUint32x8, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint32x16.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftUint32x16, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint64x2.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftUint64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint64x4.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftUint64x4, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint64x8.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftUint64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int32x4.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightInt32x4, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int32x8.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightInt32x8, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int32x16.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightInt32x16, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int64x2.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightInt64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int64x4.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightInt64x4, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int64x8.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightInt64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint32x4.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightUint32x4, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint32x8.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightUint32x8, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint32x16.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightUint32x16, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint64x2.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightUint64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint64x4.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightUint64x4, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint64x8.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightUint64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int32x4.RotateLeft", opLen2(ssa.OpRotateLeftInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.RotateLeft", opLen2(ssa.OpRotateLeftInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.RotateLeft", opLen2(ssa.OpRotateLeftInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.RotateLeft", opLen2(ssa.OpRotateLeftInt64x2, 
types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.RotateLeft", opLen2(ssa.OpRotateLeftInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.RotateLeft", opLen2(ssa.OpRotateLeftInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.RotateLeft", opLen2(ssa.OpRotateLeftUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.RotateLeft", opLen2(ssa.OpRotateLeftUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.RotateLeft", opLen2(ssa.OpRotateLeftUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.RotateLeft", opLen2(ssa.OpRotateLeftUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.RotateLeft", opLen2(ssa.OpRotateLeftUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.RotateLeft", opLen2(ssa.OpRotateLeftUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.RotateRight", opLen2(ssa.OpRotateRightInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.RotateRight", opLen2(ssa.OpRotateRightInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.RotateRight", opLen2(ssa.OpRotateRightInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.RotateRight", opLen2(ssa.OpRotateRightInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.RotateRight", opLen2(ssa.OpRotateRightInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.RotateRight", opLen2(ssa.OpRotateRightInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.RotateRight", opLen2(ssa.OpRotateRightUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.RotateRight", opLen2(ssa.OpRotateRightUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.RotateRight", opLen2(ssa.OpRotateRightUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.RotateRight", opLen2(ssa.OpRotateRightUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.RotateRight", opLen2(ssa.OpRotateRightUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.RotateRight", opLen2(ssa.OpRotateRightUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.Round", opLen1(ssa.OpRoundFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.Round", opLen1(ssa.OpRoundFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x2.Round", opLen1(ssa.OpRoundFloat64x2, types.TypeVec128), sys.AMD64) @@ -1306,6 +1543,167 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
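// Illustrative sketch only, not part of the generated patch: the RotateLeft and
// RotateRight methods registered above rotate each lane by the count in the
// matching lane of the argument (the variable VPROLV/VPRORV forms), while the
// RotateAll* variants take a constant count (opLen1Imm8). Assumes
// GOEXPERIMENT=simd and `import "simd"`; the helper name is made up here.
func rotatePerLaneSketch(xs, counts []uint32) []uint32 {
	x := simd.LoadUint32x4Slice(xs)
	c := simd.LoadUint32x4Slice(counts) // per-lane rotate amounts
	out := make([]uint32, 4)
	x.RotateLeft(c).StoreSlice(out)
	return out
}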
addF(simdPackage, "Uint16x8.SetElem", opLen2Imm8(ssa.OpSetElemUint16x8, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Uint32x4.SetElem", opLen2Imm8(ssa.OpSetElemUint32x4, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Uint64x2.SetElem", opLen2Imm8(ssa.OpSetElemUint64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int16x8.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x4.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x2.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftUint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x4.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x2.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromInt16x8, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int16x16.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromInt16x16, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int16x32.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromInt16x32, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int32x4.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromInt32x4, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int32x8.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromInt32x8, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int32x16.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromInt32x16, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int64x2.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromInt64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int64x4.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromInt64x4, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int64x8.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromInt64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint16x8.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromUint16x8, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint16x16.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromUint16x16, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint16x32.ShiftAllLeftAndFillUpperFrom", 
opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromUint16x32, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint32x4.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromUint32x4, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint32x8.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromUint32x8, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint32x16.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromUint32x16, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint64x2.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromUint64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint64x4.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromUint64x4, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint64x8.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromUint64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int16x8.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x4.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x2.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.ShiftAllRight", opLen2(ssa.OpShiftAllRightUint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.ShiftAllRight", opLen2(ssa.OpShiftAllRightUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x4.ShiftAllRight", opLen2(ssa.OpShiftAllRightUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.ShiftAllRight", opLen2(ssa.OpShiftAllRightUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x2.ShiftAllRight", opLen2(ssa.OpShiftAllRightUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.ShiftAllRight", opLen2(ssa.OpShiftAllRightUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.ShiftAllRight", opLen2(ssa.OpShiftAllRightUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromInt16x8, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int16x16.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromInt16x16, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int16x32.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromInt16x32, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int32x4.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromInt32x4, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int32x8.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromInt32x8, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int32x16.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromInt32x16, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int64x2.ShiftAllRightAndFillUpperFrom", 
opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromInt64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int64x4.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromInt64x4, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int64x8.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromInt64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint16x8.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromUint16x8, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint16x16.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromUint16x16, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint16x32.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromUint16x32, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint32x4.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromUint32x4, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint32x8.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromUint32x8, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint32x16.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromUint32x16, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint64x2.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromUint64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint64x4.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromUint64x4, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint64x8.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromUint64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int16x8.ShiftAllRightSignExtended", opLen2(ssa.OpShiftAllRightSignExtendedInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.ShiftAllRightSignExtended", opLen2(ssa.OpShiftAllRightSignExtendedInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x4.ShiftAllRightSignExtended", opLen2(ssa.OpShiftAllRightSignExtendedInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.ShiftAllRightSignExtended", opLen2(ssa.OpShiftAllRightSignExtendedInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x2.ShiftAllRightSignExtended", opLen2(ssa.OpShiftAllRightSignExtendedInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.ShiftAllRightSignExtended", opLen2(ssa.OpShiftAllRightSignExtendedInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.ShiftAllRightSignExtended", opLen2(ssa.OpShiftAllRightSignExtendedInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.ShiftLeft", opLen2(ssa.OpShiftLeftInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.ShiftLeft", opLen2(ssa.OpShiftLeftInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.ShiftLeft", opLen2(ssa.OpShiftLeftInt16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.ShiftLeft", opLen2(ssa.OpShiftLeftInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.ShiftLeft", opLen2(ssa.OpShiftLeftInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.ShiftLeft", opLen2(ssa.OpShiftLeftInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.ShiftLeft", opLen2(ssa.OpShiftLeftInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.ShiftLeft", opLen2(ssa.OpShiftLeftInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.ShiftLeft", 
opLen2(ssa.OpShiftLeftInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.ShiftLeft", opLen2(ssa.OpShiftLeftUint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.ShiftLeft", opLen2(ssa.OpShiftLeftUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.ShiftLeft", opLen2(ssa.OpShiftLeftUint16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.ShiftLeft", opLen2(ssa.OpShiftLeftUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.ShiftLeft", opLen2(ssa.OpShiftLeftUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.ShiftLeft", opLen2(ssa.OpShiftLeftUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.ShiftLeft", opLen2(ssa.OpShiftLeftUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.ShiftLeft", opLen2(ssa.OpShiftLeftUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.ShiftLeft", opLen2(ssa.OpShiftLeftUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromInt16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromUint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromUint16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.ShiftRight", opLen2(ssa.OpShiftRightInt16x8, types.TypeVec128), sys.AMD64) + 
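// Illustrative sketch only, not part of the generated patch: as the op names in
// this CL suggest, ShiftRight lowers to the logical VPSRLV forms (vacated high
// bits become zero) while ShiftRightSignExtended lowers to the arithmetic
// VPSRAV forms (vacated high bits copy the sign bit); both take per-lane counts.
// Assumes GOEXPERIMENT=simd and `import "simd"`; the helper name is made up here.
func rightShiftFlavorsSketch(xs, counts []int32) (logical, arithmetic []int32) {
	x := simd.LoadInt32x4Slice(xs)
	c := simd.LoadInt32x4Slice(counts)
	logical = make([]int32, 4)
	arithmetic = make([]int32, 4)
	x.ShiftRight(c).StoreSlice(logical)                // zero-filling shift
	x.ShiftRightSignExtended(c).StoreSlice(arithmetic) // sign-filling shift
	return logical, arithmetic
}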
addF(simdPackage, "Int16x16.ShiftRight", opLen2(ssa.OpShiftRightInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.ShiftRight", opLen2(ssa.OpShiftRightInt16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.ShiftRight", opLen2(ssa.OpShiftRightInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.ShiftRight", opLen2(ssa.OpShiftRightInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.ShiftRight", opLen2(ssa.OpShiftRightInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.ShiftRight", opLen2(ssa.OpShiftRightInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.ShiftRight", opLen2(ssa.OpShiftRightInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.ShiftRight", opLen2(ssa.OpShiftRightInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.ShiftRight", opLen2(ssa.OpShiftRightUint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.ShiftRight", opLen2(ssa.OpShiftRightUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.ShiftRight", opLen2(ssa.OpShiftRightUint16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.ShiftRight", opLen2(ssa.OpShiftRightUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.ShiftRight", opLen2(ssa.OpShiftRightUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.ShiftRight", opLen2(ssa.OpShiftRightUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.ShiftRight", opLen2(ssa.OpShiftRightUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.ShiftRight", opLen2(ssa.OpShiftRightUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.ShiftRight", opLen2(ssa.OpShiftRightUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromInt16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromUint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromUint16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromUint32x4, 
types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedInt16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedUint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedUint16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x16.Sign", opLen2(ssa.OpSignInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x32.Sign", opLen2(ssa.OpSignInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x8.Sign", opLen2(ssa.OpSignInt16x8, types.TypeVec128), sys.AMD64) diff --git a/src/simd/simd_wrapped_test.go b/src/simd/simd_wrapped_test.go index b5f6bb517a3..ad828e9d3f8 100644 --- a/src/simd/simd_wrapped_test.go +++ b/src/simd/simd_wrapped_test.go @@ -2147,6 +2147,12 @@ func 
testInt16x8Binary(t *testing.T, v0 []int16, v1 []int16, want []int16, which gotv = vec0.SaturatedPairwiseSub(vec1) case "SaturatedSub": gotv = vec0.SaturatedSub(vec1) + case "ShiftLeft": + gotv = vec0.ShiftLeft(vec1) + case "ShiftRight": + gotv = vec0.ShiftRight(vec1) + case "ShiftRightSignExtended": + gotv = vec0.ShiftRightSignExtended(vec1) case "Sign": gotv = vec0.Sign(vec1) case "Sub": @@ -2187,6 +2193,12 @@ func testInt16x8BinaryMasked(t *testing.T, v0 []int16, v1 []int16, v2 []int16, w gotv = vec0.MaskedSaturatedAdd(vec1, vec2.AsMask16x8()) case "MaskedSaturatedSub": gotv = vec0.MaskedSaturatedSub(vec1, vec2.AsMask16x8()) + case "MaskedShiftLeft": + gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask16x8()) + case "MaskedShiftRight": + gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask16x8()) + case "MaskedShiftRightSignExtended": + gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask16x8()) case "MaskedSub": gotv = vec0.MaskedSub(vec1, vec2.AsMask16x8()) @@ -2307,6 +2319,55 @@ func testInt16x8MaskedCompare(t *testing.T, v0 []int16, v1 []int16, v2 []int16, } } +func testInt16x8Ternary(t *testing.T, v0 []int16, v1 []int16, v2 []int16, want []int16, which string) { + t.Helper() + var gotv simd.Int16x8 + got := make([]int16, len(want)) + vec0 := simd.LoadInt16x8Slice(v0) + vec1 := simd.LoadInt16x8Slice(v1) + vec2 := simd.LoadInt16x8Slice(v2) + switch which { + case "ShiftLeftAndFillUpperFrom": + gotv = vec0.ShiftLeftAndFillUpperFrom(vec1, vec2) + case "ShiftRightAndFillUpperFrom": + gotv = vec0.ShiftRightAndFillUpperFrom(vec1, vec2) + + default: + t.Errorf("Unknown method: Int16x8.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testInt16x8TernaryMasked(t *testing.T, v0 []int16, v1 []int16, v2 []int16, v3 []int16, want []int16, which string) { + t.Helper() + var gotv simd.Int16x8 + got := make([]int16, len(want)) + vec0 := simd.LoadInt16x8Slice(v0) + vec1 := simd.LoadInt16x8Slice(v1) + vec2 := simd.LoadInt16x8Slice(v2) + vec3 := simd.LoadInt16x8Slice(v3) + switch which { + case "MaskedShiftLeftAndFillUpperFrom": + gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask16x8()) + case "MaskedShiftRightAndFillUpperFrom": + gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask16x8()) + + default: + t.Errorf("Unknown method: Int16x8.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testInt16x8Unary(t *testing.T, v0 []int16, want []int16, which string) { t.Helper() var gotv simd.Int16x8 @@ -2387,6 +2448,12 @@ func testInt16x16Binary(t *testing.T, v0 []int16, v1 []int16, want []int16, whic gotv = vec0.SaturatedPairwiseSub(vec1) case "SaturatedSub": gotv = vec0.SaturatedSub(vec1) + case "ShiftLeft": + gotv = vec0.ShiftLeft(vec1) + case "ShiftRight": + gotv = vec0.ShiftRight(vec1) + case "ShiftRightSignExtended": + gotv = vec0.ShiftRightSignExtended(vec1) case "Sign": gotv = vec0.Sign(vec1) case "Sub": @@ -2427,6 +2494,12 @@ func testInt16x16BinaryMasked(t *testing.T, v0 []int16, v1 []int16, v2 []int16, gotv = vec0.MaskedSaturatedAdd(vec1, vec2.AsMask16x16()) case "MaskedSaturatedSub": gotv = vec0.MaskedSaturatedSub(vec1, vec2.AsMask16x16()) + case "MaskedShiftLeft": + gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask16x16()) + case "MaskedShiftRight": + gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask16x16()) + 
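// MaskedShiftRightSignExtended below is the sign-filling (arithmetic) counterpart
// of the logical MaskedShiftRight above; both shift by per-lane counts under the mask.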
case "MaskedShiftRightSignExtended": + gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask16x16()) case "MaskedSub": gotv = vec0.MaskedSub(vec1, vec2.AsMask16x16()) @@ -2547,6 +2620,55 @@ func testInt16x16MaskedCompare(t *testing.T, v0 []int16, v1 []int16, v2 []int16, } } +func testInt16x16Ternary(t *testing.T, v0 []int16, v1 []int16, v2 []int16, want []int16, which string) { + t.Helper() + var gotv simd.Int16x16 + got := make([]int16, len(want)) + vec0 := simd.LoadInt16x16Slice(v0) + vec1 := simd.LoadInt16x16Slice(v1) + vec2 := simd.LoadInt16x16Slice(v2) + switch which { + case "ShiftLeftAndFillUpperFrom": + gotv = vec0.ShiftLeftAndFillUpperFrom(vec1, vec2) + case "ShiftRightAndFillUpperFrom": + gotv = vec0.ShiftRightAndFillUpperFrom(vec1, vec2) + + default: + t.Errorf("Unknown method: Int16x16.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testInt16x16TernaryMasked(t *testing.T, v0 []int16, v1 []int16, v2 []int16, v3 []int16, want []int16, which string) { + t.Helper() + var gotv simd.Int16x16 + got := make([]int16, len(want)) + vec0 := simd.LoadInt16x16Slice(v0) + vec1 := simd.LoadInt16x16Slice(v1) + vec2 := simd.LoadInt16x16Slice(v2) + vec3 := simd.LoadInt16x16Slice(v3) + switch which { + case "MaskedShiftLeftAndFillUpperFrom": + gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask16x16()) + case "MaskedShiftRightAndFillUpperFrom": + gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask16x16()) + + default: + t.Errorf("Unknown method: Int16x16.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testInt16x16Unary(t *testing.T, v0 []int16, want []int16, which string) { t.Helper() var gotv simd.Int16x16 @@ -2613,6 +2735,12 @@ func testInt16x32Binary(t *testing.T, v0 []int16, v1 []int16, want []int16, whic gotv = vec0.SaturatedAdd(vec1) case "SaturatedSub": gotv = vec0.SaturatedSub(vec1) + case "ShiftLeft": + gotv = vec0.ShiftLeft(vec1) + case "ShiftRight": + gotv = vec0.ShiftRight(vec1) + case "ShiftRightSignExtended": + gotv = vec0.ShiftRightSignExtended(vec1) case "Sub": gotv = vec0.Sub(vec1) @@ -2649,6 +2777,12 @@ func testInt16x32BinaryMasked(t *testing.T, v0 []int16, v1 []int16, v2 []int16, gotv = vec0.MaskedSaturatedAdd(vec1, vec2.AsMask16x32()) case "MaskedSaturatedSub": gotv = vec0.MaskedSaturatedSub(vec1, vec2.AsMask16x32()) + case "MaskedShiftLeft": + gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask16x32()) + case "MaskedShiftRight": + gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask16x32()) + case "MaskedShiftRightSignExtended": + gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask16x32()) case "MaskedSub": gotv = vec0.MaskedSub(vec1, vec2.AsMask16x32()) @@ -2769,6 +2903,55 @@ func testInt16x32MaskedCompare(t *testing.T, v0 []int16, v1 []int16, v2 []int16, } } +func testInt16x32Ternary(t *testing.T, v0 []int16, v1 []int16, v2 []int16, want []int16, which string) { + t.Helper() + var gotv simd.Int16x32 + got := make([]int16, len(want)) + vec0 := simd.LoadInt16x32Slice(v0) + vec1 := simd.LoadInt16x32Slice(v1) + vec2 := simd.LoadInt16x32Slice(v2) + switch which { + case "ShiftLeftAndFillUpperFrom": + gotv = vec0.ShiftLeftAndFillUpperFrom(vec1, vec2) + case "ShiftRightAndFillUpperFrom": + gotv = vec0.ShiftRightAndFillUpperFrom(vec1, vec2) + + default: + t.Errorf("Unknown 
method: Int16x32.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testInt16x32TernaryMasked(t *testing.T, v0 []int16, v1 []int16, v2 []int16, v3 []int16, want []int16, which string) { + t.Helper() + var gotv simd.Int16x32 + got := make([]int16, len(want)) + vec0 := simd.LoadInt16x32Slice(v0) + vec1 := simd.LoadInt16x32Slice(v1) + vec2 := simd.LoadInt16x32Slice(v2) + vec3 := simd.LoadInt16x32Slice(v3) + switch which { + case "MaskedShiftLeftAndFillUpperFrom": + gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask16x32()) + case "MaskedShiftRightAndFillUpperFrom": + gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask16x32()) + + default: + t.Errorf("Unknown method: Int16x32.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testInt16x32Unary(t *testing.T, v0 []int16, want []int16, which string) { t.Helper() var gotv simd.Int16x32 @@ -2839,6 +3022,16 @@ func testInt32x4Binary(t *testing.T, v0 []int32, v1 []int32, want []int32, which gotv = vec0.PairwiseAdd(vec1) case "PairwiseSub": gotv = vec0.PairwiseSub(vec1) + case "RotateLeft": + gotv = vec0.RotateLeft(vec1) + case "RotateRight": + gotv = vec0.RotateRight(vec1) + case "ShiftLeft": + gotv = vec0.ShiftLeft(vec1) + case "ShiftRight": + gotv = vec0.ShiftRight(vec1) + case "ShiftRightSignExtended": + gotv = vec0.ShiftRightSignExtended(vec1) case "Sign": gotv = vec0.Sign(vec1) case "Sub": @@ -2879,6 +3072,16 @@ func testInt32x4BinaryMasked(t *testing.T, v0 []int32, v1 []int32, v2 []int32, w gotv = vec0.MaskedMulLow(vec1, vec2.AsMask32x4()) case "MaskedOr": gotv = vec0.MaskedOr(vec1, vec2.AsMask32x4()) + case "MaskedRotateLeft": + gotv = vec0.MaskedRotateLeft(vec1, vec2.AsMask32x4()) + case "MaskedRotateRight": + gotv = vec0.MaskedRotateRight(vec1, vec2.AsMask32x4()) + case "MaskedShiftLeft": + gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask32x4()) + case "MaskedShiftRight": + gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask32x4()) + case "MaskedShiftRightSignExtended": + gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask32x4()) case "MaskedSub": gotv = vec0.MaskedSub(vec1, vec2.AsMask32x4()) case "MaskedXor": @@ -3028,6 +3231,55 @@ func testInt32x4MaskedCompare(t *testing.T, v0 []int32, v1 []int32, v2 []int32, } } +func testInt32x4Ternary(t *testing.T, v0 []int32, v1 []int32, v2 []int32, want []int32, which string) { + t.Helper() + var gotv simd.Int32x4 + got := make([]int32, len(want)) + vec0 := simd.LoadInt32x4Slice(v0) + vec1 := simd.LoadInt32x4Slice(v1) + vec2 := simd.LoadInt32x4Slice(v2) + switch which { + case "ShiftLeftAndFillUpperFrom": + gotv = vec0.ShiftLeftAndFillUpperFrom(vec1, vec2) + case "ShiftRightAndFillUpperFrom": + gotv = vec0.ShiftRightAndFillUpperFrom(vec1, vec2) + + default: + t.Errorf("Unknown method: Int32x4.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testInt32x4TernaryMasked(t *testing.T, v0 []int32, v1 []int32, v2 []int32, v3 []int32, want []int32, which string) { + t.Helper() + var gotv simd.Int32x4 + got := make([]int32, len(want)) + vec0 := simd.LoadInt32x4Slice(v0) + vec1 := simd.LoadInt32x4Slice(v1) + vec2 := simd.LoadInt32x4Slice(v2) + vec3 := simd.LoadInt32x4Slice(v3) + 
switch which { + case "MaskedShiftLeftAndFillUpperFrom": + gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask32x4()) + case "MaskedShiftRightAndFillUpperFrom": + gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask32x4()) + + default: + t.Errorf("Unknown method: Int32x4.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testInt32x4Uint8x16Int8x16Int32x4(t *testing.T, v0 []int32, v1 []uint8, v2 []int8, want []int32, which string) { t.Helper() var gotv simd.Int32x4 @@ -3147,6 +3399,16 @@ func testInt32x8Binary(t *testing.T, v0 []int32, v1 []int32, want []int32, which gotv = vec0.PairwiseAdd(vec1) case "PairwiseSub": gotv = vec0.PairwiseSub(vec1) + case "RotateLeft": + gotv = vec0.RotateLeft(vec1) + case "RotateRight": + gotv = vec0.RotateRight(vec1) + case "ShiftLeft": + gotv = vec0.ShiftLeft(vec1) + case "ShiftRight": + gotv = vec0.ShiftRight(vec1) + case "ShiftRightSignExtended": + gotv = vec0.ShiftRightSignExtended(vec1) case "Sign": gotv = vec0.Sign(vec1) case "Sub": @@ -3187,6 +3449,16 @@ func testInt32x8BinaryMasked(t *testing.T, v0 []int32, v1 []int32, v2 []int32, w gotv = vec0.MaskedMulLow(vec1, vec2.AsMask32x8()) case "MaskedOr": gotv = vec0.MaskedOr(vec1, vec2.AsMask32x8()) + case "MaskedRotateLeft": + gotv = vec0.MaskedRotateLeft(vec1, vec2.AsMask32x8()) + case "MaskedRotateRight": + gotv = vec0.MaskedRotateRight(vec1, vec2.AsMask32x8()) + case "MaskedShiftLeft": + gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask32x8()) + case "MaskedShiftRight": + gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask32x8()) + case "MaskedShiftRightSignExtended": + gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask32x8()) case "MaskedSub": gotv = vec0.MaskedSub(vec1, vec2.AsMask32x8()) case "MaskedXor": @@ -3336,6 +3608,55 @@ func testInt32x8MaskedCompare(t *testing.T, v0 []int32, v1 []int32, v2 []int32, } } +func testInt32x8Ternary(t *testing.T, v0 []int32, v1 []int32, v2 []int32, want []int32, which string) { + t.Helper() + var gotv simd.Int32x8 + got := make([]int32, len(want)) + vec0 := simd.LoadInt32x8Slice(v0) + vec1 := simd.LoadInt32x8Slice(v1) + vec2 := simd.LoadInt32x8Slice(v2) + switch which { + case "ShiftLeftAndFillUpperFrom": + gotv = vec0.ShiftLeftAndFillUpperFrom(vec1, vec2) + case "ShiftRightAndFillUpperFrom": + gotv = vec0.ShiftRightAndFillUpperFrom(vec1, vec2) + + default: + t.Errorf("Unknown method: Int32x8.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testInt32x8TernaryMasked(t *testing.T, v0 []int32, v1 []int32, v2 []int32, v3 []int32, want []int32, which string) { + t.Helper() + var gotv simd.Int32x8 + got := make([]int32, len(want)) + vec0 := simd.LoadInt32x8Slice(v0) + vec1 := simd.LoadInt32x8Slice(v1) + vec2 := simd.LoadInt32x8Slice(v2) + vec3 := simd.LoadInt32x8Slice(v3) + switch which { + case "MaskedShiftLeftAndFillUpperFrom": + gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask32x8()) + case "MaskedShiftRightAndFillUpperFrom": + gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask32x8()) + + default: + t.Errorf("Unknown method: Int32x8.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func 
testInt32x8Uint8x32Int8x32Int32x8(t *testing.T, v0 []int32, v1 []uint8, v2 []int8, want []int32, which string) { t.Helper() var gotv simd.Int32x8 @@ -3451,6 +3772,16 @@ func testInt32x16Binary(t *testing.T, v0 []int32, v1 []int32, want []int32, whic gotv = vec0.MulLow(vec1) case "Or": gotv = vec0.Or(vec1) + case "RotateLeft": + gotv = vec0.RotateLeft(vec1) + case "RotateRight": + gotv = vec0.RotateRight(vec1) + case "ShiftLeft": + gotv = vec0.ShiftLeft(vec1) + case "ShiftRight": + gotv = vec0.ShiftRight(vec1) + case "ShiftRightSignExtended": + gotv = vec0.ShiftRightSignExtended(vec1) case "Sub": gotv = vec0.Sub(vec1) case "Xor": @@ -3489,6 +3820,16 @@ func testInt32x16BinaryMasked(t *testing.T, v0 []int32, v1 []int32, v2 []int32, gotv = vec0.MaskedMulLow(vec1, vec2.AsMask32x16()) case "MaskedOr": gotv = vec0.MaskedOr(vec1, vec2.AsMask32x16()) + case "MaskedRotateLeft": + gotv = vec0.MaskedRotateLeft(vec1, vec2.AsMask32x16()) + case "MaskedRotateRight": + gotv = vec0.MaskedRotateRight(vec1, vec2.AsMask32x16()) + case "MaskedShiftLeft": + gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask32x16()) + case "MaskedShiftRight": + gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask32x16()) + case "MaskedShiftRightSignExtended": + gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask32x16()) case "MaskedSub": gotv = vec0.MaskedSub(vec1, vec2.AsMask32x16()) case "MaskedXor": @@ -3617,6 +3958,55 @@ func testInt32x16MaskedCompare(t *testing.T, v0 []int32, v1 []int32, v2 []int32, } } +func testInt32x16Ternary(t *testing.T, v0 []int32, v1 []int32, v2 []int32, want []int32, which string) { + t.Helper() + var gotv simd.Int32x16 + got := make([]int32, len(want)) + vec0 := simd.LoadInt32x16Slice(v0) + vec1 := simd.LoadInt32x16Slice(v1) + vec2 := simd.LoadInt32x16Slice(v2) + switch which { + case "ShiftLeftAndFillUpperFrom": + gotv = vec0.ShiftLeftAndFillUpperFrom(vec1, vec2) + case "ShiftRightAndFillUpperFrom": + gotv = vec0.ShiftRightAndFillUpperFrom(vec1, vec2) + + default: + t.Errorf("Unknown method: Int32x16.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testInt32x16TernaryMasked(t *testing.T, v0 []int32, v1 []int32, v2 []int32, v3 []int32, want []int32, which string) { + t.Helper() + var gotv simd.Int32x16 + got := make([]int32, len(want)) + vec0 := simd.LoadInt32x16Slice(v0) + vec1 := simd.LoadInt32x16Slice(v1) + vec2 := simd.LoadInt32x16Slice(v2) + vec3 := simd.LoadInt32x16Slice(v3) + switch which { + case "MaskedShiftLeftAndFillUpperFrom": + gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask32x16()) + case "MaskedShiftRightAndFillUpperFrom": + gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask32x16()) + + default: + t.Errorf("Unknown method: Int32x16.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testInt32x16Uint8x64Int8x64Int32x16(t *testing.T, v0 []int32, v1 []uint8, v2 []int8, want []int32, which string) { t.Helper() var gotv simd.Int32x16 @@ -3734,6 +4124,16 @@ func testInt64x2Binary(t *testing.T, v0 []int64, v1 []int64, want []int64, which gotv = vec0.MulLow(vec1) case "Or": gotv = vec0.Or(vec1) + case "RotateLeft": + gotv = vec0.RotateLeft(vec1) + case "RotateRight": + gotv = vec0.RotateRight(vec1) + case "ShiftLeft": + gotv = vec0.ShiftLeft(vec1) + case "ShiftRight": + gotv = 
vec0.ShiftRight(vec1) + case "ShiftRightSignExtended": + gotv = vec0.ShiftRightSignExtended(vec1) case "Sub": gotv = vec0.Sub(vec1) case "Xor": @@ -3774,6 +4174,16 @@ func testInt64x2BinaryMasked(t *testing.T, v0 []int64, v1 []int64, v2 []int64, w gotv = vec0.MaskedMulLow(vec1, vec2.AsMask64x2()) case "MaskedOr": gotv = vec0.MaskedOr(vec1, vec2.AsMask64x2()) + case "MaskedRotateLeft": + gotv = vec0.MaskedRotateLeft(vec1, vec2.AsMask64x2()) + case "MaskedRotateRight": + gotv = vec0.MaskedRotateRight(vec1, vec2.AsMask64x2()) + case "MaskedShiftLeft": + gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask64x2()) + case "MaskedShiftRight": + gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask64x2()) + case "MaskedShiftRightSignExtended": + gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask64x2()) case "MaskedSub": gotv = vec0.MaskedSub(vec1, vec2.AsMask64x2()) case "MaskedXor": @@ -3853,6 +4263,55 @@ func testInt64x2MaskedCompare(t *testing.T, v0 []int64, v1 []int64, v2 []int64, } } +func testInt64x2Ternary(t *testing.T, v0 []int64, v1 []int64, v2 []int64, want []int64, which string) { + t.Helper() + var gotv simd.Int64x2 + got := make([]int64, len(want)) + vec0 := simd.LoadInt64x2Slice(v0) + vec1 := simd.LoadInt64x2Slice(v1) + vec2 := simd.LoadInt64x2Slice(v2) + switch which { + case "ShiftLeftAndFillUpperFrom": + gotv = vec0.ShiftLeftAndFillUpperFrom(vec1, vec2) + case "ShiftRightAndFillUpperFrom": + gotv = vec0.ShiftRightAndFillUpperFrom(vec1, vec2) + + default: + t.Errorf("Unknown method: Int64x2.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testInt64x2TernaryMasked(t *testing.T, v0 []int64, v1 []int64, v2 []int64, v3 []int64, want []int64, which string) { + t.Helper() + var gotv simd.Int64x2 + got := make([]int64, len(want)) + vec0 := simd.LoadInt64x2Slice(v0) + vec1 := simd.LoadInt64x2Slice(v1) + vec2 := simd.LoadInt64x2Slice(v2) + vec3 := simd.LoadInt64x2Slice(v3) + switch which { + case "MaskedShiftLeftAndFillUpperFrom": + gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask64x2()) + case "MaskedShiftRightAndFillUpperFrom": + gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask64x2()) + + default: + t.Errorf("Unknown method: Int64x2.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testInt64x2Unary(t *testing.T, v0 []int64, want []int64, which string) { t.Helper() var gotv simd.Int64x2 @@ -3921,6 +4380,16 @@ func testInt64x4Binary(t *testing.T, v0 []int64, v1 []int64, want []int64, which gotv = vec0.MulLow(vec1) case "Or": gotv = vec0.Or(vec1) + case "RotateLeft": + gotv = vec0.RotateLeft(vec1) + case "RotateRight": + gotv = vec0.RotateRight(vec1) + case "ShiftLeft": + gotv = vec0.ShiftLeft(vec1) + case "ShiftRight": + gotv = vec0.ShiftRight(vec1) + case "ShiftRightSignExtended": + gotv = vec0.ShiftRightSignExtended(vec1) case "Sub": gotv = vec0.Sub(vec1) case "Xor": @@ -3961,6 +4430,16 @@ func testInt64x4BinaryMasked(t *testing.T, v0 []int64, v1 []int64, v2 []int64, w gotv = vec0.MaskedMulLow(vec1, vec2.AsMask64x4()) case "MaskedOr": gotv = vec0.MaskedOr(vec1, vec2.AsMask64x4()) + case "MaskedRotateLeft": + gotv = vec0.MaskedRotateLeft(vec1, vec2.AsMask64x4()) + case "MaskedRotateRight": + gotv = vec0.MaskedRotateRight(vec1, vec2.AsMask64x4()) + case "MaskedShiftLeft": + gotv = 
vec0.MaskedShiftLeft(vec1, vec2.AsMask64x4()) + case "MaskedShiftRight": + gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask64x4()) + case "MaskedShiftRightSignExtended": + gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask64x4()) case "MaskedSub": gotv = vec0.MaskedSub(vec1, vec2.AsMask64x4()) case "MaskedXor": @@ -4040,6 +4519,55 @@ func testInt64x4MaskedCompare(t *testing.T, v0 []int64, v1 []int64, v2 []int64, } } +func testInt64x4Ternary(t *testing.T, v0 []int64, v1 []int64, v2 []int64, want []int64, which string) { + t.Helper() + var gotv simd.Int64x4 + got := make([]int64, len(want)) + vec0 := simd.LoadInt64x4Slice(v0) + vec1 := simd.LoadInt64x4Slice(v1) + vec2 := simd.LoadInt64x4Slice(v2) + switch which { + case "ShiftLeftAndFillUpperFrom": + gotv = vec0.ShiftLeftAndFillUpperFrom(vec1, vec2) + case "ShiftRightAndFillUpperFrom": + gotv = vec0.ShiftRightAndFillUpperFrom(vec1, vec2) + + default: + t.Errorf("Unknown method: Int64x4.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testInt64x4TernaryMasked(t *testing.T, v0 []int64, v1 []int64, v2 []int64, v3 []int64, want []int64, which string) { + t.Helper() + var gotv simd.Int64x4 + got := make([]int64, len(want)) + vec0 := simd.LoadInt64x4Slice(v0) + vec1 := simd.LoadInt64x4Slice(v1) + vec2 := simd.LoadInt64x4Slice(v2) + vec3 := simd.LoadInt64x4Slice(v3) + switch which { + case "MaskedShiftLeftAndFillUpperFrom": + gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask64x4()) + case "MaskedShiftRightAndFillUpperFrom": + gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask64x4()) + + default: + t.Errorf("Unknown method: Int64x4.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testInt64x4Unary(t *testing.T, v0 []int64, want []int64, which string) { t.Helper() var gotv simd.Int64x4 @@ -4108,6 +4636,16 @@ func testInt64x8Binary(t *testing.T, v0 []int64, v1 []int64, want []int64, which gotv = vec0.MulLow(vec1) case "Or": gotv = vec0.Or(vec1) + case "RotateLeft": + gotv = vec0.RotateLeft(vec1) + case "RotateRight": + gotv = vec0.RotateRight(vec1) + case "ShiftLeft": + gotv = vec0.ShiftLeft(vec1) + case "ShiftRight": + gotv = vec0.ShiftRight(vec1) + case "ShiftRightSignExtended": + gotv = vec0.ShiftRightSignExtended(vec1) case "Sub": gotv = vec0.Sub(vec1) case "Xor": @@ -4148,6 +4686,16 @@ func testInt64x8BinaryMasked(t *testing.T, v0 []int64, v1 []int64, v2 []int64, w gotv = vec0.MaskedMulLow(vec1, vec2.AsMask64x8()) case "MaskedOr": gotv = vec0.MaskedOr(vec1, vec2.AsMask64x8()) + case "MaskedRotateLeft": + gotv = vec0.MaskedRotateLeft(vec1, vec2.AsMask64x8()) + case "MaskedRotateRight": + gotv = vec0.MaskedRotateRight(vec1, vec2.AsMask64x8()) + case "MaskedShiftLeft": + gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask64x8()) + case "MaskedShiftRight": + gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask64x8()) + case "MaskedShiftRightSignExtended": + gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask64x8()) case "MaskedSub": gotv = vec0.MaskedSub(vec1, vec2.AsMask64x8()) case "MaskedXor": @@ -4227,6 +4775,55 @@ func testInt64x8MaskedCompare(t *testing.T, v0 []int64, v1 []int64, v2 []int64, } } +func testInt64x8Ternary(t *testing.T, v0 []int64, v1 []int64, v2 []int64, want []int64, which string) { + t.Helper() + var gotv simd.Int64x8 + got 
:= make([]int64, len(want)) + vec0 := simd.LoadInt64x8Slice(v0) + vec1 := simd.LoadInt64x8Slice(v1) + vec2 := simd.LoadInt64x8Slice(v2) + switch which { + case "ShiftLeftAndFillUpperFrom": + gotv = vec0.ShiftLeftAndFillUpperFrom(vec1, vec2) + case "ShiftRightAndFillUpperFrom": + gotv = vec0.ShiftRightAndFillUpperFrom(vec1, vec2) + + default: + t.Errorf("Unknown method: Int64x8.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testInt64x8TernaryMasked(t *testing.T, v0 []int64, v1 []int64, v2 []int64, v3 []int64, want []int64, which string) { + t.Helper() + var gotv simd.Int64x8 + got := make([]int64, len(want)) + vec0 := simd.LoadInt64x8Slice(v0) + vec1 := simd.LoadInt64x8Slice(v1) + vec2 := simd.LoadInt64x8Slice(v2) + vec3 := simd.LoadInt64x8Slice(v3) + switch which { + case "MaskedShiftLeftAndFillUpperFrom": + gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask64x8()) + case "MaskedShiftRightAndFillUpperFrom": + gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask64x8()) + + default: + t.Errorf("Unknown method: Int64x8.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testInt64x8Unary(t *testing.T, v0 []int64, want []int64, which string) { t.Helper() var gotv simd.Int64x8 @@ -4961,6 +5558,12 @@ func testUint16x8Binary(t *testing.T, v0 []uint16, v1 []uint16, want []uint16, w gotv = vec0.SaturatedAdd(vec1) case "SaturatedSub": gotv = vec0.SaturatedSub(vec1) + case "ShiftLeft": + gotv = vec0.ShiftLeft(vec1) + case "ShiftRight": + gotv = vec0.ShiftRight(vec1) + case "ShiftRightSignExtended": + gotv = vec0.ShiftRightSignExtended(vec1) case "Sub": gotv = vec0.Sub(vec1) case "Xor": @@ -4999,6 +5602,12 @@ func testUint16x8BinaryMasked(t *testing.T, v0 []uint16, v1 []uint16, v2 []int16 gotv = vec0.MaskedSaturatedAdd(vec1, vec2.AsMask16x8()) case "MaskedSaturatedSub": gotv = vec0.MaskedSaturatedSub(vec1, vec2.AsMask16x8()) + case "MaskedShiftLeft": + gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask16x8()) + case "MaskedShiftRight": + gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask16x8()) + case "MaskedShiftRightSignExtended": + gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask16x8()) case "MaskedSub": gotv = vec0.MaskedSub(vec1, vec2.AsMask16x8()) @@ -5076,6 +5685,55 @@ func testUint16x8MaskedCompare(t *testing.T, v0 []uint16, v1 []uint16, v2 []int1 } } +func testUint16x8Ternary(t *testing.T, v0 []uint16, v1 []uint16, v2 []uint16, want []uint16, which string) { + t.Helper() + var gotv simd.Uint16x8 + got := make([]uint16, len(want)) + vec0 := simd.LoadUint16x8Slice(v0) + vec1 := simd.LoadUint16x8Slice(v1) + vec2 := simd.LoadUint16x8Slice(v2) + switch which { + case "ShiftLeftAndFillUpperFrom": + gotv = vec0.ShiftLeftAndFillUpperFrom(vec1, vec2) + case "ShiftRightAndFillUpperFrom": + gotv = vec0.ShiftRightAndFillUpperFrom(vec1, vec2) + + default: + t.Errorf("Unknown method: Uint16x8.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testUint16x8TernaryMasked(t *testing.T, v0 []uint16, v1 []uint16, v2 []uint16, v3 []int16, want []uint16, which string) { + t.Helper() + var gotv simd.Uint16x8 + got := make([]uint16, len(want)) + vec0 := simd.LoadUint16x8Slice(v0) + 
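// Note that even for the unsigned Uint16x8 receiver, the mask operand v3 is
// loaded as an Int16x8 and converted with AsMask16x8 below.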
vec1 := simd.LoadUint16x8Slice(v1) + vec2 := simd.LoadUint16x8Slice(v2) + vec3 := simd.LoadInt16x8Slice(v3) + switch which { + case "MaskedShiftLeftAndFillUpperFrom": + gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask16x8()) + case "MaskedShiftRightAndFillUpperFrom": + gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask16x8()) + + default: + t.Errorf("Unknown method: Uint16x8.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testUint16x8Unary(t *testing.T, v0 []uint16, want []uint16, which string) { t.Helper() var gotv simd.Uint16x8 @@ -5148,6 +5806,12 @@ func testUint16x16Binary(t *testing.T, v0 []uint16, v1 []uint16, want []uint16, gotv = vec0.SaturatedAdd(vec1) case "SaturatedSub": gotv = vec0.SaturatedSub(vec1) + case "ShiftLeft": + gotv = vec0.ShiftLeft(vec1) + case "ShiftRight": + gotv = vec0.ShiftRight(vec1) + case "ShiftRightSignExtended": + gotv = vec0.ShiftRightSignExtended(vec1) case "Sub": gotv = vec0.Sub(vec1) case "Xor": @@ -5186,6 +5850,12 @@ func testUint16x16BinaryMasked(t *testing.T, v0 []uint16, v1 []uint16, v2 []int1 gotv = vec0.MaskedSaturatedAdd(vec1, vec2.AsMask16x16()) case "MaskedSaturatedSub": gotv = vec0.MaskedSaturatedSub(vec1, vec2.AsMask16x16()) + case "MaskedShiftLeft": + gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask16x16()) + case "MaskedShiftRight": + gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask16x16()) + case "MaskedShiftRightSignExtended": + gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask16x16()) case "MaskedSub": gotv = vec0.MaskedSub(vec1, vec2.AsMask16x16()) @@ -5263,6 +5933,55 @@ func testUint16x16MaskedCompare(t *testing.T, v0 []uint16, v1 []uint16, v2 []int } } +func testUint16x16Ternary(t *testing.T, v0 []uint16, v1 []uint16, v2 []uint16, want []uint16, which string) { + t.Helper() + var gotv simd.Uint16x16 + got := make([]uint16, len(want)) + vec0 := simd.LoadUint16x16Slice(v0) + vec1 := simd.LoadUint16x16Slice(v1) + vec2 := simd.LoadUint16x16Slice(v2) + switch which { + case "ShiftLeftAndFillUpperFrom": + gotv = vec0.ShiftLeftAndFillUpperFrom(vec1, vec2) + case "ShiftRightAndFillUpperFrom": + gotv = vec0.ShiftRightAndFillUpperFrom(vec1, vec2) + + default: + t.Errorf("Unknown method: Uint16x16.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testUint16x16TernaryMasked(t *testing.T, v0 []uint16, v1 []uint16, v2 []uint16, v3 []int16, want []uint16, which string) { + t.Helper() + var gotv simd.Uint16x16 + got := make([]uint16, len(want)) + vec0 := simd.LoadUint16x16Slice(v0) + vec1 := simd.LoadUint16x16Slice(v1) + vec2 := simd.LoadUint16x16Slice(v2) + vec3 := simd.LoadInt16x16Slice(v3) + switch which { + case "MaskedShiftLeftAndFillUpperFrom": + gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask16x16()) + case "MaskedShiftRightAndFillUpperFrom": + gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask16x16()) + + default: + t.Errorf("Unknown method: Uint16x16.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testUint16x16Unary(t *testing.T, v0 []uint16, want []uint16, which string) { t.Helper() var gotv simd.Uint16x16 @@ -5325,6 +6044,12 @@ func testUint16x32Binary(t 
*testing.T, v0 []uint16, v1 []uint16, want []uint16, gotv = vec0.SaturatedAdd(vec1) case "SaturatedSub": gotv = vec0.SaturatedSub(vec1) + case "ShiftLeft": + gotv = vec0.ShiftLeft(vec1) + case "ShiftRight": + gotv = vec0.ShiftRight(vec1) + case "ShiftRightSignExtended": + gotv = vec0.ShiftRightSignExtended(vec1) case "Sub": gotv = vec0.Sub(vec1) @@ -5361,6 +6086,12 @@ func testUint16x32BinaryMasked(t *testing.T, v0 []uint16, v1 []uint16, v2 []int1 gotv = vec0.MaskedSaturatedAdd(vec1, vec2.AsMask16x32()) case "MaskedSaturatedSub": gotv = vec0.MaskedSaturatedSub(vec1, vec2.AsMask16x32()) + case "MaskedShiftLeft": + gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask16x32()) + case "MaskedShiftRight": + gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask16x32()) + case "MaskedShiftRightSignExtended": + gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask16x32()) case "MaskedSub": gotv = vec0.MaskedSub(vec1, vec2.AsMask16x32()) @@ -5438,6 +6169,55 @@ func testUint16x32MaskedCompare(t *testing.T, v0 []uint16, v1 []uint16, v2 []int } } +func testUint16x32Ternary(t *testing.T, v0 []uint16, v1 []uint16, v2 []uint16, want []uint16, which string) { + t.Helper() + var gotv simd.Uint16x32 + got := make([]uint16, len(want)) + vec0 := simd.LoadUint16x32Slice(v0) + vec1 := simd.LoadUint16x32Slice(v1) + vec2 := simd.LoadUint16x32Slice(v2) + switch which { + case "ShiftLeftAndFillUpperFrom": + gotv = vec0.ShiftLeftAndFillUpperFrom(vec1, vec2) + case "ShiftRightAndFillUpperFrom": + gotv = vec0.ShiftRightAndFillUpperFrom(vec1, vec2) + + default: + t.Errorf("Unknown method: Uint16x32.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testUint16x32TernaryMasked(t *testing.T, v0 []uint16, v1 []uint16, v2 []uint16, v3 []int16, want []uint16, which string) { + t.Helper() + var gotv simd.Uint16x32 + got := make([]uint16, len(want)) + vec0 := simd.LoadUint16x32Slice(v0) + vec1 := simd.LoadUint16x32Slice(v1) + vec2 := simd.LoadUint16x32Slice(v2) + vec3 := simd.LoadInt16x32Slice(v3) + switch which { + case "MaskedShiftLeftAndFillUpperFrom": + gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask16x32()) + case "MaskedShiftRightAndFillUpperFrom": + gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask16x32()) + + default: + t.Errorf("Unknown method: Uint16x32.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testUint16x32Unary(t *testing.T, v0 []uint16, want []uint16, which string) { t.Helper() var gotv simd.Uint16x32 @@ -5502,6 +6282,16 @@ func testUint32x4Binary(t *testing.T, v0 []uint32, v1 []uint32, want []uint32, w gotv = vec0.PairwiseAdd(vec1) case "PairwiseSub": gotv = vec0.PairwiseSub(vec1) + case "RotateLeft": + gotv = vec0.RotateLeft(vec1) + case "RotateRight": + gotv = vec0.RotateRight(vec1) + case "ShiftLeft": + gotv = vec0.ShiftLeft(vec1) + case "ShiftRight": + gotv = vec0.ShiftRight(vec1) + case "ShiftRightSignExtended": + gotv = vec0.ShiftRightSignExtended(vec1) case "Sub": gotv = vec0.Sub(vec1) case "Xor": @@ -5538,6 +6328,16 @@ func testUint32x4BinaryMasked(t *testing.T, v0 []uint32, v1 []uint32, v2 []int32 gotv = vec0.MaskedMin(vec1, vec2.AsMask32x4()) case "MaskedOr": gotv = vec0.MaskedOr(vec1, vec2.AsMask32x4()) + case "MaskedRotateLeft": + gotv = vec0.MaskedRotateLeft(vec1, vec2.AsMask32x4()) + case 
"MaskedRotateRight": + gotv = vec0.MaskedRotateRight(vec1, vec2.AsMask32x4()) + case "MaskedShiftLeft": + gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask32x4()) + case "MaskedShiftRight": + gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask32x4()) + case "MaskedShiftRightSignExtended": + gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask32x4()) case "MaskedSub": gotv = vec0.MaskedSub(vec1, vec2.AsMask32x4()) case "MaskedXor": @@ -5638,6 +6438,55 @@ func testUint32x4MaskedCompare(t *testing.T, v0 []uint32, v1 []uint32, v2 []int3 } } +func testUint32x4Ternary(t *testing.T, v0 []uint32, v1 []uint32, v2 []uint32, want []uint32, which string) { + t.Helper() + var gotv simd.Uint32x4 + got := make([]uint32, len(want)) + vec0 := simd.LoadUint32x4Slice(v0) + vec1 := simd.LoadUint32x4Slice(v1) + vec2 := simd.LoadUint32x4Slice(v2) + switch which { + case "ShiftLeftAndFillUpperFrom": + gotv = vec0.ShiftLeftAndFillUpperFrom(vec1, vec2) + case "ShiftRightAndFillUpperFrom": + gotv = vec0.ShiftRightAndFillUpperFrom(vec1, vec2) + + default: + t.Errorf("Unknown method: Uint32x4.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testUint32x4TernaryMasked(t *testing.T, v0 []uint32, v1 []uint32, v2 []uint32, v3 []int32, want []uint32, which string) { + t.Helper() + var gotv simd.Uint32x4 + got := make([]uint32, len(want)) + vec0 := simd.LoadUint32x4Slice(v0) + vec1 := simd.LoadUint32x4Slice(v1) + vec2 := simd.LoadUint32x4Slice(v2) + vec3 := simd.LoadInt32x4Slice(v3) + switch which { + case "MaskedShiftLeftAndFillUpperFrom": + gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask32x4()) + case "MaskedShiftRightAndFillUpperFrom": + gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask32x4()) + + default: + t.Errorf("Unknown method: Uint32x4.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testUint32x4Uint8x16Int8x16Mask32x4Uint32x4(t *testing.T, v0 []uint32, v1 []uint8, v2 []int8, v3 []int32, want []uint32, which string) { t.Helper() var gotv simd.Uint32x4 @@ -5751,6 +6600,16 @@ func testUint32x8Binary(t *testing.T, v0 []uint32, v1 []uint32, want []uint32, w gotv = vec0.PairwiseAdd(vec1) case "PairwiseSub": gotv = vec0.PairwiseSub(vec1) + case "RotateLeft": + gotv = vec0.RotateLeft(vec1) + case "RotateRight": + gotv = vec0.RotateRight(vec1) + case "ShiftLeft": + gotv = vec0.ShiftLeft(vec1) + case "ShiftRight": + gotv = vec0.ShiftRight(vec1) + case "ShiftRightSignExtended": + gotv = vec0.ShiftRightSignExtended(vec1) case "Sub": gotv = vec0.Sub(vec1) case "Xor": @@ -5787,6 +6646,16 @@ func testUint32x8BinaryMasked(t *testing.T, v0 []uint32, v1 []uint32, v2 []int32 gotv = vec0.MaskedMin(vec1, vec2.AsMask32x8()) case "MaskedOr": gotv = vec0.MaskedOr(vec1, vec2.AsMask32x8()) + case "MaskedRotateLeft": + gotv = vec0.MaskedRotateLeft(vec1, vec2.AsMask32x8()) + case "MaskedRotateRight": + gotv = vec0.MaskedRotateRight(vec1, vec2.AsMask32x8()) + case "MaskedShiftLeft": + gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask32x8()) + case "MaskedShiftRight": + gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask32x8()) + case "MaskedShiftRightSignExtended": + gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask32x8()) case "MaskedSub": gotv = vec0.MaskedSub(vec1, vec2.AsMask32x8()) case "MaskedXor": @@ -5887,6 +6756,55 @@ func 
testUint32x8MaskedCompare(t *testing.T, v0 []uint32, v1 []uint32, v2 []int3 } } +func testUint32x8Ternary(t *testing.T, v0 []uint32, v1 []uint32, v2 []uint32, want []uint32, which string) { + t.Helper() + var gotv simd.Uint32x8 + got := make([]uint32, len(want)) + vec0 := simd.LoadUint32x8Slice(v0) + vec1 := simd.LoadUint32x8Slice(v1) + vec2 := simd.LoadUint32x8Slice(v2) + switch which { + case "ShiftLeftAndFillUpperFrom": + gotv = vec0.ShiftLeftAndFillUpperFrom(vec1, vec2) + case "ShiftRightAndFillUpperFrom": + gotv = vec0.ShiftRightAndFillUpperFrom(vec1, vec2) + + default: + t.Errorf("Unknown method: Uint32x8.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testUint32x8TernaryMasked(t *testing.T, v0 []uint32, v1 []uint32, v2 []uint32, v3 []int32, want []uint32, which string) { + t.Helper() + var gotv simd.Uint32x8 + got := make([]uint32, len(want)) + vec0 := simd.LoadUint32x8Slice(v0) + vec1 := simd.LoadUint32x8Slice(v1) + vec2 := simd.LoadUint32x8Slice(v2) + vec3 := simd.LoadInt32x8Slice(v3) + switch which { + case "MaskedShiftLeftAndFillUpperFrom": + gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask32x8()) + case "MaskedShiftRightAndFillUpperFrom": + gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask32x8()) + + default: + t.Errorf("Unknown method: Uint32x8.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testUint32x8Uint8x32Int8x32Mask32x8Uint32x8(t *testing.T, v0 []uint32, v1 []uint8, v2 []int8, v3 []int32, want []uint32, which string) { t.Helper() var gotv simd.Uint32x8 @@ -5996,6 +6914,16 @@ func testUint32x16Binary(t *testing.T, v0 []uint32, v1 []uint32, want []uint32, gotv = vec0.Min(vec1) case "Or": gotv = vec0.Or(vec1) + case "RotateLeft": + gotv = vec0.RotateLeft(vec1) + case "RotateRight": + gotv = vec0.RotateRight(vec1) + case "ShiftLeft": + gotv = vec0.ShiftLeft(vec1) + case "ShiftRight": + gotv = vec0.ShiftRight(vec1) + case "ShiftRightSignExtended": + gotv = vec0.ShiftRightSignExtended(vec1) case "Sub": gotv = vec0.Sub(vec1) case "Xor": @@ -6032,6 +6960,16 @@ func testUint32x16BinaryMasked(t *testing.T, v0 []uint32, v1 []uint32, v2 []int3 gotv = vec0.MaskedMin(vec1, vec2.AsMask32x16()) case "MaskedOr": gotv = vec0.MaskedOr(vec1, vec2.AsMask32x16()) + case "MaskedRotateLeft": + gotv = vec0.MaskedRotateLeft(vec1, vec2.AsMask32x16()) + case "MaskedRotateRight": + gotv = vec0.MaskedRotateRight(vec1, vec2.AsMask32x16()) + case "MaskedShiftLeft": + gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask32x16()) + case "MaskedShiftRight": + gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask32x16()) + case "MaskedShiftRightSignExtended": + gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask32x16()) case "MaskedSub": gotv = vec0.MaskedSub(vec1, vec2.AsMask32x16()) case "MaskedXor": @@ -6111,6 +7049,55 @@ func testUint32x16MaskedCompare(t *testing.T, v0 []uint32, v1 []uint32, v2 []int } } +func testUint32x16Ternary(t *testing.T, v0 []uint32, v1 []uint32, v2 []uint32, want []uint32, which string) { + t.Helper() + var gotv simd.Uint32x16 + got := make([]uint32, len(want)) + vec0 := simd.LoadUint32x16Slice(v0) + vec1 := simd.LoadUint32x16Slice(v1) + vec2 := simd.LoadUint32x16Slice(v2) + switch which { + case "ShiftLeftAndFillUpperFrom": + gotv = vec0.ShiftLeftAndFillUpperFrom(vec1, vec2) + 
case "ShiftRightAndFillUpperFrom": + gotv = vec0.ShiftRightAndFillUpperFrom(vec1, vec2) + + default: + t.Errorf("Unknown method: Uint32x16.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testUint32x16TernaryMasked(t *testing.T, v0 []uint32, v1 []uint32, v2 []uint32, v3 []int32, want []uint32, which string) { + t.Helper() + var gotv simd.Uint32x16 + got := make([]uint32, len(want)) + vec0 := simd.LoadUint32x16Slice(v0) + vec1 := simd.LoadUint32x16Slice(v1) + vec2 := simd.LoadUint32x16Slice(v2) + vec3 := simd.LoadInt32x16Slice(v3) + switch which { + case "MaskedShiftLeftAndFillUpperFrom": + gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask32x16()) + case "MaskedShiftRightAndFillUpperFrom": + gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask32x16()) + + default: + t.Errorf("Unknown method: Uint32x16.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testUint32x16Uint8x64Int8x64Mask32x16Uint32x16(t *testing.T, v0 []uint32, v1 []uint8, v2 []int8, v3 []int32, want []uint32, which string) { t.Helper() var gotv simd.Uint32x16 @@ -6222,6 +7209,16 @@ func testUint64x2Binary(t *testing.T, v0 []uint64, v1 []uint64, want []uint64, w gotv = vec0.MulEvenWiden(vec1) case "Or": gotv = vec0.Or(vec1) + case "RotateLeft": + gotv = vec0.RotateLeft(vec1) + case "RotateRight": + gotv = vec0.RotateRight(vec1) + case "ShiftLeft": + gotv = vec0.ShiftLeft(vec1) + case "ShiftRight": + gotv = vec0.ShiftRight(vec1) + case "ShiftRightSignExtended": + gotv = vec0.ShiftRightSignExtended(vec1) case "Sub": gotv = vec0.Sub(vec1) case "Xor": @@ -6260,6 +7257,16 @@ func testUint64x2BinaryMasked(t *testing.T, v0 []uint64, v1 []uint64, v2 []int64 gotv = vec0.MaskedMulEvenWiden(vec1, vec2.AsMask64x2()) case "MaskedOr": gotv = vec0.MaskedOr(vec1, vec2.AsMask64x2()) + case "MaskedRotateLeft": + gotv = vec0.MaskedRotateLeft(vec1, vec2.AsMask64x2()) + case "MaskedRotateRight": + gotv = vec0.MaskedRotateRight(vec1, vec2.AsMask64x2()) + case "MaskedShiftLeft": + gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask64x2()) + case "MaskedShiftRight": + gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask64x2()) + case "MaskedShiftRightSignExtended": + gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask64x2()) case "MaskedSub": gotv = vec0.MaskedSub(vec1, vec2.AsMask64x2()) case "MaskedXor": @@ -6339,6 +7346,55 @@ func testUint64x2MaskedCompare(t *testing.T, v0 []uint64, v1 []uint64, v2 []int6 } } +func testUint64x2Ternary(t *testing.T, v0 []uint64, v1 []uint64, v2 []uint64, want []uint64, which string) { + t.Helper() + var gotv simd.Uint64x2 + got := make([]uint64, len(want)) + vec0 := simd.LoadUint64x2Slice(v0) + vec1 := simd.LoadUint64x2Slice(v1) + vec2 := simd.LoadUint64x2Slice(v2) + switch which { + case "ShiftLeftAndFillUpperFrom": + gotv = vec0.ShiftLeftAndFillUpperFrom(vec1, vec2) + case "ShiftRightAndFillUpperFrom": + gotv = vec0.ShiftRightAndFillUpperFrom(vec1, vec2) + + default: + t.Errorf("Unknown method: Uint64x2.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testUint64x2TernaryMasked(t *testing.T, v0 []uint64, v1 []uint64, v2 []uint64, v3 []int64, want []uint64, which string) { + t.Helper() + 
var gotv simd.Uint64x2 + got := make([]uint64, len(want)) + vec0 := simd.LoadUint64x2Slice(v0) + vec1 := simd.LoadUint64x2Slice(v1) + vec2 := simd.LoadUint64x2Slice(v2) + vec3 := simd.LoadInt64x2Slice(v3) + switch which { + case "MaskedShiftLeftAndFillUpperFrom": + gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask64x2()) + case "MaskedShiftRightAndFillUpperFrom": + gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask64x2()) + + default: + t.Errorf("Unknown method: Uint64x2.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testUint64x2Unary(t *testing.T, v0 []uint64, want []uint64, which string) { t.Helper() var gotv simd.Uint64x2 @@ -6401,6 +7457,16 @@ func testUint64x4Binary(t *testing.T, v0 []uint64, v1 []uint64, want []uint64, w gotv = vec0.MulEvenWiden(vec1) case "Or": gotv = vec0.Or(vec1) + case "RotateLeft": + gotv = vec0.RotateLeft(vec1) + case "RotateRight": + gotv = vec0.RotateRight(vec1) + case "ShiftLeft": + gotv = vec0.ShiftLeft(vec1) + case "ShiftRight": + gotv = vec0.ShiftRight(vec1) + case "ShiftRightSignExtended": + gotv = vec0.ShiftRightSignExtended(vec1) case "Sub": gotv = vec0.Sub(vec1) case "Xor": @@ -6439,6 +7505,16 @@ func testUint64x4BinaryMasked(t *testing.T, v0 []uint64, v1 []uint64, v2 []int64 gotv = vec0.MaskedMulEvenWiden(vec1, vec2.AsMask64x4()) case "MaskedOr": gotv = vec0.MaskedOr(vec1, vec2.AsMask64x4()) + case "MaskedRotateLeft": + gotv = vec0.MaskedRotateLeft(vec1, vec2.AsMask64x4()) + case "MaskedRotateRight": + gotv = vec0.MaskedRotateRight(vec1, vec2.AsMask64x4()) + case "MaskedShiftLeft": + gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask64x4()) + case "MaskedShiftRight": + gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask64x4()) + case "MaskedShiftRightSignExtended": + gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask64x4()) case "MaskedSub": gotv = vec0.MaskedSub(vec1, vec2.AsMask64x4()) case "MaskedXor": @@ -6518,6 +7594,55 @@ func testUint64x4MaskedCompare(t *testing.T, v0 []uint64, v1 []uint64, v2 []int6 } } +func testUint64x4Ternary(t *testing.T, v0 []uint64, v1 []uint64, v2 []uint64, want []uint64, which string) { + t.Helper() + var gotv simd.Uint64x4 + got := make([]uint64, len(want)) + vec0 := simd.LoadUint64x4Slice(v0) + vec1 := simd.LoadUint64x4Slice(v1) + vec2 := simd.LoadUint64x4Slice(v2) + switch which { + case "ShiftLeftAndFillUpperFrom": + gotv = vec0.ShiftLeftAndFillUpperFrom(vec1, vec2) + case "ShiftRightAndFillUpperFrom": + gotv = vec0.ShiftRightAndFillUpperFrom(vec1, vec2) + + default: + t.Errorf("Unknown method: Uint64x4.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testUint64x4TernaryMasked(t *testing.T, v0 []uint64, v1 []uint64, v2 []uint64, v3 []int64, want []uint64, which string) { + t.Helper() + var gotv simd.Uint64x4 + got := make([]uint64, len(want)) + vec0 := simd.LoadUint64x4Slice(v0) + vec1 := simd.LoadUint64x4Slice(v1) + vec2 := simd.LoadUint64x4Slice(v2) + vec3 := simd.LoadInt64x4Slice(v3) + switch which { + case "MaskedShiftLeftAndFillUpperFrom": + gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask64x4()) + case "MaskedShiftRightAndFillUpperFrom": + gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask64x4()) + + default: + t.Errorf("Unknown method: Uint64x4.%s", which) + } + 
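// Note: the Ternary/TernaryMasked wrappers in this file are driven by generated table
// tests elsewhere; the call below is only a hypothetical sketch of how one such wrapper
// could be invoked directly (inputs, the mask convention, and the expected values are
// illustrative assumptions, not values taken from this CL):
//
//	// want would hold results computed by a scalar reference implementation.
//	testUint64x4TernaryMasked(t,
//		[]uint64{1, 2, 3, 4},  // x: values to shift
//		[]uint64{4, 4, 4, 4},  // y: per-lane shift counts
//		[]uint64{0, 0, 0, 0},  // z: fill source for the emptied bits
//		[]int64{-1, -1, 0, 0}, // mask lanes, converted via AsMask64x4 by the wrapper
//		want,
//		"MaskedShiftLeftAndFillUpperFrom")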
gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testUint64x4Unary(t *testing.T, v0 []uint64, want []uint64, which string) { t.Helper() var gotv simd.Uint64x4 @@ -6580,6 +7705,16 @@ func testUint64x8Binary(t *testing.T, v0 []uint64, v1 []uint64, want []uint64, w gotv = vec0.MulEvenWiden(vec1) case "Or": gotv = vec0.Or(vec1) + case "RotateLeft": + gotv = vec0.RotateLeft(vec1) + case "RotateRight": + gotv = vec0.RotateRight(vec1) + case "ShiftLeft": + gotv = vec0.ShiftLeft(vec1) + case "ShiftRight": + gotv = vec0.ShiftRight(vec1) + case "ShiftRightSignExtended": + gotv = vec0.ShiftRightSignExtended(vec1) case "Sub": gotv = vec0.Sub(vec1) case "Xor": @@ -6618,6 +7753,16 @@ func testUint64x8BinaryMasked(t *testing.T, v0 []uint64, v1 []uint64, v2 []int64 gotv = vec0.MaskedMulEvenWiden(vec1, vec2.AsMask64x8()) case "MaskedOr": gotv = vec0.MaskedOr(vec1, vec2.AsMask64x8()) + case "MaskedRotateLeft": + gotv = vec0.MaskedRotateLeft(vec1, vec2.AsMask64x8()) + case "MaskedRotateRight": + gotv = vec0.MaskedRotateRight(vec1, vec2.AsMask64x8()) + case "MaskedShiftLeft": + gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask64x8()) + case "MaskedShiftRight": + gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask64x8()) + case "MaskedShiftRightSignExtended": + gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask64x8()) case "MaskedSub": gotv = vec0.MaskedSub(vec1, vec2.AsMask64x8()) case "MaskedXor": @@ -6697,6 +7842,55 @@ func testUint64x8MaskedCompare(t *testing.T, v0 []uint64, v1 []uint64, v2 []int6 } } +func testUint64x8Ternary(t *testing.T, v0 []uint64, v1 []uint64, v2 []uint64, want []uint64, which string) { + t.Helper() + var gotv simd.Uint64x8 + got := make([]uint64, len(want)) + vec0 := simd.LoadUint64x8Slice(v0) + vec1 := simd.LoadUint64x8Slice(v1) + vec2 := simd.LoadUint64x8Slice(v2) + switch which { + case "ShiftLeftAndFillUpperFrom": + gotv = vec0.ShiftLeftAndFillUpperFrom(vec1, vec2) + case "ShiftRightAndFillUpperFrom": + gotv = vec0.ShiftRightAndFillUpperFrom(vec1, vec2) + + default: + t.Errorf("Unknown method: Uint64x8.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testUint64x8TernaryMasked(t *testing.T, v0 []uint64, v1 []uint64, v2 []uint64, v3 []int64, want []uint64, which string) { + t.Helper() + var gotv simd.Uint64x8 + got := make([]uint64, len(want)) + vec0 := simd.LoadUint64x8Slice(v0) + vec1 := simd.LoadUint64x8Slice(v1) + vec2 := simd.LoadUint64x8Slice(v2) + vec3 := simd.LoadInt64x8Slice(v3) + switch which { + case "MaskedShiftLeftAndFillUpperFrom": + gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask64x8()) + case "MaskedShiftRightAndFillUpperFrom": + gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask64x8()) + + default: + t.Errorf("Unknown method: Uint64x8.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testUint64x8Unary(t *testing.T, v0 []uint64, want []uint64, which string) { t.Helper() var gotv simd.Uint64x8 @@ -6737,3 +7931,54 @@ func testUint64x8UnaryMasked(t *testing.T, v0 []uint64, v1 []int64, want []uint6 } } } + +/* The operations below cannot be tested via wrappers, please test them directly */ + +// CeilSuppressExceptionWithPrecision +// 
CeilWithPrecision +// DiffWithCeilSuppressExceptionWithPrecision +// DiffWithCeilWithPrecision +// DiffWithFloorSuppressExceptionWithPrecision +// DiffWithFloorWithPrecision +// DiffWithRoundSuppressExceptionWithPrecision +// DiffWithRoundWithPrecision +// DiffWithTruncSuppressExceptionWithPrecision +// DiffWithTruncWithPrecision +// FloorSuppressExceptionWithPrecision +// FloorWithPrecision +// GetElem +// MaskedCeilSuppressExceptionWithPrecision +// MaskedCeilWithPrecision +// MaskedDiffWithCeilSuppressExceptionWithPrecision +// MaskedDiffWithCeilWithPrecision +// MaskedDiffWithFloorSuppressExceptionWithPrecision +// MaskedDiffWithFloorWithPrecision +// MaskedDiffWithRoundSuppressExceptionWithPrecision +// MaskedDiffWithRoundWithPrecision +// MaskedDiffWithTruncSuppressExceptionWithPrecision +// MaskedDiffWithTruncWithPrecision +// MaskedFloorSuppressExceptionWithPrecision +// MaskedFloorWithPrecision +// MaskedRotateAllLeft +// MaskedRotateAllRight +// MaskedRoundSuppressExceptionWithPrecision +// MaskedRoundWithPrecision +// MaskedShiftAllLeft +// MaskedShiftAllLeftAndFillUpperFrom +// MaskedShiftAllRight +// MaskedShiftAllRightAndFillUpperFrom +// MaskedShiftAllRightSignExtended +// MaskedTruncSuppressExceptionWithPrecision +// MaskedTruncWithPrecision +// RotateAllLeft +// RotateAllRight +// RoundSuppressExceptionWithPrecision +// RoundWithPrecision +// SetElem +// ShiftAllLeft +// ShiftAllLeftAndFillUpperFrom +// ShiftAllRight +// ShiftAllRightAndFillUpperFrom +// ShiftAllRightSignExtended +// TruncSuppressExceptionWithPrecision +// TruncWithPrecision diff --git a/src/simd/stubs_amd64.go b/src/simd/stubs_amd64.go index 5037e4e024e..330ad6aca2a 100644 --- a/src/simd/stubs_amd64.go +++ b/src/simd/stubs_amd64.go @@ -5178,6 +5178,254 @@ func (x Uint64x4) MaskedPopCount(y Mask64x4) Uint64x4 // Asm: VPOPCNTQ, CPU Feature: AVX512EVEX func (x Uint64x8) MaskedPopCount(y Mask64x8) Uint64x8 +/* MaskedRotateAllLeft */ + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLD, CPU Feature: AVX512EVEX +func (x Int32x4) MaskedRotateAllLeft(imm uint8, y Mask32x4) Int32x4 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLD, CPU Feature: AVX512EVEX +func (x Int32x8) MaskedRotateAllLeft(imm uint8, y Mask32x8) Int32x8 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLD, CPU Feature: AVX512EVEX +func (x Int32x16) MaskedRotateAllLeft(imm uint8, y Mask32x16) Int32x16 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLQ, CPU Feature: AVX512EVEX +func (x Int64x2) MaskedRotateAllLeft(imm uint8, y Mask64x2) Int64x2 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLQ, CPU Feature: AVX512EVEX +func (x Int64x4) MaskedRotateAllLeft(imm uint8, y Mask64x4) Int64x4 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLQ, CPU Feature: AVX512EVEX +func (x Int64x8) MaskedRotateAllLeft(imm uint8, y Mask64x8) Int64x8 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. 
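// Illustrative use only (a sketch assuming an AVX-512 capable CPU; the mask is built from
// an integer vector as in the wrapper tests in this CL, and the behavior of unselected
// lanes is not specified here):
//
//	x := simd.LoadUint32x4Slice([]uint32{1, 2, 3, 4})
//	m := simd.LoadInt32x4Slice([]int32{-1, -1, 0, 0}).AsMask32x4()
//	r := x.MaskedRotateAllLeft(3, m) // selected lanes are rotated left by 3 bits
//	out := make([]uint32, 4)
//	r.StoreSlice(out)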
+// +// Asm: VPROLD, CPU Feature: AVX512EVEX +func (x Uint32x4) MaskedRotateAllLeft(imm uint8, y Mask32x4) Uint32x4 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLD, CPU Feature: AVX512EVEX +func (x Uint32x8) MaskedRotateAllLeft(imm uint8, y Mask32x8) Uint32x8 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLD, CPU Feature: AVX512EVEX +func (x Uint32x16) MaskedRotateAllLeft(imm uint8, y Mask32x16) Uint32x16 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLQ, CPU Feature: AVX512EVEX +func (x Uint64x2) MaskedRotateAllLeft(imm uint8, y Mask64x2) Uint64x2 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLQ, CPU Feature: AVX512EVEX +func (x Uint64x4) MaskedRotateAllLeft(imm uint8, y Mask64x4) Uint64x4 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLQ, CPU Feature: AVX512EVEX +func (x Uint64x8) MaskedRotateAllLeft(imm uint8, y Mask64x8) Uint64x8 + +/* MaskedRotateAllRight */ + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORD, CPU Feature: AVX512EVEX +func (x Int32x4) MaskedRotateAllRight(imm uint8, y Mask32x4) Int32x4 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORD, CPU Feature: AVX512EVEX +func (x Int32x8) MaskedRotateAllRight(imm uint8, y Mask32x8) Int32x8 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORD, CPU Feature: AVX512EVEX +func (x Int32x16) MaskedRotateAllRight(imm uint8, y Mask32x16) Int32x16 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORQ, CPU Feature: AVX512EVEX +func (x Int64x2) MaskedRotateAllRight(imm uint8, y Mask64x2) Int64x2 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORQ, CPU Feature: AVX512EVEX +func (x Int64x4) MaskedRotateAllRight(imm uint8, y Mask64x4) Int64x4 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORQ, CPU Feature: AVX512EVEX +func (x Int64x8) MaskedRotateAllRight(imm uint8, y Mask64x8) Int64x8 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORD, CPU Feature: AVX512EVEX +func (x Uint32x4) MaskedRotateAllRight(imm uint8, y Mask32x4) Uint32x4 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORD, CPU Feature: AVX512EVEX +func (x Uint32x8) MaskedRotateAllRight(imm uint8, y Mask32x8) Uint32x8 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORD, CPU Feature: AVX512EVEX +func (x Uint32x16) MaskedRotateAllRight(imm uint8, y Mask32x16) Uint32x16 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. 
+// +// Asm: VPRORQ, CPU Feature: AVX512EVEX +func (x Uint64x2) MaskedRotateAllRight(imm uint8, y Mask64x2) Uint64x2 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORQ, CPU Feature: AVX512EVEX +func (x Uint64x4) MaskedRotateAllRight(imm uint8, y Mask64x4) Uint64x4 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORQ, CPU Feature: AVX512EVEX +func (x Uint64x8) MaskedRotateAllRight(imm uint8, y Mask64x8) Uint64x8 + +/* MaskedRotateLeft */ + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVD, CPU Feature: AVX512EVEX +func (x Int32x4) MaskedRotateLeft(y Int32x4, z Mask32x4) Int32x4 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVD, CPU Feature: AVX512EVEX +func (x Int32x8) MaskedRotateLeft(y Int32x8, z Mask32x8) Int32x8 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVD, CPU Feature: AVX512EVEX +func (x Int32x16) MaskedRotateLeft(y Int32x16, z Mask32x16) Int32x16 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVQ, CPU Feature: AVX512EVEX +func (x Int64x2) MaskedRotateLeft(y Int64x2, z Mask64x2) Int64x2 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVQ, CPU Feature: AVX512EVEX +func (x Int64x4) MaskedRotateLeft(y Int64x4, z Mask64x4) Int64x4 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVQ, CPU Feature: AVX512EVEX +func (x Int64x8) MaskedRotateLeft(y Int64x8, z Mask64x8) Int64x8 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVD, CPU Feature: AVX512EVEX +func (x Uint32x4) MaskedRotateLeft(y Uint32x4, z Mask32x4) Uint32x4 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVD, CPU Feature: AVX512EVEX +func (x Uint32x8) MaskedRotateLeft(y Uint32x8, z Mask32x8) Uint32x8 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVD, CPU Feature: AVX512EVEX +func (x Uint32x16) MaskedRotateLeft(y Uint32x16, z Mask32x16) Uint32x16 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVQ, CPU Feature: AVX512EVEX +func (x Uint64x2) MaskedRotateLeft(y Uint64x2, z Mask64x2) Uint64x2 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVQ, CPU Feature: AVX512EVEX +func (x Uint64x4) MaskedRotateLeft(y Uint64x4, z Mask64x4) Uint64x4 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVQ, CPU Feature: AVX512EVEX +func (x Uint64x8) MaskedRotateLeft(y Uint64x8, z Mask64x8) Uint64x8 + +/* MaskedRotateRight */ + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. 
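// A concrete sketch of the per-lane, variable-count rotate (unmasked form, as exercised
// by the wrapper tests in this CL; assumes hardware with the required AVX-512 features):
//
//	x := simd.LoadUint32x4Slice([]uint32{0x80000001, 2, 4, 8})
//	n := simd.LoadUint32x4Slice([]uint32{1, 1, 2, 3})
//	r := x.RotateRight(n) // lane i is rotated right by n[i] bits: {0xC0000000, 1, 1, 1}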
+// +// Asm: VPRORVD, CPU Feature: AVX512EVEX +func (x Int32x4) MaskedRotateRight(y Int32x4, z Mask32x4) Int32x4 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVD, CPU Feature: AVX512EVEX +func (x Int32x8) MaskedRotateRight(y Int32x8, z Mask32x8) Int32x8 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVD, CPU Feature: AVX512EVEX +func (x Int32x16) MaskedRotateRight(y Int32x16, z Mask32x16) Int32x16 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVQ, CPU Feature: AVX512EVEX +func (x Int64x2) MaskedRotateRight(y Int64x2, z Mask64x2) Int64x2 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVQ, CPU Feature: AVX512EVEX +func (x Int64x4) MaskedRotateRight(y Int64x4, z Mask64x4) Int64x4 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVQ, CPU Feature: AVX512EVEX +func (x Int64x8) MaskedRotateRight(y Int64x8, z Mask64x8) Int64x8 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVD, CPU Feature: AVX512EVEX +func (x Uint32x4) MaskedRotateRight(y Uint32x4, z Mask32x4) Uint32x4 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVD, CPU Feature: AVX512EVEX +func (x Uint32x8) MaskedRotateRight(y Uint32x8, z Mask32x8) Uint32x8 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVD, CPU Feature: AVX512EVEX +func (x Uint32x16) MaskedRotateRight(y Uint32x16, z Mask32x16) Uint32x16 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVQ, CPU Feature: AVX512EVEX +func (x Uint64x2) MaskedRotateRight(y Uint64x2, z Mask64x2) Uint64x2 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVQ, CPU Feature: AVX512EVEX +func (x Uint64x4) MaskedRotateRight(y Uint64x4, z Mask64x4) Uint64x4 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVQ, CPU Feature: AVX512EVEX +func (x Uint64x8) MaskedRotateRight(y Uint64x8, z Mask64x8) Uint64x8 + /* MaskedRoundSuppressExceptionWithPrecision */ // RoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions. @@ -5447,6 +5695,803 @@ func (x Uint32x8) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, // Asm: VPDPBUSDS, CPU Feature: AVX512EVEX func (x Uint32x16) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int8x64, u Mask32x16) Uint32x16 +/* MaskedShiftAllLeft */ + +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLQ, CPU Feature: AVX512EVEX +func (x Int64x2) MaskedShiftAllLeft(y uint64, z Mask64x2) Int64x2 + +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. 
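// Unlike ShiftLeft, which takes a vector of per-lane counts, ShiftAllLeft applies one
// count to every lane. A minimal sketch of the masked form (unselected-lane behavior is
// not documented here; AVX-512 hardware assumed):
//
//	x := simd.LoadInt64x4Slice([]int64{1, 2, 3, 4})
//	m := simd.LoadInt64x4Slice([]int64{-1, 0, -1, 0}).AsMask64x4()
//	r := x.MaskedShiftAllLeft(4, m) // selected lanes: 1<<4 = 16, 3<<4 = 48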
+// +// Asm: VPSLLQ, CPU Feature: AVX512EVEX +func (x Int64x4) MaskedShiftAllLeft(y uint64, z Mask64x4) Int64x4 + +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLQ, CPU Feature: AVX512EVEX +func (x Int64x8) MaskedShiftAllLeft(y uint64, z Mask64x8) Int64x8 + +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLQ, CPU Feature: AVX512EVEX +func (x Uint64x2) MaskedShiftAllLeft(y uint64, z Mask64x2) Uint64x2 + +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLQ, CPU Feature: AVX512EVEX +func (x Uint64x4) MaskedShiftAllLeft(y uint64, z Mask64x4) Uint64x4 + +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLQ, CPU Feature: AVX512EVEX +func (x Uint64x8) MaskedShiftAllLeft(y uint64, z Mask64x8) Uint64x8 + +/* MaskedShiftAllLeftAndFillUpperFrom */ + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDW, CPU Feature: AVX512EVEX +func (x Int16x8) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Int16x8, z Mask16x8) Int16x8 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDW, CPU Feature: AVX512EVEX +func (x Int16x16) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Int16x16, z Mask16x16) Int16x16 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDW, CPU Feature: AVX512EVEX +func (x Int16x32) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Int16x32, z Mask16x32) Int16x32 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDD, CPU Feature: AVX512EVEX +func (x Int32x4) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Int32x4, z Mask32x4) Int32x4 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDD, CPU Feature: AVX512EVEX +func (x Int32x8) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Int32x8, z Mask32x8) Int32x8 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. 
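// In scalar terms, for 32-bit lanes and an immediate shift count s (taken modulo 32),
// each result lane is
//
//	result = (x << s) | (y >> (32 - s))
//
// e.g. x = 0x000000FF, y = 0xABCD0000, s = 8 yields 0x0000FFAB: the upper 8 bits of y
// fill the 8 lower bits emptied by shifting x. For s = 0 the lane is unchanged.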
+// +// Asm: VPSHLDD, CPU Feature: AVX512EVEX +func (x Int32x16) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Int32x16, z Mask32x16) Int32x16 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDQ, CPU Feature: AVX512EVEX +func (x Int64x2) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Int64x2, z Mask64x2) Int64x2 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDQ, CPU Feature: AVX512EVEX +func (x Int64x4) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Int64x4, z Mask64x4) Int64x4 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDQ, CPU Feature: AVX512EVEX +func (x Int64x8) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Int64x8, z Mask64x8) Int64x8 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDW, CPU Feature: AVX512EVEX +func (x Uint16x8) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Uint16x8, z Mask16x8) Uint16x8 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDW, CPU Feature: AVX512EVEX +func (x Uint16x16) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Uint16x16, z Mask16x16) Uint16x16 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDW, CPU Feature: AVX512EVEX +func (x Uint16x32) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Uint16x32, z Mask16x32) Uint16x32 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDD, CPU Feature: AVX512EVEX +func (x Uint32x4) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Uint32x4, z Mask32x4) Uint32x4 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDD, CPU Feature: AVX512EVEX +func (x Uint32x8) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Uint32x8, z Mask32x8) Uint32x8 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. 
+// +// Asm: VPSHLDD, CPU Feature: AVX512EVEX +func (x Uint32x16) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Uint32x16, z Mask32x16) Uint32x16 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDQ, CPU Feature: AVX512EVEX +func (x Uint64x2) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Uint64x2, z Mask64x2) Uint64x2 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDQ, CPU Feature: AVX512EVEX +func (x Uint64x4) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Uint64x4, z Mask64x4) Uint64x4 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDQ, CPU Feature: AVX512EVEX +func (x Uint64x8) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Uint64x8, z Mask64x8) Uint64x8 + +/* MaskedShiftAllRight */ + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLQ, CPU Feature: AVX512EVEX +func (x Int64x2) MaskedShiftAllRight(y uint64, z Mask64x2) Int64x2 + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLQ, CPU Feature: AVX512EVEX +func (x Int64x4) MaskedShiftAllRight(y uint64, z Mask64x4) Int64x4 + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLQ, CPU Feature: AVX512EVEX +func (x Int64x8) MaskedShiftAllRight(y uint64, z Mask64x8) Int64x8 + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLQ, CPU Feature: AVX512EVEX +func (x Uint64x2) MaskedShiftAllRight(y uint64, z Mask64x2) Uint64x2 + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLQ, CPU Feature: AVX512EVEX +func (x Uint64x4) MaskedShiftAllRight(y uint64, z Mask64x4) Uint64x4 + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLQ, CPU Feature: AVX512EVEX +func (x Uint64x8) MaskedShiftAllRight(y uint64, z Mask64x8) Uint64x8 + +/* MaskedShiftAllRightAndFillUpperFrom */ + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDW, CPU Feature: AVX512EVEX +func (x Int16x8) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Int16x8, z Mask16x8) Int16x8 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. 
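// The mirror image of ShiftAllLeftAndFillUpperFrom: in scalar terms, for lanes of width
// N bits and shift count s, each result lane is
//
//	result = (x >> s) | (y << (N - s))
//
// e.g. with 16-bit lanes, x = 0xFF00, y = 0x00CD, s = 8 yields 0xCDFF: the low 8 bits of
// y fill the upper bits emptied by shifting x right.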
+// +// Asm: VPSHRDW, CPU Feature: AVX512EVEX +func (x Int16x16) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Int16x16, z Mask16x16) Int16x16 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDW, CPU Feature: AVX512EVEX +func (x Int16x32) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Int16x32, z Mask16x32) Int16x32 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDD, CPU Feature: AVX512EVEX +func (x Int32x4) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Int32x4, z Mask32x4) Int32x4 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDD, CPU Feature: AVX512EVEX +func (x Int32x8) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Int32x8, z Mask32x8) Int32x8 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDD, CPU Feature: AVX512EVEX +func (x Int32x16) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Int32x16, z Mask32x16) Int32x16 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDQ, CPU Feature: AVX512EVEX +func (x Int64x2) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Int64x2, z Mask64x2) Int64x2 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDQ, CPU Feature: AVX512EVEX +func (x Int64x4) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Int64x4, z Mask64x4) Int64x4 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDQ, CPU Feature: AVX512EVEX +func (x Int64x8) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Int64x8, z Mask64x8) Int64x8 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDW, CPU Feature: AVX512EVEX +func (x Uint16x8) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Uint16x8, z Mask16x8) Uint16x8 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. 
+// +// Asm: VPSHRDW, CPU Feature: AVX512EVEX +func (x Uint16x16) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Uint16x16, z Mask16x16) Uint16x16 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDW, CPU Feature: AVX512EVEX +func (x Uint16x32) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Uint16x32, z Mask16x32) Uint16x32 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDD, CPU Feature: AVX512EVEX +func (x Uint32x4) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Uint32x4, z Mask32x4) Uint32x4 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDD, CPU Feature: AVX512EVEX +func (x Uint32x8) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Uint32x8, z Mask32x8) Uint32x8 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDD, CPU Feature: AVX512EVEX +func (x Uint32x16) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Uint32x16, z Mask32x16) Uint32x16 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDQ, CPU Feature: AVX512EVEX +func (x Uint64x2) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Uint64x2, z Mask64x2) Uint64x2 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDQ, CPU Feature: AVX512EVEX +func (x Uint64x4) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Uint64x4, z Mask64x4) Uint64x4 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDQ, CPU Feature: AVX512EVEX +func (x Uint64x8) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Uint64x8, z Mask64x8) Uint64x8 + +/* MaskedShiftAllRightSignExtended */ + +// ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAQ, CPU Feature: AVX512EVEX +func (x Int64x2) MaskedShiftAllRightSignExtended(y uint64, z Mask64x2) Int64x2 + +// ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAQ, CPU Feature: AVX512EVEX +func (x Int64x4) MaskedShiftAllRightSignExtended(y uint64, z Mask64x4) Int64x4 + +// ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. 
Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAQ, CPU Feature: AVX512EVEX +func (x Int64x8) MaskedShiftAllRightSignExtended(y uint64, z Mask64x8) Int64x8 + +/* MaskedShiftLeft */ + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVW, CPU Feature: AVX512EVEX +func (x Int16x8) MaskedShiftLeft(y Int16x8, z Mask16x8) Int16x8 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVW, CPU Feature: AVX512EVEX +func (x Int16x16) MaskedShiftLeft(y Int16x16, z Mask16x16) Int16x16 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVW, CPU Feature: AVX512EVEX +func (x Int16x32) MaskedShiftLeft(y Int16x32, z Mask16x32) Int16x32 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVD, CPU Feature: AVX512EVEX +func (x Int32x4) MaskedShiftLeft(y Int32x4, z Mask32x4) Int32x4 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVD, CPU Feature: AVX512EVEX +func (x Int32x8) MaskedShiftLeft(y Int32x8, z Mask32x8) Int32x8 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVD, CPU Feature: AVX512EVEX +func (x Int32x16) MaskedShiftLeft(y Int32x16, z Mask32x16) Int32x16 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVQ, CPU Feature: AVX512EVEX +func (x Int64x2) MaskedShiftLeft(y Int64x2, z Mask64x2) Int64x2 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVQ, CPU Feature: AVX512EVEX +func (x Int64x4) MaskedShiftLeft(y Int64x4, z Mask64x4) Int64x4 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVQ, CPU Feature: AVX512EVEX +func (x Int64x8) MaskedShiftLeft(y Int64x8, z Mask64x8) Int64x8 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVW, CPU Feature: AVX512EVEX +func (x Uint16x8) MaskedShiftLeft(y Uint16x8, z Mask16x8) Uint16x8 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVW, CPU Feature: AVX512EVEX +func (x Uint16x16) MaskedShiftLeft(y Uint16x16, z Mask16x16) Uint16x16 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVW, CPU Feature: AVX512EVEX +func (x Uint16x32) MaskedShiftLeft(y Uint16x32, z Mask16x32) Uint16x32 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. 
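// A concrete sketch of the per-lane, variable-count shifts (unmasked forms, as exercised
// by the wrapper tests in this CL; assumes hardware with the required AVX2/AVX-512
// features):
//
//	x := simd.LoadUint32x4Slice([]uint32{1, 2, 4, 0x80000000})
//	n := simd.LoadUint32x4Slice([]uint32{1, 2, 3, 1})
//	x.ShiftLeft(n)              // {2, 8, 32, 0}: lane i shifted left by n[i], zero-filled
//	x.ShiftRight(n)             // {0, 0, 0, 0x40000000}: logical (zero-filling) right shift
//	x.ShiftRightSignExtended(n) // like ShiftRight, but fills with the sign bit of each lane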
+// +// Asm: VPSLLVD, CPU Feature: AVX512EVEX +func (x Uint32x4) MaskedShiftLeft(y Uint32x4, z Mask32x4) Uint32x4 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVD, CPU Feature: AVX512EVEX +func (x Uint32x8) MaskedShiftLeft(y Uint32x8, z Mask32x8) Uint32x8 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVD, CPU Feature: AVX512EVEX +func (x Uint32x16) MaskedShiftLeft(y Uint32x16, z Mask32x16) Uint32x16 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVQ, CPU Feature: AVX512EVEX +func (x Uint64x2) MaskedShiftLeft(y Uint64x2, z Mask64x2) Uint64x2 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVQ, CPU Feature: AVX512EVEX +func (x Uint64x4) MaskedShiftLeft(y Uint64x4, z Mask64x4) Uint64x4 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVQ, CPU Feature: AVX512EVEX +func (x Uint64x8) MaskedShiftLeft(y Uint64x8, z Mask64x8) Uint64x8 + +/* MaskedShiftLeftAndFillUpperFrom */ + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVW, CPU Feature: AVX512EVEX +func (x Int16x8) MaskedShiftLeftAndFillUpperFrom(y Int16x8, z Int16x8, u Mask16x8) Int16x8 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVW, CPU Feature: AVX512EVEX +func (x Int16x16) MaskedShiftLeftAndFillUpperFrom(y Int16x16, z Int16x16, u Mask16x16) Int16x16 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVW, CPU Feature: AVX512EVEX +func (x Int16x32) MaskedShiftLeftAndFillUpperFrom(y Int16x32, z Int16x32, u Mask16x32) Int16x32 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVD, CPU Feature: AVX512EVEX +func (x Int32x4) MaskedShiftLeftAndFillUpperFrom(y Int32x4, z Int32x4, u Mask32x4) Int32x4 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. 
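// A concrete sketch of the variable-count form (unmasked, as exercised by the Ternary
// wrapper tests in this CL): for 32-bit lanes, each result lane is
// (x << n) | (fill >> (32 - n)), with n drawn from the count vector.
//
//	x := simd.LoadUint32x4Slice([]uint32{0x000000FF, 0x000000FF, 1, 1})
//	n := simd.LoadUint32x4Slice([]uint32{8, 16, 1, 4}) // per-lane shift counts
//	fill := simd.LoadUint32x4Slice([]uint32{0xABCD0000, 0xABCD0000, 7, 0})
//	x.ShiftLeftAndFillUpperFrom(n, fill) // {0x0000FFAB, 0x00FFABCD, 2, 0x10}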
+// +// Asm: VPSHLDVD, CPU Feature: AVX512EVEX +func (x Int32x8) MaskedShiftLeftAndFillUpperFrom(y Int32x8, z Int32x8, u Mask32x8) Int32x8 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVD, CPU Feature: AVX512EVEX +func (x Int32x16) MaskedShiftLeftAndFillUpperFrom(y Int32x16, z Int32x16, u Mask32x16) Int32x16 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX +func (x Int64x2) MaskedShiftLeftAndFillUpperFrom(y Int64x2, z Int64x2, u Mask64x2) Int64x2 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX +func (x Int64x4) MaskedShiftLeftAndFillUpperFrom(y Int64x4, z Int64x4, u Mask64x4) Int64x4 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX +func (x Int64x8) MaskedShiftLeftAndFillUpperFrom(y Int64x8, z Int64x8, u Mask64x8) Int64x8 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVW, CPU Feature: AVX512EVEX +func (x Uint16x8) MaskedShiftLeftAndFillUpperFrom(y Uint16x8, z Uint16x8, u Mask16x8) Uint16x8 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVW, CPU Feature: AVX512EVEX +func (x Uint16x16) MaskedShiftLeftAndFillUpperFrom(y Uint16x16, z Uint16x16, u Mask16x16) Uint16x16 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVW, CPU Feature: AVX512EVEX +func (x Uint16x32) MaskedShiftLeftAndFillUpperFrom(y Uint16x32, z Uint16x32, u Mask16x32) Uint16x32 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. 
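
Per selected w-bit lane this amounts to (x << n) | (z >> (w-n)): x supplies the upper part and z is shifted in from below. A hedged sketch on the Int32x4 shape declared above, with an assumed helper name:

	package simdexample

	import "simd"

	// funnelShiftLeft concatenates each lane pair (x, fill), shifts left by
	// the matching count, and keeps the upper half, for lanes selected by m.
	func funnelShiftLeft(x, counts, fill simd.Int32x4, m simd.Mask32x4) simd.Int32x4 {
		return x.MaskedShiftLeftAndFillUpperFrom(counts, fill, m)
	}
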
+// +// Asm: VPSHLDVD, CPU Feature: AVX512EVEX +func (x Uint32x4) MaskedShiftLeftAndFillUpperFrom(y Uint32x4, z Uint32x4, u Mask32x4) Uint32x4 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVD, CPU Feature: AVX512EVEX +func (x Uint32x8) MaskedShiftLeftAndFillUpperFrom(y Uint32x8, z Uint32x8, u Mask32x8) Uint32x8 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVD, CPU Feature: AVX512EVEX +func (x Uint32x16) MaskedShiftLeftAndFillUpperFrom(y Uint32x16, z Uint32x16, u Mask32x16) Uint32x16 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX +func (x Uint64x2) MaskedShiftLeftAndFillUpperFrom(y Uint64x2, z Uint64x2, u Mask64x2) Uint64x2 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX +func (x Uint64x4) MaskedShiftLeftAndFillUpperFrom(y Uint64x4, z Uint64x4, u Mask64x4) Uint64x4 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX +func (x Uint64x8) MaskedShiftLeftAndFillUpperFrom(y Uint64x8, z Uint64x8, u Mask64x8) Uint64x8 + +/* MaskedShiftRight */ + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVW, CPU Feature: AVX512EVEX +func (x Int16x8) MaskedShiftRight(y Int16x8, z Mask16x8) Int16x8 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVW, CPU Feature: AVX512EVEX +func (x Int16x16) MaskedShiftRight(y Int16x16, z Mask16x16) Int16x16 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVW, CPU Feature: AVX512EVEX +func (x Int16x32) MaskedShiftRight(y Int16x32, z Mask16x32) Int16x32 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVD, CPU Feature: AVX512EVEX +func (x Int32x4) MaskedShiftRight(y Int32x4, z Mask32x4) Int32x4 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. 
+// +// Asm: VPSRLVD, CPU Feature: AVX512EVEX +func (x Int32x8) MaskedShiftRight(y Int32x8, z Mask32x8) Int32x8 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVD, CPU Feature: AVX512EVEX +func (x Int32x16) MaskedShiftRight(y Int32x16, z Mask32x16) Int32x16 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVQ, CPU Feature: AVX512EVEX +func (x Int64x2) MaskedShiftRight(y Int64x2, z Mask64x2) Int64x2 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVQ, CPU Feature: AVX512EVEX +func (x Int64x4) MaskedShiftRight(y Int64x4, z Mask64x4) Int64x4 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVQ, CPU Feature: AVX512EVEX +func (x Int64x8) MaskedShiftRight(y Int64x8, z Mask64x8) Int64x8 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVW, CPU Feature: AVX512EVEX +func (x Uint16x8) MaskedShiftRight(y Uint16x8, z Mask16x8) Uint16x8 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVW, CPU Feature: AVX512EVEX +func (x Uint16x16) MaskedShiftRight(y Uint16x16, z Mask16x16) Uint16x16 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVW, CPU Feature: AVX512EVEX +func (x Uint16x32) MaskedShiftRight(y Uint16x32, z Mask16x32) Uint16x32 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVD, CPU Feature: AVX512EVEX +func (x Uint32x4) MaskedShiftRight(y Uint32x4, z Mask32x4) Uint32x4 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVD, CPU Feature: AVX512EVEX +func (x Uint32x8) MaskedShiftRight(y Uint32x8, z Mask32x8) Uint32x8 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVD, CPU Feature: AVX512EVEX +func (x Uint32x16) MaskedShiftRight(y Uint32x16, z Mask32x16) Uint32x16 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVQ, CPU Feature: AVX512EVEX +func (x Uint64x2) MaskedShiftRight(y Uint64x2, z Mask64x2) Uint64x2 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVQ, CPU Feature: AVX512EVEX +func (x Uint64x4) MaskedShiftRight(y Uint64x4, z Mask64x4) Uint64x4 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. 
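
A concrete use of these masked logical right shifts is dividing unsigned lanes by per-lane powers of two; a minimal sketch, with the helper name and the "simd" import path assumed:

	package simdexample

	import "simd"

	// divByPow2 divides each selected lane of x by 2^k, where k comes from the
	// matching lane of exps (a logical shift right truncates toward zero).
	func divByPow2(x, exps simd.Uint32x8, m simd.Mask32x8) simd.Uint32x8 {
		return x.MaskedShiftRight(exps, m)
	}
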
+// +// Asm: VPSRLVQ, CPU Feature: AVX512EVEX +func (x Uint64x8) MaskedShiftRight(y Uint64x8, z Mask64x8) Uint64x8 + +/* MaskedShiftRightAndFillUpperFrom */ + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVW, CPU Feature: AVX512EVEX +func (x Int16x8) MaskedShiftRightAndFillUpperFrom(y Int16x8, z Int16x8, u Mask16x8) Int16x8 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVW, CPU Feature: AVX512EVEX +func (x Int16x16) MaskedShiftRightAndFillUpperFrom(y Int16x16, z Int16x16, u Mask16x16) Int16x16 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVW, CPU Feature: AVX512EVEX +func (x Int16x32) MaskedShiftRightAndFillUpperFrom(y Int16x32, z Int16x32, u Mask16x32) Int16x32 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVD, CPU Feature: AVX512EVEX +func (x Int32x4) MaskedShiftRightAndFillUpperFrom(y Int32x4, z Int32x4, u Mask32x4) Int32x4 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVD, CPU Feature: AVX512EVEX +func (x Int32x8) MaskedShiftRightAndFillUpperFrom(y Int32x8, z Int32x8, u Mask32x8) Int32x8 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVD, CPU Feature: AVX512EVEX +func (x Int32x16) MaskedShiftRightAndFillUpperFrom(y Int32x16, z Int32x16, u Mask32x16) Int32x16 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVQ, CPU Feature: AVX512EVEX +func (x Int64x2) MaskedShiftRightAndFillUpperFrom(y Int64x2, z Int64x2, u Mask64x2) Int64x2 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. 
+// +// Asm: VPSHRDVQ, CPU Feature: AVX512EVEX +func (x Int64x4) MaskedShiftRightAndFillUpperFrom(y Int64x4, z Int64x4, u Mask64x4) Int64x4 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVQ, CPU Feature: AVX512EVEX +func (x Int64x8) MaskedShiftRightAndFillUpperFrom(y Int64x8, z Int64x8, u Mask64x8) Int64x8 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVW, CPU Feature: AVX512EVEX +func (x Uint16x8) MaskedShiftRightAndFillUpperFrom(y Uint16x8, z Uint16x8, u Mask16x8) Uint16x8 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVW, CPU Feature: AVX512EVEX +func (x Uint16x16) MaskedShiftRightAndFillUpperFrom(y Uint16x16, z Uint16x16, u Mask16x16) Uint16x16 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVW, CPU Feature: AVX512EVEX +func (x Uint16x32) MaskedShiftRightAndFillUpperFrom(y Uint16x32, z Uint16x32, u Mask16x32) Uint16x32 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVD, CPU Feature: AVX512EVEX +func (x Uint32x4) MaskedShiftRightAndFillUpperFrom(y Uint32x4, z Uint32x4, u Mask32x4) Uint32x4 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVD, CPU Feature: AVX512EVEX +func (x Uint32x8) MaskedShiftRightAndFillUpperFrom(y Uint32x8, z Uint32x8, u Mask32x8) Uint32x8 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVD, CPU Feature: AVX512EVEX +func (x Uint32x16) MaskedShiftRightAndFillUpperFrom(y Uint32x16, z Uint32x16, u Mask32x16) Uint32x16 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. 
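
Passing x itself as the fill operand turns this into a per-lane rotate right, since the bits shifted out at the bottom re-enter at the top; this is inferred from the comment above rather than documented, so treat the sketch accordingly:

	package simdexample

	import "simd"

	// rotateRightMasked rotates each selected lane of x right by the matching
	// count in counts, by funnel-shifting x against itself.
	func rotateRightMasked(x, counts simd.Uint32x4, m simd.Mask32x4) simd.Uint32x4 {
		return x.MaskedShiftRightAndFillUpperFrom(counts, x, m)
	}
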
+// +// Asm: VPSHRDVQ, CPU Feature: AVX512EVEX +func (x Uint64x2) MaskedShiftRightAndFillUpperFrom(y Uint64x2, z Uint64x2, u Mask64x2) Uint64x2 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVQ, CPU Feature: AVX512EVEX +func (x Uint64x4) MaskedShiftRightAndFillUpperFrom(y Uint64x4, z Uint64x4, u Mask64x4) Uint64x4 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVQ, CPU Feature: AVX512EVEX +func (x Uint64x8) MaskedShiftRightAndFillUpperFrom(y Uint64x8, z Uint64x8, u Mask64x8) Uint64x8 + +/* MaskedShiftRightSignExtended */ + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVW, CPU Feature: AVX512EVEX +func (x Int16x8) MaskedShiftRightSignExtended(y Int16x8, z Mask16x8) Int16x8 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVW, CPU Feature: AVX512EVEX +func (x Int16x16) MaskedShiftRightSignExtended(y Int16x16, z Mask16x16) Int16x16 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVW, CPU Feature: AVX512EVEX +func (x Int16x32) MaskedShiftRightSignExtended(y Int16x32, z Mask16x32) Int16x32 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVD, CPU Feature: AVX512EVEX +func (x Int32x4) MaskedShiftRightSignExtended(y Int32x4, z Mask32x4) Int32x4 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVD, CPU Feature: AVX512EVEX +func (x Int32x8) MaskedShiftRightSignExtended(y Int32x8, z Mask32x8) Int32x8 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVD, CPU Feature: AVX512EVEX +func (x Int32x16) MaskedShiftRightSignExtended(y Int32x16, z Mask32x16) Int32x16 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVQ, CPU Feature: AVX512EVEX +func (x Int64x2) MaskedShiftRightSignExtended(y Int64x2, z Mask64x2) Int64x2 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVQ, CPU Feature: AVX512EVEX +func (x Int64x4) MaskedShiftRightSignExtended(y Int64x4, z Mask64x4) Int64x4 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. 
Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVQ, CPU Feature: AVX512EVEX +func (x Int64x8) MaskedShiftRightSignExtended(y Int64x8, z Mask64x8) Int64x8 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVW, CPU Feature: AVX512EVEX +func (x Uint16x8) MaskedShiftRightSignExtended(y Uint16x8, z Mask16x8) Uint16x8 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVW, CPU Feature: AVX512EVEX +func (x Uint16x16) MaskedShiftRightSignExtended(y Uint16x16, z Mask16x16) Uint16x16 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVW, CPU Feature: AVX512EVEX +func (x Uint16x32) MaskedShiftRightSignExtended(y Uint16x32, z Mask16x32) Uint16x32 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVD, CPU Feature: AVX512EVEX +func (x Uint32x4) MaskedShiftRightSignExtended(y Uint32x4, z Mask32x4) Uint32x4 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVD, CPU Feature: AVX512EVEX +func (x Uint32x8) MaskedShiftRightSignExtended(y Uint32x8, z Mask32x8) Uint32x8 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVD, CPU Feature: AVX512EVEX +func (x Uint32x16) MaskedShiftRightSignExtended(y Uint32x16, z Mask32x16) Uint32x16 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVQ, CPU Feature: AVX512EVEX +func (x Uint64x2) MaskedShiftRightSignExtended(y Uint64x2, z Mask64x2) Uint64x2 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVQ, CPU Feature: AVX512EVEX +func (x Uint64x4) MaskedShiftRightSignExtended(y Uint64x4, z Mask64x4) Uint64x4 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVQ, CPU Feature: AVX512EVEX +func (x Uint64x8) MaskedShiftRightSignExtended(y Uint64x8, z Mask64x8) Uint64x8 + /* MaskedSqrt */ // Sqrt computes the square root of each element. @@ -6961,6 +8006,254 @@ func (x Uint64x4) PopCount() Uint64x4 // Asm: VPOPCNTQ, CPU Feature: AVX512EVEX func (x Uint64x8) PopCount() Uint64x8 +/* RotateAllLeft */ + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLD, CPU Feature: AVX512EVEX +func (x Int32x4) RotateAllLeft(imm8 uint8) Int32x4 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. 
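
Looking back at the MaskedShiftRightSignExtended group just above: an arithmetic right shift by k is a floor division by 2^k for signed values, which is the usual reason to prefer it over the zero-filling form. A minimal sketch with an assumed helper name:

	package simdexample

	import "simd"

	// floorDivPow2 divides each selected signed lane of x by 2^k, rounding
	// toward negative infinity, with k taken from the matching lane of exps.
	func floorDivPow2(x, exps simd.Int32x4, m simd.Mask32x4) simd.Int32x4 {
		return x.MaskedShiftRightSignExtended(exps, m)
	}
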
+// +// Asm: VPROLD, CPU Feature: AVX512EVEX +func (x Int32x8) RotateAllLeft(imm8 uint8) Int32x8 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLD, CPU Feature: AVX512EVEX +func (x Int32x16) RotateAllLeft(imm8 uint8) Int32x16 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLQ, CPU Feature: AVX512EVEX +func (x Int64x2) RotateAllLeft(imm8 uint8) Int64x2 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLQ, CPU Feature: AVX512EVEX +func (x Int64x4) RotateAllLeft(imm8 uint8) Int64x4 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLQ, CPU Feature: AVX512EVEX +func (x Int64x8) RotateAllLeft(imm8 uint8) Int64x8 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLD, CPU Feature: AVX512EVEX +func (x Uint32x4) RotateAllLeft(imm8 uint8) Uint32x4 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLD, CPU Feature: AVX512EVEX +func (x Uint32x8) RotateAllLeft(imm8 uint8) Uint32x8 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLD, CPU Feature: AVX512EVEX +func (x Uint32x16) RotateAllLeft(imm8 uint8) Uint32x16 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLQ, CPU Feature: AVX512EVEX +func (x Uint64x2) RotateAllLeft(imm8 uint8) Uint64x2 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLQ, CPU Feature: AVX512EVEX +func (x Uint64x4) RotateAllLeft(imm8 uint8) Uint64x4 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLQ, CPU Feature: AVX512EVEX +func (x Uint64x8) RotateAllLeft(imm8 uint8) Uint64x8 + +/* RotateAllRight */ + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORD, CPU Feature: AVX512EVEX +func (x Int32x4) RotateAllRight(imm8 uint8) Int32x4 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORD, CPU Feature: AVX512EVEX +func (x Int32x8) RotateAllRight(imm8 uint8) Int32x8 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORD, CPU Feature: AVX512EVEX +func (x Int32x16) RotateAllRight(imm8 uint8) Int32x16 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORQ, CPU Feature: AVX512EVEX +func (x Int64x2) RotateAllRight(imm8 uint8) Int64x2 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORQ, CPU Feature: AVX512EVEX +func (x Int64x4) RotateAllRight(imm8 uint8) Int64x4 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORQ, CPU Feature: AVX512EVEX +func (x Int64x8) RotateAllRight(imm8 uint8) Int64x8 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. 
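
RotateAllLeft, and RotateAllRight below, take the count as an 8-bit immediate, so the argument is presumably expected to be a compile-time constant; a fixed mixing rotation of the kind used in hash rounds is a typical use. A sketch with assumed names:

	package simdexample

	import "simd"

	// mixLanes applies a fixed left rotation to every 32-bit lane, the kind of
	// step that appears in hash and cipher rounds. The constant 13 is arbitrary.
	func mixLanes(v simd.Uint32x8) simd.Uint32x8 {
		return v.RotateAllLeft(13)
	}
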
+// +// Asm: VPRORD, CPU Feature: AVX512EVEX +func (x Uint32x4) RotateAllRight(imm8 uint8) Uint32x4 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORD, CPU Feature: AVX512EVEX +func (x Uint32x8) RotateAllRight(imm8 uint8) Uint32x8 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORD, CPU Feature: AVX512EVEX +func (x Uint32x16) RotateAllRight(imm8 uint8) Uint32x16 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORQ, CPU Feature: AVX512EVEX +func (x Uint64x2) RotateAllRight(imm8 uint8) Uint64x2 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORQ, CPU Feature: AVX512EVEX +func (x Uint64x4) RotateAllRight(imm8 uint8) Uint64x4 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORQ, CPU Feature: AVX512EVEX +func (x Uint64x8) RotateAllRight(imm8 uint8) Uint64x8 + +/* RotateLeft */ + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVD, CPU Feature: AVX512EVEX +func (x Int32x4) RotateLeft(y Int32x4) Int32x4 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVD, CPU Feature: AVX512EVEX +func (x Int32x8) RotateLeft(y Int32x8) Int32x8 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVD, CPU Feature: AVX512EVEX +func (x Int32x16) RotateLeft(y Int32x16) Int32x16 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVQ, CPU Feature: AVX512EVEX +func (x Int64x2) RotateLeft(y Int64x2) Int64x2 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVQ, CPU Feature: AVX512EVEX +func (x Int64x4) RotateLeft(y Int64x4) Int64x4 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVQ, CPU Feature: AVX512EVEX +func (x Int64x8) RotateLeft(y Int64x8) Int64x8 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVD, CPU Feature: AVX512EVEX +func (x Uint32x4) RotateLeft(y Uint32x4) Uint32x4 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVD, CPU Feature: AVX512EVEX +func (x Uint32x8) RotateLeft(y Uint32x8) Uint32x8 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVD, CPU Feature: AVX512EVEX +func (x Uint32x16) RotateLeft(y Uint32x16) Uint32x16 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVQ, CPU Feature: AVX512EVEX +func (x Uint64x2) RotateLeft(y Uint64x2) Uint64x2 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. 
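
In contrast to the immediate forms above, RotateLeft takes its counts from a vector, so every lane can rotate by a different amount; a minimal sketch on the two-lane shape declared above, with an assumed helper name:

	package simdexample

	import "simd"

	// rotatePerLane rotates each 64-bit lane of x left by the count in the
	// matching lane of counts (the hardware rotate uses the count modulo 64).
	func rotatePerLane(x, counts simd.Uint64x2) simd.Uint64x2 {
		return x.RotateLeft(counts)
	}
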
+// +// Asm: VPROLVQ, CPU Feature: AVX512EVEX +func (x Uint64x4) RotateLeft(y Uint64x4) Uint64x4 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVQ, CPU Feature: AVX512EVEX +func (x Uint64x8) RotateLeft(y Uint64x8) Uint64x8 + +/* RotateRight */ + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVD, CPU Feature: AVX512EVEX +func (x Int32x4) RotateRight(y Int32x4) Int32x4 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVD, CPU Feature: AVX512EVEX +func (x Int32x8) RotateRight(y Int32x8) Int32x8 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVD, CPU Feature: AVX512EVEX +func (x Int32x16) RotateRight(y Int32x16) Int32x16 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVQ, CPU Feature: AVX512EVEX +func (x Int64x2) RotateRight(y Int64x2) Int64x2 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVQ, CPU Feature: AVX512EVEX +func (x Int64x4) RotateRight(y Int64x4) Int64x4 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVQ, CPU Feature: AVX512EVEX +func (x Int64x8) RotateRight(y Int64x8) Int64x8 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVD, CPU Feature: AVX512EVEX +func (x Uint32x4) RotateRight(y Uint32x4) Uint32x4 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVD, CPU Feature: AVX512EVEX +func (x Uint32x8) RotateRight(y Uint32x8) Uint32x8 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVD, CPU Feature: AVX512EVEX +func (x Uint32x16) RotateRight(y Uint32x16) Uint32x16 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVQ, CPU Feature: AVX512EVEX +func (x Uint64x2) RotateRight(y Uint64x2) Uint64x2 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVQ, CPU Feature: AVX512EVEX +func (x Uint64x4) RotateRight(y Uint64x4) Uint64x4 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVQ, CPU Feature: AVX512EVEX +func (x Uint64x8) RotateRight(y Uint64x8) Uint64x8 + /* Round */ // Round rounds elements to the nearest integer. @@ -7326,6 +8619,903 @@ func (x Uint32x4) SetElem(imm uint8, y uint32) Uint32x4 // Asm: VPINSRQ, CPU Feature: AVX func (x Uint64x2) SetElem(imm uint8, y uint64) Uint64x2 +/* ShiftAllLeft */ + +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLW, CPU Feature: AVX +func (x Int16x8) ShiftAllLeft(y uint64) Int16x8 + +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. 
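
ShiftAllLeft applies a single scalar count to every lane, which is the natural way to scale a whole vector by one power of two; a sketch with an assumed helper name:

	package simdexample

	import "simd"

	// scaleByPow2 multiplies every 16-bit lane of x by 2^k (modulo 2^16).
	// Counts of 16 or more zero the lanes, as is usual for this instruction class.
	func scaleByPow2(x simd.Int16x8, k uint64) simd.Int16x8 {
		return x.ShiftAllLeft(k)
	}
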
+// +// Asm: VPSLLW, CPU Feature: AVX2 +func (x Int16x16) ShiftAllLeft(y uint64) Int16x16 + +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLD, CPU Feature: AVX +func (x Int32x4) ShiftAllLeft(y uint64) Int32x4 + +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLD, CPU Feature: AVX2 +func (x Int32x8) ShiftAllLeft(y uint64) Int32x8 + +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLQ, CPU Feature: AVX +func (x Int64x2) ShiftAllLeft(y uint64) Int64x2 + +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLQ, CPU Feature: AVX2 +func (x Int64x4) ShiftAllLeft(y uint64) Int64x4 + +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLQ, CPU Feature: AVX512EVEX +func (x Int64x8) ShiftAllLeft(y uint64) Int64x8 + +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLW, CPU Feature: AVX +func (x Uint16x8) ShiftAllLeft(y uint64) Uint16x8 + +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLW, CPU Feature: AVX2 +func (x Uint16x16) ShiftAllLeft(y uint64) Uint16x16 + +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLD, CPU Feature: AVX +func (x Uint32x4) ShiftAllLeft(y uint64) Uint32x4 + +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLD, CPU Feature: AVX2 +func (x Uint32x8) ShiftAllLeft(y uint64) Uint32x8 + +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLQ, CPU Feature: AVX +func (x Uint64x2) ShiftAllLeft(y uint64) Uint64x2 + +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLQ, CPU Feature: AVX2 +func (x Uint64x4) ShiftAllLeft(y uint64) Uint64x4 + +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLQ, CPU Feature: AVX512EVEX +func (x Uint64x8) ShiftAllLeft(y uint64) Uint64x8 + +/* ShiftAllLeftAndFillUpperFrom */ + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDW, CPU Feature: AVX512EVEX +func (x Int16x8) ShiftAllLeftAndFillUpperFrom(imm uint8, y Int16x8) Int16x8 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDW, CPU Feature: AVX512EVEX +func (x Int16x16) ShiftAllLeftAndFillUpperFrom(imm uint8, y Int16x16) Int16x16 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. 
+// +// Asm: VPSHLDW, CPU Feature: AVX512EVEX +func (x Int16x32) ShiftAllLeftAndFillUpperFrom(imm uint8, y Int16x32) Int16x32 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDD, CPU Feature: AVX512EVEX +func (x Int32x4) ShiftAllLeftAndFillUpperFrom(imm uint8, y Int32x4) Int32x4 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDD, CPU Feature: AVX512EVEX +func (x Int32x8) ShiftAllLeftAndFillUpperFrom(imm uint8, y Int32x8) Int32x8 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDD, CPU Feature: AVX512EVEX +func (x Int32x16) ShiftAllLeftAndFillUpperFrom(imm uint8, y Int32x16) Int32x16 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDQ, CPU Feature: AVX512EVEX +func (x Int64x2) ShiftAllLeftAndFillUpperFrom(imm uint8, y Int64x2) Int64x2 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDQ, CPU Feature: AVX512EVEX +func (x Int64x4) ShiftAllLeftAndFillUpperFrom(imm uint8, y Int64x4) Int64x4 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDQ, CPU Feature: AVX512EVEX +func (x Int64x8) ShiftAllLeftAndFillUpperFrom(imm uint8, y Int64x8) Int64x8 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDW, CPU Feature: AVX512EVEX +func (x Uint16x8) ShiftAllLeftAndFillUpperFrom(imm uint8, y Uint16x8) Uint16x8 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDW, CPU Feature: AVX512EVEX +func (x Uint16x16) ShiftAllLeftAndFillUpperFrom(imm uint8, y Uint16x16) Uint16x16 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. 
+// +// Asm: VPSHLDW, CPU Feature: AVX512EVEX +func (x Uint16x32) ShiftAllLeftAndFillUpperFrom(imm uint8, y Uint16x32) Uint16x32 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDD, CPU Feature: AVX512EVEX +func (x Uint32x4) ShiftAllLeftAndFillUpperFrom(imm uint8, y Uint32x4) Uint32x4 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDD, CPU Feature: AVX512EVEX +func (x Uint32x8) ShiftAllLeftAndFillUpperFrom(imm uint8, y Uint32x8) Uint32x8 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDD, CPU Feature: AVX512EVEX +func (x Uint32x16) ShiftAllLeftAndFillUpperFrom(imm uint8, y Uint32x16) Uint32x16 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDQ, CPU Feature: AVX512EVEX +func (x Uint64x2) ShiftAllLeftAndFillUpperFrom(imm uint8, y Uint64x2) Uint64x2 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDQ, CPU Feature: AVX512EVEX +func (x Uint64x4) ShiftAllLeftAndFillUpperFrom(imm uint8, y Uint64x4) Uint64x4 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDQ, CPU Feature: AVX512EVEX +func (x Uint64x8) ShiftAllLeftAndFillUpperFrom(imm uint8, y Uint64x8) Uint64x8 + +/* ShiftAllRight */ + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLW, CPU Feature: AVX +func (x Int16x8) ShiftAllRight(y uint64) Int16x8 + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLW, CPU Feature: AVX2 +func (x Int16x16) ShiftAllRight(y uint64) Int16x16 + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLD, CPU Feature: AVX +func (x Int32x4) ShiftAllRight(y uint64) Int32x4 + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLD, CPU Feature: AVX2 +func (x Int32x8) ShiftAllRight(y uint64) Int32x8 + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLQ, CPU Feature: AVX +func (x Int64x2) ShiftAllRight(y uint64) Int64x2 + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. 
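
Looking back at the ShiftAllLeftAndFillUpperFrom group above: it pairs naturally with ShiftAllLeft when two vectors hold the high and low halves of wider values, giving a double-width left shift. A sketch under that assumed layout, with the shift amount fixed at an arbitrary constant since the count is an immediate:

	package simdexample

	import "simd"

	// shiftLeft64From32By8 shifts four 64-bit values, stored as separate high
	// and low 32-bit halves, left by 8 bits.
	func shiftLeft64From32By8(hi, lo simd.Uint32x4) (simd.Uint32x4, simd.Uint32x4) {
		const n = 8
		newHi := hi.ShiftAllLeftAndFillUpperFrom(n, lo) // (hi<<8) | (lo>>24)
		newLo := lo.ShiftAllLeft(n)                     // lo << 8
		return newHi, newLo
	}
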
+// +// Asm: VPSRLQ, CPU Feature: AVX2 +func (x Int64x4) ShiftAllRight(y uint64) Int64x4 + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLQ, CPU Feature: AVX512EVEX +func (x Int64x8) ShiftAllRight(y uint64) Int64x8 + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLW, CPU Feature: AVX +func (x Uint16x8) ShiftAllRight(y uint64) Uint16x8 + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLW, CPU Feature: AVX2 +func (x Uint16x16) ShiftAllRight(y uint64) Uint16x16 + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLD, CPU Feature: AVX +func (x Uint32x4) ShiftAllRight(y uint64) Uint32x4 + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLD, CPU Feature: AVX2 +func (x Uint32x8) ShiftAllRight(y uint64) Uint32x8 + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLQ, CPU Feature: AVX +func (x Uint64x2) ShiftAllRight(y uint64) Uint64x2 + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLQ, CPU Feature: AVX2 +func (x Uint64x4) ShiftAllRight(y uint64) Uint64x4 + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLQ, CPU Feature: AVX512EVEX +func (x Uint64x8) ShiftAllRight(y uint64) Uint64x8 + +/* ShiftAllRightAndFillUpperFrom */ + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDW, CPU Feature: AVX512EVEX +func (x Int16x8) ShiftAllRightAndFillUpperFrom(imm uint8, y Int16x8) Int16x8 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDW, CPU Feature: AVX512EVEX +func (x Int16x16) ShiftAllRightAndFillUpperFrom(imm uint8, y Int16x16) Int16x16 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDW, CPU Feature: AVX512EVEX +func (x Int16x32) ShiftAllRightAndFillUpperFrom(imm uint8, y Int16x32) Int16x32 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDD, CPU Feature: AVX512EVEX +func (x Int32x4) ShiftAllRightAndFillUpperFrom(imm uint8, y Int32x4) Int32x4 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. 
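
For the zero-filling ShiftAllRight forms above, a typical small use is isolating a field at the top of each lane; a hedged sketch with an assumed helper name:

	package simdexample

	import "simd"

	// topByte isolates the most significant byte of every 32-bit lane,
	// leaving it as a value in the range [0, 255].
	func topByte(x simd.Uint32x4) simd.Uint32x4 {
		return x.ShiftAllRight(24)
	}
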
+// +// Asm: VPSHRDD, CPU Feature: AVX512EVEX +func (x Int32x8) ShiftAllRightAndFillUpperFrom(imm uint8, y Int32x8) Int32x8 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDD, CPU Feature: AVX512EVEX +func (x Int32x16) ShiftAllRightAndFillUpperFrom(imm uint8, y Int32x16) Int32x16 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDQ, CPU Feature: AVX512EVEX +func (x Int64x2) ShiftAllRightAndFillUpperFrom(imm uint8, y Int64x2) Int64x2 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDQ, CPU Feature: AVX512EVEX +func (x Int64x4) ShiftAllRightAndFillUpperFrom(imm uint8, y Int64x4) Int64x4 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDQ, CPU Feature: AVX512EVEX +func (x Int64x8) ShiftAllRightAndFillUpperFrom(imm uint8, y Int64x8) Int64x8 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDW, CPU Feature: AVX512EVEX +func (x Uint16x8) ShiftAllRightAndFillUpperFrom(imm uint8, y Uint16x8) Uint16x8 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDW, CPU Feature: AVX512EVEX +func (x Uint16x16) ShiftAllRightAndFillUpperFrom(imm uint8, y Uint16x16) Uint16x16 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDW, CPU Feature: AVX512EVEX +func (x Uint16x32) ShiftAllRightAndFillUpperFrom(imm uint8, y Uint16x32) Uint16x32 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDD, CPU Feature: AVX512EVEX +func (x Uint32x4) ShiftAllRightAndFillUpperFrom(imm uint8, y Uint32x4) Uint32x4 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. 
+// +// Asm: VPSHRDD, CPU Feature: AVX512EVEX +func (x Uint32x8) ShiftAllRightAndFillUpperFrom(imm uint8, y Uint32x8) Uint32x8 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDD, CPU Feature: AVX512EVEX +func (x Uint32x16) ShiftAllRightAndFillUpperFrom(imm uint8, y Uint32x16) Uint32x16 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDQ, CPU Feature: AVX512EVEX +func (x Uint64x2) ShiftAllRightAndFillUpperFrom(imm uint8, y Uint64x2) Uint64x2 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDQ, CPU Feature: AVX512EVEX +func (x Uint64x4) ShiftAllRightAndFillUpperFrom(imm uint8, y Uint64x4) Uint64x4 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDQ, CPU Feature: AVX512EVEX +func (x Uint64x8) ShiftAllRightAndFillUpperFrom(imm uint8, y Uint64x8) Uint64x8 + +/* ShiftAllRightSignExtended */ + +// ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAW, CPU Feature: AVX +func (x Int16x8) ShiftAllRightSignExtended(y uint64) Int16x8 + +// ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAW, CPU Feature: AVX2 +func (x Int16x16) ShiftAllRightSignExtended(y uint64) Int16x16 + +// ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAD, CPU Feature: AVX +func (x Int32x4) ShiftAllRightSignExtended(y uint64) Int32x4 + +// ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAD, CPU Feature: AVX2 +func (x Int32x8) ShiftAllRightSignExtended(y uint64) Int32x8 + +// ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAQ, CPU Feature: AVX512EVEX +func (x Int64x2) ShiftAllRightSignExtended(y uint64) Int64x2 + +// ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAQ, CPU Feature: AVX512EVEX +func (x Int64x4) ShiftAllRightSignExtended(y uint64) Int64x4 + +// ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAQ, CPU Feature: AVX512EVEX +func (x Int64x8) ShiftAllRightSignExtended(y uint64) Int64x8 + +/* ShiftLeft */ + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. 
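
Back to the ShiftAllRightSignExtended group above: shifting by one less than the lane width smears the sign bit across the lane, which yields a per-lane sign mask. A sketch with an assumed helper name:

	package simdexample

	import "simd"

	// signMask returns 0 for non-negative lanes of x and -1 (all bits set)
	// for negative lanes, by arithmetically shifting each 32-bit lane by 31.
	func signMask(x simd.Int32x8) simd.Int32x8 {
		return x.ShiftAllRightSignExtended(31)
	}
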
+// +// Asm: VPSLLVW, CPU Feature: AVX512EVEX +func (x Int16x8) ShiftLeft(y Int16x8) Int16x8 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVW, CPU Feature: AVX512EVEX +func (x Int16x16) ShiftLeft(y Int16x16) Int16x16 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVW, CPU Feature: AVX512EVEX +func (x Int16x32) ShiftLeft(y Int16x32) Int16x32 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVD, CPU Feature: AVX2 +func (x Int32x4) ShiftLeft(y Int32x4) Int32x4 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVD, CPU Feature: AVX2 +func (x Int32x8) ShiftLeft(y Int32x8) Int32x8 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVD, CPU Feature: AVX512EVEX +func (x Int32x16) ShiftLeft(y Int32x16) Int32x16 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVQ, CPU Feature: AVX2 +func (x Int64x2) ShiftLeft(y Int64x2) Int64x2 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVQ, CPU Feature: AVX2 +func (x Int64x4) ShiftLeft(y Int64x4) Int64x4 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVQ, CPU Feature: AVX512EVEX +func (x Int64x8) ShiftLeft(y Int64x8) Int64x8 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVW, CPU Feature: AVX512EVEX +func (x Uint16x8) ShiftLeft(y Uint16x8) Uint16x8 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVW, CPU Feature: AVX512EVEX +func (x Uint16x16) ShiftLeft(y Uint16x16) Uint16x16 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVW, CPU Feature: AVX512EVEX +func (x Uint16x32) ShiftLeft(y Uint16x32) Uint16x32 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVD, CPU Feature: AVX2 +func (x Uint32x4) ShiftLeft(y Uint32x4) Uint32x4 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVD, CPU Feature: AVX2 +func (x Uint32x8) ShiftLeft(y Uint32x8) Uint32x8 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. 
+/* ShiftLeftAndFillUpperFrom */
+
+// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
+// corresponding elements in y (only the lower 4 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+//
+// Asm: VPSHLDVW, CPU Feature: AVX512EVEX
+func (x Int16x8) ShiftLeftAndFillUpperFrom(y Int16x8, z Int16x8) Int16x8
+
+// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
+// corresponding elements in y (only the lower 4 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+//
+// Asm: VPSHLDVW, CPU Feature: AVX512EVEX
+func (x Int16x16) ShiftLeftAndFillUpperFrom(y Int16x16, z Int16x16) Int16x16
+
+// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
+// corresponding elements in y (only the lower 4 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+//
+// Asm: VPSHLDVW, CPU Feature: AVX512EVEX
+func (x Int16x32) ShiftLeftAndFillUpperFrom(y Int16x32, z Int16x32) Int16x32
+
+// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
+// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+//
+// Asm: VPSHLDVD, CPU Feature: AVX512EVEX
+func (x Int32x4) ShiftLeftAndFillUpperFrom(y Int32x4, z Int32x4) Int32x4
+
+// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
+// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+//
+// Asm: VPSHLDVD, CPU Feature: AVX512EVEX
+func (x Int32x8) ShiftLeftAndFillUpperFrom(y Int32x8, z Int32x8) Int32x8
+
+// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
+// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+//
+// Asm: VPSHLDVD, CPU Feature: AVX512EVEX
+func (x Int32x16) ShiftLeftAndFillUpperFrom(y Int32x16, z Int32x16) Int32x16
+
+// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
+// corresponding elements in y (only the lower 6 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+//
+// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX
+func (x Int64x2) ShiftLeftAndFillUpperFrom(y Int64x2, z Int64x2) Int64x2
+
+// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
+// corresponding elements in y (only the lower 6 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+//
+// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX
+func (x Int64x4) ShiftLeftAndFillUpperFrom(y Int64x4, z Int64x4) Int64x4
+
+// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
+// corresponding elements in y (only the lower 6 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+//
+// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX
+func (x Int64x8) ShiftLeftAndFillUpperFrom(y Int64x8, z Int64x8) Int64x8
+
+// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
+// corresponding elements in y (only the lower 4 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+//
+// Asm: VPSHLDVW, CPU Feature: AVX512EVEX
+func (x Uint16x8) ShiftLeftAndFillUpperFrom(y Uint16x8, z Uint16x8) Uint16x8
+
+// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
+// corresponding elements in y (only the lower 4 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+//
+// Asm: VPSHLDVW, CPU Feature: AVX512EVEX
+func (x Uint16x16) ShiftLeftAndFillUpperFrom(y Uint16x16, z Uint16x16) Uint16x16
+
+// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
+// corresponding elements in y (only the lower 4 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+//
+// Asm: VPSHLDVW, CPU Feature: AVX512EVEX
+func (x Uint16x32) ShiftLeftAndFillUpperFrom(y Uint16x32, z Uint16x32) Uint16x32
+
+// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
+// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+//
+// Asm: VPSHLDVD, CPU Feature: AVX512EVEX
+func (x Uint32x4) ShiftLeftAndFillUpperFrom(y Uint32x4, z Uint32x4) Uint32x4
+
+// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
+// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+//
+// Asm: VPSHLDVD, CPU Feature: AVX512EVEX
+func (x Uint32x8) ShiftLeftAndFillUpperFrom(y Uint32x8, z Uint32x8) Uint32x8
+
+// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
+// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+//
+// Asm: VPSHLDVD, CPU Feature: AVX512EVEX
+func (x Uint32x16) ShiftLeftAndFillUpperFrom(y Uint32x16, z Uint32x16) Uint32x16
+
+// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
+// corresponding elements in y (only the lower 6 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+//
+// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX
+func (x Uint64x2) ShiftLeftAndFillUpperFrom(y Uint64x2, z Uint64x2) Uint64x2
+
+// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
+// corresponding elements in y (only the lower 6 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+//
+// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX
+func (x Uint64x4) ShiftLeftAndFillUpperFrom(y Uint64x4, z Uint64x4) Uint64x4
+
+// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
+// corresponding elements in y (only the lower 6 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+//
+// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX
+func (x Uint64x8) ShiftLeftAndFillUpperFrom(y Uint64x8, z Uint64x8) Uint64x8
+
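Given the semantics documented above, passing x itself as the fill operand turns the left funnel shift into a per-lane rotate; a sketch under the same assumptions as the earlier examples:

	package simddemo

	import "simd" // experimental package; assumed import path

	// RotateLeftVar rotates each 32-bit lane of x left by the corresponding
	// count in y: the bits shifted out at the top re-enter at the bottom
	// because the emptied low bits are refilled from x's own upper bits
	// (VPSHLDVD with x used as both the shifted and the fill operand).
	func RotateLeftVar(x, y simd.Uint32x8) simd.Uint32x8 {
		return x.ShiftLeftAndFillUpperFrom(y, x)
	}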
+/* ShiftRight */
+
+// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+//
+// Asm: VPSRLVW, CPU Feature: AVX512EVEX
+func (x Int16x8) ShiftRight(y Int16x8) Int16x8
+
+// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+//
+// Asm: VPSRLVW, CPU Feature: AVX512EVEX
+func (x Int16x16) ShiftRight(y Int16x16) Int16x16
+
+// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+//
+// Asm: VPSRLVW, CPU Feature: AVX512EVEX
+func (x Int16x32) ShiftRight(y Int16x32) Int16x32
+
+// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+//
+// Asm: VPSRLVD, CPU Feature: AVX2
+func (x Int32x4) ShiftRight(y Int32x4) Int32x4
+
+// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+//
+// Asm: VPSRLVD, CPU Feature: AVX2
+func (x Int32x8) ShiftRight(y Int32x8) Int32x8
+
+// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+//
+// Asm: VPSRLVD, CPU Feature: AVX512EVEX
+func (x Int32x16) ShiftRight(y Int32x16) Int32x16
+
+// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+//
+// Asm: VPSRLVQ, CPU Feature: AVX2
+func (x Int64x2) ShiftRight(y Int64x2) Int64x2
+
+// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+//
+// Asm: VPSRLVQ, CPU Feature: AVX2
+func (x Int64x4) ShiftRight(y Int64x4) Int64x4
+
+// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+//
+// Asm: VPSRLVQ, CPU Feature: AVX512EVEX
+func (x Int64x8) ShiftRight(y Int64x8) Int64x8
+
+// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+//
+// Asm: VPSRLVW, CPU Feature: AVX512EVEX
+func (x Uint16x8) ShiftRight(y Uint16x8) Uint16x8
+
+// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+//
+// Asm: VPSRLVW, CPU Feature: AVX512EVEX
+func (x Uint16x16) ShiftRight(y Uint16x16) Uint16x16
+
+// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+//
+// Asm: VPSRLVW, CPU Feature: AVX512EVEX
+func (x Uint16x32) ShiftRight(y Uint16x32) Uint16x32
+
+// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+//
+// Asm: VPSRLVD, CPU Feature: AVX2
+func (x Uint32x4) ShiftRight(y Uint32x4) Uint32x4
+
+// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+//
+// Asm: VPSRLVD, CPU Feature: AVX2
+func (x Uint32x8) ShiftRight(y Uint32x8) Uint32x8
+
+// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+//
+// Asm: VPSRLVD, CPU Feature: AVX512EVEX
+func (x Uint32x16) ShiftRight(y Uint32x16) Uint32x16
+
+// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+//
+// Asm: VPSRLVQ, CPU Feature: AVX2
+func (x Uint64x2) ShiftRight(y Uint64x2) Uint64x2
+
+// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+//
+// Asm: VPSRLVQ, CPU Feature: AVX2
+func (x Uint64x4) ShiftRight(y Uint64x4) Uint64x4
+
+// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+//
+// Asm: VPSRLVQ, CPU Feature: AVX512EVEX
+func (x Uint64x8) ShiftRight(y Uint64x8) Uint64x8
+
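The logical right shift pairs naturally with the left shift above for bit-field extraction; a sketch under the same assumptions, using only methods declared in this file:

	package simddemo

	import "simd" // experimental package; assumed import path

	// ExtractBits isolates a bit field in each 32-bit lane: shift left to
	// discard the bits above the field, then shift right to move the field
	// down to bit 0. Both shifts zero the vacated bits (VPSLLVD, VPSRLVD).
	func ExtractBits(x, left, right simd.Uint32x8) simd.Uint32x8 {
		return x.ShiftLeft(left).ShiftRight(right)
	}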
+/* ShiftRightAndFillUpperFrom */
+
+// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y (only the lower 4 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+//
+// Asm: VPSHRDVW, CPU Feature: AVX512EVEX
+func (x Int16x8) ShiftRightAndFillUpperFrom(y Int16x8, z Int16x8) Int16x8
+
+// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y (only the lower 4 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+//
+// Asm: VPSHRDVW, CPU Feature: AVX512EVEX
+func (x Int16x16) ShiftRightAndFillUpperFrom(y Int16x16, z Int16x16) Int16x16
+
+// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y (only the lower 4 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+//
+// Asm: VPSHRDVW, CPU Feature: AVX512EVEX
+func (x Int16x32) ShiftRightAndFillUpperFrom(y Int16x32, z Int16x32) Int16x32
+
+// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+//
+// Asm: VPSHRDVD, CPU Feature: AVX512EVEX
+func (x Int32x4) ShiftRightAndFillUpperFrom(y Int32x4, z Int32x4) Int32x4
+
+// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+//
+// Asm: VPSHRDVD, CPU Feature: AVX512EVEX
+func (x Int32x8) ShiftRightAndFillUpperFrom(y Int32x8, z Int32x8) Int32x8
+
+// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+//
+// Asm: VPSHRDVD, CPU Feature: AVX512EVEX
+func (x Int32x16) ShiftRightAndFillUpperFrom(y Int32x16, z Int32x16) Int32x16
+
+// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y (only the lower 6 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+//
+// Asm: VPSHRDVQ, CPU Feature: AVX512EVEX
+func (x Int64x2) ShiftRightAndFillUpperFrom(y Int64x2, z Int64x2) Int64x2
+
+// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y (only the lower 6 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+//
+// Asm: VPSHRDVQ, CPU Feature: AVX512EVEX
+func (x Int64x4) ShiftRightAndFillUpperFrom(y Int64x4, z Int64x4) Int64x4
+
+// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y (only the lower 6 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+//
+// Asm: VPSHRDVQ, CPU Feature: AVX512EVEX
+func (x Int64x8) ShiftRightAndFillUpperFrom(y Int64x8, z Int64x8) Int64x8
+
+// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y (only the lower 4 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+//
+// Asm: VPSHRDVW, CPU Feature: AVX512EVEX
+func (x Uint16x8) ShiftRightAndFillUpperFrom(y Uint16x8, z Uint16x8) Uint16x8
+
+// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y (only the lower 4 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+//
+// Asm: VPSHRDVW, CPU Feature: AVX512EVEX
+func (x Uint16x16) ShiftRightAndFillUpperFrom(y Uint16x16, z Uint16x16) Uint16x16
+
+// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y (only the lower 4 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+//
+// Asm: VPSHRDVW, CPU Feature: AVX512EVEX
+func (x Uint16x32) ShiftRightAndFillUpperFrom(y Uint16x32, z Uint16x32) Uint16x32
+
+// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+//
+// Asm: VPSHRDVD, CPU Feature: AVX512EVEX
+func (x Uint32x4) ShiftRightAndFillUpperFrom(y Uint32x4, z Uint32x4) Uint32x4
+
+// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+//
+// Asm: VPSHRDVD, CPU Feature: AVX512EVEX
+func (x Uint32x8) ShiftRightAndFillUpperFrom(y Uint32x8, z Uint32x8) Uint32x8
+
+// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+//
+// Asm: VPSHRDVD, CPU Feature: AVX512EVEX
+func (x Uint32x16) ShiftRightAndFillUpperFrom(y Uint32x16, z Uint32x16) Uint32x16
+
+// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y (only the lower 6 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+//
+// Asm: VPSHRDVQ, CPU Feature: AVX512EVEX
+func (x Uint64x2) ShiftRightAndFillUpperFrom(y Uint64x2, z Uint64x2) Uint64x2
+
+// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y (only the lower 6 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+//
+// Asm: VPSHRDVQ, CPU Feature: AVX512EVEX
+func (x Uint64x4) ShiftRightAndFillUpperFrom(y Uint64x4, z Uint64x4) Uint64x4
+
+// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y (only the lower 6 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+//
+// Asm: VPSHRDVQ, CPU Feature: AVX512EVEX
+func (x Uint64x8) ShiftRightAndFillUpperFrom(y Uint64x8, z Uint64x8) Uint64x8
+
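Because the vacated bits are refilled from a second vector, the right funnel shift can also extract an unaligned window that spans a pair of adjacent words; a sketch under the same assumptions, valid for counts between 1 and 63:

	package simddemo

	import "simd" // experimental package; assumed import path

	// Window returns, per 64-bit lane, the 64-bit window that starts n bits
	// into lo, with the bits beyond lo's top filled in from the low bits of
	// hi (VPSHRDVQ): roughly (lo >> n) | (hi << (64 - n)) for 0 < n < 64.
	func Window(lo, hi, n simd.Uint64x4) simd.Uint64x4 {
		return lo.ShiftRightAndFillUpperFrom(n, hi)
	}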
+/* ShiftRightSignExtended */
+
+// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+//
+// Asm: VPSRAVW, CPU Feature: AVX512EVEX
+func (x Int16x8) ShiftRightSignExtended(y Int16x8) Int16x8
+
+// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+//
+// Asm: VPSRAVW, CPU Feature: AVX512EVEX
+func (x Int16x16) ShiftRightSignExtended(y Int16x16) Int16x16
+
+// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+//
+// Asm: VPSRAVW, CPU Feature: AVX512EVEX
+func (x Int16x32) ShiftRightSignExtended(y Int16x32) Int16x32
+
+// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+//
+// Asm: VPSRAVD, CPU Feature: AVX2
+func (x Int32x4) ShiftRightSignExtended(y Int32x4) Int32x4
+
+// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+//
+// Asm: VPSRAVD, CPU Feature: AVX2
+func (x Int32x8) ShiftRightSignExtended(y Int32x8) Int32x8
+
+// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+//
+// Asm: VPSRAVD, CPU Feature: AVX512EVEX
+func (x Int32x16) ShiftRightSignExtended(y Int32x16) Int32x16
+
+// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+//
+// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
+func (x Int64x2) ShiftRightSignExtended(y Int64x2) Int64x2
+
+// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+//
+// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
+func (x Int64x4) ShiftRightSignExtended(y Int64x4) Int64x4
+
+// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+//
+// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
+func (x Int64x8) ShiftRightSignExtended(y Int64x8) Int64x8
+
+// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+//
+// Asm: VPSRAVW, CPU Feature: AVX512EVEX
+func (x Uint16x8) ShiftRightSignExtended(y Uint16x8) Uint16x8
+
+// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+//
+// Asm: VPSRAVW, CPU Feature: AVX512EVEX
+func (x Uint16x16) ShiftRightSignExtended(y Uint16x16) Uint16x16
+
+// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+//
+// Asm: VPSRAVW, CPU Feature: AVX512EVEX
+func (x Uint16x32) ShiftRightSignExtended(y Uint16x32) Uint16x32
+
+// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+//
+// Asm: VPSRAVD, CPU Feature: AVX2
+func (x Uint32x4) ShiftRightSignExtended(y Uint32x4) Uint32x4
+
+// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+//
+// Asm: VPSRAVD, CPU Feature: AVX2
+func (x Uint32x8) ShiftRightSignExtended(y Uint32x8) Uint32x8
+
+// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+//
+// Asm: VPSRAVD, CPU Feature: AVX512EVEX
+func (x Uint32x16) ShiftRightSignExtended(y Uint32x16) Uint32x16
+
+// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+//
+// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
+func (x Uint64x2) ShiftRightSignExtended(y Uint64x2) Uint64x2
+
+// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+//
+// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
+func (x Uint64x4) ShiftRightSignExtended(y Uint64x4) Uint64x4
+
+// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+//
+// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
+func (x Uint64x8) ShiftRightSignExtended(y Uint64x8) Uint64x8
+
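A final sketch contrasting the two per-lane right shifts declared in this file, under the same assumptions as the earlier examples:

	package simddemo

	import "simd" // experimental package; assumed import path

	// ShiftRightBoth shows the difference between the per-lane right shifts:
	// ShiftRight zero-fills the vacated high bits (VPSRLVD), while
	// ShiftRightSignExtended replicates the sign bit (VPSRAVD), preserving
	// the sign of negative lanes.
	func ShiftRightBoth(x, counts simd.Int32x8) (logical, arithmetic simd.Int32x8) {
		logical = x.ShiftRight(counts)
		arithmetic = x.ShiftRightSignExtended(counts)
		return
	}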
 /* Sign */
 
 // Sign returns the product of the first operand with -1, 0, or 1,