From e61ebfce564086e5e2d634b0d138d96b6e34c19a Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Tue, 24 Jun 2025 15:21:29 +0000 Subject: [PATCH] [dev.simd] cmd/compile, simd: add shift operations This CL is generated by CL 683475. Change-Id: I9e3ac6aff6f711cb26ff85e4c8729d9e2cc38e7d Reviewed-on: https://go-review.googlesource.com/c/go/+/683715 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- src/cmd/compile/internal/amd64/simdssa.go | 312 +- .../compile/internal/ssa/_gen/simdAMD64.rules | 398 ++ .../compile/internal/ssa/_gen/simdAMD64ops.go | 204 + .../internal/ssa/_gen/simdgenericOps.go | 398 ++ src/cmd/compile/internal/ssa/opGen.go | 5772 +++++++++++++++++ src/cmd/compile/internal/ssa/rewriteAMD64.go | 5343 +++++++++++++++ .../compile/internal/ssagen/simdintrinsics.go | 398 ++ src/simd/simd_wrapped_test.go | 1245 ++++ src/simd/stubs_amd64.go | 2190 +++++++ 9 files changed, 16257 insertions(+), 3 deletions(-) diff --git a/src/cmd/compile/internal/amd64/simdssa.go b/src/cmd/compile/internal/amd64/simdssa.go index 52976803578..6c1d365bfa7 100644 --- a/src/cmd/compile/internal/amd64/simdssa.go +++ b/src/cmd/compile/internal/amd64/simdssa.go @@ -247,6 +247,18 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPHSUBW256, ssa.OpAMD64VPHSUBD128, ssa.OpAMD64VPHSUBD256, + ssa.OpAMD64VPROLVD128, + ssa.OpAMD64VPROLVD256, + ssa.OpAMD64VPROLVD512, + ssa.OpAMD64VPROLVQ128, + ssa.OpAMD64VPROLVQ256, + ssa.OpAMD64VPROLVQ512, + ssa.OpAMD64VPRORVD128, + ssa.OpAMD64VPRORVD256, + ssa.OpAMD64VPRORVD512, + ssa.OpAMD64VPRORVQ128, + ssa.OpAMD64VPRORVQ256, + ssa.OpAMD64VPRORVQ512, ssa.OpAMD64VPADDSB128, ssa.OpAMD64VPADDSB256, ssa.OpAMD64VPADDSB512, @@ -266,6 +278,33 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPMADDUBSW128, ssa.OpAMD64VPMADDUBSW256, ssa.OpAMD64VPMADDUBSW512, + ssa.OpAMD64VPSLLVW128, + ssa.OpAMD64VPSLLVW256, + ssa.OpAMD64VPSLLVW512, + ssa.OpAMD64VPSLLVD128, + ssa.OpAMD64VPSLLVD256, + ssa.OpAMD64VPSLLVD512, + ssa.OpAMD64VPSLLVQ128, + ssa.OpAMD64VPSLLVQ256, + ssa.OpAMD64VPSLLVQ512, + ssa.OpAMD64VPSRLVW128, + ssa.OpAMD64VPSRLVW256, + ssa.OpAMD64VPSRLVW512, + ssa.OpAMD64VPSRLVD128, + ssa.OpAMD64VPSRLVD256, + ssa.OpAMD64VPSRLVD512, + ssa.OpAMD64VPSRLVQ128, + ssa.OpAMD64VPSRLVQ256, + ssa.OpAMD64VPSRLVQ512, + ssa.OpAMD64VPSRAVW128, + ssa.OpAMD64VPSRAVW256, + ssa.OpAMD64VPSRAVW512, + ssa.OpAMD64VPSRAVD128, + ssa.OpAMD64VPSRAVD256, + ssa.OpAMD64VPSRAVD512, + ssa.OpAMD64VPSRAVQ128, + ssa.OpAMD64VPSRAVQ256, + ssa.OpAMD64VPSRAVQ512, ssa.OpAMD64VPSIGNB128, ssa.OpAMD64VPSIGNB256, ssa.OpAMD64VPSIGNW128, @@ -464,6 +503,18 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPMADDWDMasked128, ssa.OpAMD64VPMADDWDMasked256, ssa.OpAMD64VPMADDWDMasked512, + ssa.OpAMD64VPROLVDMasked128, + ssa.OpAMD64VPROLVDMasked256, + ssa.OpAMD64VPROLVDMasked512, + ssa.OpAMD64VPROLVQMasked128, + ssa.OpAMD64VPROLVQMasked256, + ssa.OpAMD64VPROLVQMasked512, + ssa.OpAMD64VPRORVDMasked128, + ssa.OpAMD64VPRORVDMasked256, + ssa.OpAMD64VPRORVDMasked512, + ssa.OpAMD64VPRORVQMasked128, + ssa.OpAMD64VPRORVQMasked256, + ssa.OpAMD64VPRORVQMasked512, ssa.OpAMD64VPADDSBMasked128, ssa.OpAMD64VPADDSBMasked256, ssa.OpAMD64VPADDSBMasked512, @@ -479,6 +530,33 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPMADDUBSWMasked128, ssa.OpAMD64VPMADDUBSWMasked256, ssa.OpAMD64VPMADDUBSWMasked512, + ssa.OpAMD64VPSLLVWMasked128, + ssa.OpAMD64VPSLLVWMasked256, + ssa.OpAMD64VPSLLVWMasked512, + ssa.OpAMD64VPSLLVDMasked128, + ssa.OpAMD64VPSLLVDMasked256, + 
ssa.OpAMD64VPSLLVDMasked512, + ssa.OpAMD64VPSLLVQMasked128, + ssa.OpAMD64VPSLLVQMasked256, + ssa.OpAMD64VPSLLVQMasked512, + ssa.OpAMD64VPSRLVWMasked128, + ssa.OpAMD64VPSRLVWMasked256, + ssa.OpAMD64VPSRLVWMasked512, + ssa.OpAMD64VPSRLVDMasked128, + ssa.OpAMD64VPSRLVDMasked256, + ssa.OpAMD64VPSRLVDMasked512, + ssa.OpAMD64VPSRLVQMasked128, + ssa.OpAMD64VPSRLVQMasked256, + ssa.OpAMD64VPSRLVQMasked512, + ssa.OpAMD64VPSRAVWMasked128, + ssa.OpAMD64VPSRAVWMasked256, + ssa.OpAMD64VPSRAVWMasked512, + ssa.OpAMD64VPSRAVDMasked128, + ssa.OpAMD64VPSRAVDMasked256, + ssa.OpAMD64VPSRAVDMasked512, + ssa.OpAMD64VPSRAVQMasked128, + ssa.OpAMD64VPSRAVQMasked256, + ssa.OpAMD64VPSRAVQMasked512, ssa.OpAMD64VSUBPSMasked128, ssa.OpAMD64VSUBPSMasked256, ssa.OpAMD64VSUBPSMasked512, @@ -570,7 +648,19 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VREDUCEPS512, ssa.OpAMD64VREDUCEPD128, ssa.OpAMD64VREDUCEPD256, - ssa.OpAMD64VREDUCEPD512: + ssa.OpAMD64VREDUCEPD512, + ssa.OpAMD64VPROLD128, + ssa.OpAMD64VPROLD256, + ssa.OpAMD64VPROLD512, + ssa.OpAMD64VPROLQ128, + ssa.OpAMD64VPROLQ256, + ssa.OpAMD64VPROLQ512, + ssa.OpAMD64VPRORD128, + ssa.OpAMD64VPRORD256, + ssa.OpAMD64VPRORD512, + ssa.OpAMD64VPRORQ128, + ssa.OpAMD64VPRORQ256, + ssa.OpAMD64VPRORQ512: p = simdFp11Imm8(s, v) case ssa.OpAMD64VRNDSCALEPSMasked128, @@ -584,14 +674,44 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VREDUCEPSMasked512, ssa.OpAMD64VREDUCEPDMasked128, ssa.OpAMD64VREDUCEPDMasked256, - ssa.OpAMD64VREDUCEPDMasked512: + ssa.OpAMD64VREDUCEPDMasked512, + ssa.OpAMD64VPROLDMasked128, + ssa.OpAMD64VPROLDMasked256, + ssa.OpAMD64VPROLDMasked512, + ssa.OpAMD64VPROLQMasked128, + ssa.OpAMD64VPROLQMasked256, + ssa.OpAMD64VPROLQMasked512, + ssa.OpAMD64VPRORDMasked128, + ssa.OpAMD64VPRORDMasked256, + ssa.OpAMD64VPRORDMasked512, + ssa.OpAMD64VPRORQMasked128, + ssa.OpAMD64VPRORQMasked256, + ssa.OpAMD64VPRORQMasked512: p = simdFpkfpImm8(s, v) case ssa.OpAMD64VDPPD128, ssa.OpAMD64VCMPPS128, ssa.OpAMD64VCMPPS256, ssa.OpAMD64VCMPPD128, - ssa.OpAMD64VCMPPD256: + ssa.OpAMD64VCMPPD256, + ssa.OpAMD64VPSHLDW128, + ssa.OpAMD64VPSHLDW256, + ssa.OpAMD64VPSHLDW512, + ssa.OpAMD64VPSHLDD128, + ssa.OpAMD64VPSHLDD256, + ssa.OpAMD64VPSHLDD512, + ssa.OpAMD64VPSHLDQ128, + ssa.OpAMD64VPSHLDQ256, + ssa.OpAMD64VPSHLDQ512, + ssa.OpAMD64VPSHRDW128, + ssa.OpAMD64VPSHRDW256, + ssa.OpAMD64VPSHRDW512, + ssa.OpAMD64VPSHRDD128, + ssa.OpAMD64VPSHRDD256, + ssa.OpAMD64VPSHRDD512, + ssa.OpAMD64VPSHRDQ128, + ssa.OpAMD64VPSHRDQ256, + ssa.OpAMD64VPSHRDQ512: p = simdFp21Imm8(s, v) case ssa.OpAMD64VCMPPS512, @@ -681,6 +801,24 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPDPBUSDS128, ssa.OpAMD64VPDPBUSDS256, ssa.OpAMD64VPDPBUSDS512, + ssa.OpAMD64VPSHLDVW128, + ssa.OpAMD64VPSHLDVW256, + ssa.OpAMD64VPSHLDVW512, + ssa.OpAMD64VPSHLDVD128, + ssa.OpAMD64VPSHLDVD256, + ssa.OpAMD64VPSHLDVD512, + ssa.OpAMD64VPSHLDVQ128, + ssa.OpAMD64VPSHLDVQ256, + ssa.OpAMD64VPSHLDVQ512, + ssa.OpAMD64VPSHRDVW128, + ssa.OpAMD64VPSHRDVW256, + ssa.OpAMD64VPSHRDVW512, + ssa.OpAMD64VPSHRDVD128, + ssa.OpAMD64VPSHRDVD256, + ssa.OpAMD64VPSHRDVD512, + ssa.OpAMD64VPSHRDVQ128, + ssa.OpAMD64VPSHRDVQ256, + ssa.OpAMD64VPSHRDVQ512, ssa.OpAMD64VPDPBUSD128, ssa.OpAMD64VPDPBUSD256, ssa.OpAMD64VPDPBUSD512: @@ -713,11 +851,63 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPDPBUSDSMasked128, ssa.OpAMD64VPDPBUSDSMasked256, ssa.OpAMD64VPDPBUSDSMasked512, + ssa.OpAMD64VPSHLDVWMasked128, + ssa.OpAMD64VPSHLDVWMasked256, + ssa.OpAMD64VPSHLDVWMasked512, + 
ssa.OpAMD64VPSHLDVDMasked128, + ssa.OpAMD64VPSHLDVDMasked256, + ssa.OpAMD64VPSHLDVDMasked512, + ssa.OpAMD64VPSHLDVQMasked128, + ssa.OpAMD64VPSHLDVQMasked256, + ssa.OpAMD64VPSHLDVQMasked512, + ssa.OpAMD64VPSHRDVWMasked128, + ssa.OpAMD64VPSHRDVWMasked256, + ssa.OpAMD64VPSHRDVWMasked512, + ssa.OpAMD64VPSHRDVDMasked128, + ssa.OpAMD64VPSHRDVDMasked256, + ssa.OpAMD64VPSHRDVDMasked512, + ssa.OpAMD64VPSHRDVQMasked128, + ssa.OpAMD64VPSHRDVQMasked256, + ssa.OpAMD64VPSHRDVQMasked512, ssa.OpAMD64VPDPBUSDMasked128, ssa.OpAMD64VPDPBUSDMasked256, ssa.OpAMD64VPDPBUSDMasked512: p = simdFp3kfpResultInArg0(s, v) + case ssa.OpAMD64VPSLLW128, + ssa.OpAMD64VPSLLW256, + ssa.OpAMD64VPSLLD128, + ssa.OpAMD64VPSLLD256, + ssa.OpAMD64VPSLLQ128, + ssa.OpAMD64VPSLLQ256, + ssa.OpAMD64VPSLLQ512, + ssa.OpAMD64VPSRLW128, + ssa.OpAMD64VPSRLW256, + ssa.OpAMD64VPSRLD128, + ssa.OpAMD64VPSRLD256, + ssa.OpAMD64VPSRLQ128, + ssa.OpAMD64VPSRLQ256, + ssa.OpAMD64VPSRLQ512, + ssa.OpAMD64VPSRAW128, + ssa.OpAMD64VPSRAW256, + ssa.OpAMD64VPSRAD128, + ssa.OpAMD64VPSRAD256, + ssa.OpAMD64VPSRAQ128, + ssa.OpAMD64VPSRAQ256, + ssa.OpAMD64VPSRAQ512: + p = simdFpXfp(s, v) + + case ssa.OpAMD64VPSLLQMasked128, + ssa.OpAMD64VPSLLQMasked256, + ssa.OpAMD64VPSLLQMasked512, + ssa.OpAMD64VPSRLQMasked128, + ssa.OpAMD64VPSRLQMasked256, + ssa.OpAMD64VPSRLQMasked512, + ssa.OpAMD64VPSRAQMasked128, + ssa.OpAMD64VPSRAQMasked256, + ssa.OpAMD64VPSRAQMasked512: + p = simdFpXkfp(s, v) + case ssa.OpAMD64VPINSRB128, ssa.OpAMD64VPINSRW128, ssa.OpAMD64VPINSRD128, @@ -730,6 +920,26 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPEXTRQ128: p = simdFpgpImm8(s, v) + case ssa.OpAMD64VPSHLDWMasked128, + ssa.OpAMD64VPSHLDWMasked256, + ssa.OpAMD64VPSHLDWMasked512, + ssa.OpAMD64VPSHLDDMasked128, + ssa.OpAMD64VPSHLDDMasked256, + ssa.OpAMD64VPSHLDDMasked512, + ssa.OpAMD64VPSHLDQMasked128, + ssa.OpAMD64VPSHLDQMasked256, + ssa.OpAMD64VPSHLDQMasked512, + ssa.OpAMD64VPSHRDWMasked128, + ssa.OpAMD64VPSHRDWMasked256, + ssa.OpAMD64VPSHRDWMasked512, + ssa.OpAMD64VPSHRDDMasked128, + ssa.OpAMD64VPSHRDDMasked256, + ssa.OpAMD64VPSHRDDMasked512, + ssa.OpAMD64VPSHRDQMasked128, + ssa.OpAMD64VPSHRDQMasked256, + ssa.OpAMD64VPSHRDQMasked512: + p = simdFp2kfpImm8(s, v) + default: // Unknown reg shape return false @@ -968,6 +1178,30 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPOPCNTQMasked128, ssa.OpAMD64VPOPCNTQMasked256, ssa.OpAMD64VPOPCNTQMasked512, + ssa.OpAMD64VPROLDMasked128, + ssa.OpAMD64VPROLDMasked256, + ssa.OpAMD64VPROLDMasked512, + ssa.OpAMD64VPROLQMasked128, + ssa.OpAMD64VPROLQMasked256, + ssa.OpAMD64VPROLQMasked512, + ssa.OpAMD64VPRORDMasked128, + ssa.OpAMD64VPRORDMasked256, + ssa.OpAMD64VPRORDMasked512, + ssa.OpAMD64VPRORQMasked128, + ssa.OpAMD64VPRORQMasked256, + ssa.OpAMD64VPRORQMasked512, + ssa.OpAMD64VPROLVDMasked128, + ssa.OpAMD64VPROLVDMasked256, + ssa.OpAMD64VPROLVDMasked512, + ssa.OpAMD64VPROLVQMasked128, + ssa.OpAMD64VPROLVQMasked256, + ssa.OpAMD64VPROLVQMasked512, + ssa.OpAMD64VPRORVDMasked128, + ssa.OpAMD64VPRORVDMasked256, + ssa.OpAMD64VPRORVDMasked512, + ssa.OpAMD64VPRORVQMasked128, + ssa.OpAMD64VPRORVQMasked256, + ssa.OpAMD64VPRORVQMasked512, ssa.OpAMD64VPADDSBMasked128, ssa.OpAMD64VPADDSBMasked256, ssa.OpAMD64VPADDSBMasked512, @@ -989,6 +1223,78 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPDPBUSDSMasked128, ssa.OpAMD64VPDPBUSDSMasked256, ssa.OpAMD64VPDPBUSDSMasked512, + ssa.OpAMD64VPSLLQMasked128, + ssa.OpAMD64VPSLLQMasked256, + ssa.OpAMD64VPSLLQMasked512, + ssa.OpAMD64VPSHLDWMasked128, 
+ ssa.OpAMD64VPSHLDWMasked256, + ssa.OpAMD64VPSHLDWMasked512, + ssa.OpAMD64VPSHLDDMasked128, + ssa.OpAMD64VPSHLDDMasked256, + ssa.OpAMD64VPSHLDDMasked512, + ssa.OpAMD64VPSHLDQMasked128, + ssa.OpAMD64VPSHLDQMasked256, + ssa.OpAMD64VPSHLDQMasked512, + ssa.OpAMD64VPSRLQMasked128, + ssa.OpAMD64VPSRLQMasked256, + ssa.OpAMD64VPSRLQMasked512, + ssa.OpAMD64VPSHRDWMasked128, + ssa.OpAMD64VPSHRDWMasked256, + ssa.OpAMD64VPSHRDWMasked512, + ssa.OpAMD64VPSHRDDMasked128, + ssa.OpAMD64VPSHRDDMasked256, + ssa.OpAMD64VPSHRDDMasked512, + ssa.OpAMD64VPSHRDQMasked128, + ssa.OpAMD64VPSHRDQMasked256, + ssa.OpAMD64VPSHRDQMasked512, + ssa.OpAMD64VPSRAQMasked128, + ssa.OpAMD64VPSRAQMasked256, + ssa.OpAMD64VPSRAQMasked512, + ssa.OpAMD64VPSLLVWMasked128, + ssa.OpAMD64VPSLLVWMasked256, + ssa.OpAMD64VPSLLVWMasked512, + ssa.OpAMD64VPSLLVDMasked128, + ssa.OpAMD64VPSLLVDMasked256, + ssa.OpAMD64VPSLLVDMasked512, + ssa.OpAMD64VPSLLVQMasked128, + ssa.OpAMD64VPSLLVQMasked256, + ssa.OpAMD64VPSLLVQMasked512, + ssa.OpAMD64VPSHLDVWMasked128, + ssa.OpAMD64VPSHLDVWMasked256, + ssa.OpAMD64VPSHLDVWMasked512, + ssa.OpAMD64VPSHLDVDMasked128, + ssa.OpAMD64VPSHLDVDMasked256, + ssa.OpAMD64VPSHLDVDMasked512, + ssa.OpAMD64VPSHLDVQMasked128, + ssa.OpAMD64VPSHLDVQMasked256, + ssa.OpAMD64VPSHLDVQMasked512, + ssa.OpAMD64VPSRLVWMasked128, + ssa.OpAMD64VPSRLVWMasked256, + ssa.OpAMD64VPSRLVWMasked512, + ssa.OpAMD64VPSRLVDMasked128, + ssa.OpAMD64VPSRLVDMasked256, + ssa.OpAMD64VPSRLVDMasked512, + ssa.OpAMD64VPSRLVQMasked128, + ssa.OpAMD64VPSRLVQMasked256, + ssa.OpAMD64VPSRLVQMasked512, + ssa.OpAMD64VPSHRDVWMasked128, + ssa.OpAMD64VPSHRDVWMasked256, + ssa.OpAMD64VPSHRDVWMasked512, + ssa.OpAMD64VPSHRDVDMasked128, + ssa.OpAMD64VPSHRDVDMasked256, + ssa.OpAMD64VPSHRDVDMasked512, + ssa.OpAMD64VPSHRDVQMasked128, + ssa.OpAMD64VPSHRDVQMasked256, + ssa.OpAMD64VPSHRDVQMasked512, + ssa.OpAMD64VPSRAVWMasked128, + ssa.OpAMD64VPSRAVWMasked256, + ssa.OpAMD64VPSRAVWMasked512, + ssa.OpAMD64VPSRAVDMasked128, + ssa.OpAMD64VPSRAVDMasked256, + ssa.OpAMD64VPSRAVDMasked512, + ssa.OpAMD64VPSRAVQMasked128, + ssa.OpAMD64VPSRAVQMasked256, + ssa.OpAMD64VPSRAVQMasked512, ssa.OpAMD64VSQRTPSMasked128, ssa.OpAMD64VSQRTPSMasked256, ssa.OpAMD64VSQRTPSMasked512, diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules index b21d58b4a44..968ded21313 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules @@ -904,6 +904,54 @@ (MaskedPopCountUint64x2 x mask) => (VPOPCNTQMasked128 x (VPMOVVec64x2ToM mask)) (MaskedPopCountUint64x4 x mask) => (VPOPCNTQMasked256 x (VPMOVVec64x4ToM mask)) (MaskedPopCountUint64x8 x mask) => (VPOPCNTQMasked512 x (VPMOVVec64x8ToM mask)) +(MaskedRotateAllLeftInt32x4 [a] x mask) => (VPROLDMasked128 [a] x (VPMOVVec32x4ToM mask)) +(MaskedRotateAllLeftInt32x8 [a] x mask) => (VPROLDMasked256 [a] x (VPMOVVec32x8ToM mask)) +(MaskedRotateAllLeftInt32x16 [a] x mask) => (VPROLDMasked512 [a] x (VPMOVVec32x16ToM mask)) +(MaskedRotateAllLeftInt64x2 [a] x mask) => (VPROLQMasked128 [a] x (VPMOVVec64x2ToM mask)) +(MaskedRotateAllLeftInt64x4 [a] x mask) => (VPROLQMasked256 [a] x (VPMOVVec64x4ToM mask)) +(MaskedRotateAllLeftInt64x8 [a] x mask) => (VPROLQMasked512 [a] x (VPMOVVec64x8ToM mask)) +(MaskedRotateAllLeftUint32x4 [a] x mask) => (VPROLDMasked128 [a] x (VPMOVVec32x4ToM mask)) +(MaskedRotateAllLeftUint32x8 [a] x mask) => (VPROLDMasked256 [a] x (VPMOVVec32x8ToM mask)) +(MaskedRotateAllLeftUint32x16 [a] x mask) => (VPROLDMasked512 [a] x (VPMOVVec32x16ToM 
mask)) +(MaskedRotateAllLeftUint64x2 [a] x mask) => (VPROLQMasked128 [a] x (VPMOVVec64x2ToM mask)) +(MaskedRotateAllLeftUint64x4 [a] x mask) => (VPROLQMasked256 [a] x (VPMOVVec64x4ToM mask)) +(MaskedRotateAllLeftUint64x8 [a] x mask) => (VPROLQMasked512 [a] x (VPMOVVec64x8ToM mask)) +(MaskedRotateAllRightInt32x4 [a] x mask) => (VPRORDMasked128 [a] x (VPMOVVec32x4ToM mask)) +(MaskedRotateAllRightInt32x8 [a] x mask) => (VPRORDMasked256 [a] x (VPMOVVec32x8ToM mask)) +(MaskedRotateAllRightInt32x16 [a] x mask) => (VPRORDMasked512 [a] x (VPMOVVec32x16ToM mask)) +(MaskedRotateAllRightInt64x2 [a] x mask) => (VPRORQMasked128 [a] x (VPMOVVec64x2ToM mask)) +(MaskedRotateAllRightInt64x4 [a] x mask) => (VPRORQMasked256 [a] x (VPMOVVec64x4ToM mask)) +(MaskedRotateAllRightInt64x8 [a] x mask) => (VPRORQMasked512 [a] x (VPMOVVec64x8ToM mask)) +(MaskedRotateAllRightUint32x4 [a] x mask) => (VPRORDMasked128 [a] x (VPMOVVec32x4ToM mask)) +(MaskedRotateAllRightUint32x8 [a] x mask) => (VPRORDMasked256 [a] x (VPMOVVec32x8ToM mask)) +(MaskedRotateAllRightUint32x16 [a] x mask) => (VPRORDMasked512 [a] x (VPMOVVec32x16ToM mask)) +(MaskedRotateAllRightUint64x2 [a] x mask) => (VPRORQMasked128 [a] x (VPMOVVec64x2ToM mask)) +(MaskedRotateAllRightUint64x4 [a] x mask) => (VPRORQMasked256 [a] x (VPMOVVec64x4ToM mask)) +(MaskedRotateAllRightUint64x8 [a] x mask) => (VPRORQMasked512 [a] x (VPMOVVec64x8ToM mask)) +(MaskedRotateLeftInt32x4 x y mask) => (VPROLVDMasked128 x y (VPMOVVec32x4ToM mask)) +(MaskedRotateLeftInt32x8 x y mask) => (VPROLVDMasked256 x y (VPMOVVec32x8ToM mask)) +(MaskedRotateLeftInt32x16 x y mask) => (VPROLVDMasked512 x y (VPMOVVec32x16ToM mask)) +(MaskedRotateLeftInt64x2 x y mask) => (VPROLVQMasked128 x y (VPMOVVec64x2ToM mask)) +(MaskedRotateLeftInt64x4 x y mask) => (VPROLVQMasked256 x y (VPMOVVec64x4ToM mask)) +(MaskedRotateLeftInt64x8 x y mask) => (VPROLVQMasked512 x y (VPMOVVec64x8ToM mask)) +(MaskedRotateLeftUint32x4 x y mask) => (VPROLVDMasked128 x y (VPMOVVec32x4ToM mask)) +(MaskedRotateLeftUint32x8 x y mask) => (VPROLVDMasked256 x y (VPMOVVec32x8ToM mask)) +(MaskedRotateLeftUint32x16 x y mask) => (VPROLVDMasked512 x y (VPMOVVec32x16ToM mask)) +(MaskedRotateLeftUint64x2 x y mask) => (VPROLVQMasked128 x y (VPMOVVec64x2ToM mask)) +(MaskedRotateLeftUint64x4 x y mask) => (VPROLVQMasked256 x y (VPMOVVec64x4ToM mask)) +(MaskedRotateLeftUint64x8 x y mask) => (VPROLVQMasked512 x y (VPMOVVec64x8ToM mask)) +(MaskedRotateRightInt32x4 x y mask) => (VPRORVDMasked128 x y (VPMOVVec32x4ToM mask)) +(MaskedRotateRightInt32x8 x y mask) => (VPRORVDMasked256 x y (VPMOVVec32x8ToM mask)) +(MaskedRotateRightInt32x16 x y mask) => (VPRORVDMasked512 x y (VPMOVVec32x16ToM mask)) +(MaskedRotateRightInt64x2 x y mask) => (VPRORVQMasked128 x y (VPMOVVec64x2ToM mask)) +(MaskedRotateRightInt64x4 x y mask) => (VPRORVQMasked256 x y (VPMOVVec64x4ToM mask)) +(MaskedRotateRightInt64x8 x y mask) => (VPRORVQMasked512 x y (VPMOVVec64x8ToM mask)) +(MaskedRotateRightUint32x4 x y mask) => (VPRORVDMasked128 x y (VPMOVVec32x4ToM mask)) +(MaskedRotateRightUint32x8 x y mask) => (VPRORVDMasked256 x y (VPMOVVec32x8ToM mask)) +(MaskedRotateRightUint32x16 x y mask) => (VPRORVDMasked512 x y (VPMOVVec32x16ToM mask)) +(MaskedRotateRightUint64x2 x y mask) => (VPRORVQMasked128 x y (VPMOVVec64x2ToM mask)) +(MaskedRotateRightUint64x4 x y mask) => (VPRORVQMasked256 x y (VPMOVVec64x4ToM mask)) +(MaskedRotateRightUint64x8 x y mask) => (VPRORVQMasked512 x y (VPMOVVec64x8ToM mask)) (MaskedRoundSuppressExceptionWithPrecisionFloat32x4 [a] x mask) => 
(VRNDSCALEPSMasked128 [a+8] x (VPMOVVec32x4ToM mask)) (MaskedRoundSuppressExceptionWithPrecisionFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+8] x (VPMOVVec32x8ToM mask)) (MaskedRoundSuppressExceptionWithPrecisionFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+8] x (VPMOVVec32x16ToM mask)) @@ -952,6 +1000,147 @@ (MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4 x y z mask) => (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM mask)) (MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8 x y z mask) => (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM mask)) (MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16 x y z mask) => (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM mask)) +(MaskedShiftAllLeftInt64x2 x y mask) => (VPSLLQMasked128 x y (VPMOVVec64x2ToM mask)) +(MaskedShiftAllLeftInt64x4 x y mask) => (VPSLLQMasked256 x y (VPMOVVec64x4ToM mask)) +(MaskedShiftAllLeftInt64x8 x y mask) => (VPSLLQMasked512 x y (VPMOVVec64x8ToM mask)) +(MaskedShiftAllLeftUint64x2 x y mask) => (VPSLLQMasked128 x y (VPMOVVec64x2ToM mask)) +(MaskedShiftAllLeftUint64x4 x y mask) => (VPSLLQMasked256 x y (VPMOVVec64x4ToM mask)) +(MaskedShiftAllLeftUint64x8 x y mask) => (VPSLLQMasked512 x y (VPMOVVec64x8ToM mask)) +(MaskedShiftAllLeftAndFillUpperFromInt16x8 [a] x y mask) => (VPSHLDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) +(MaskedShiftAllLeftAndFillUpperFromInt16x16 [a] x y mask) => (VPSHLDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) +(MaskedShiftAllLeftAndFillUpperFromInt16x32 [a] x y mask) => (VPSHLDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) +(MaskedShiftAllLeftAndFillUpperFromInt32x4 [a] x y mask) => (VPSHLDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) +(MaskedShiftAllLeftAndFillUpperFromInt32x8 [a] x y mask) => (VPSHLDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) +(MaskedShiftAllLeftAndFillUpperFromInt32x16 [a] x y mask) => (VPSHLDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) +(MaskedShiftAllLeftAndFillUpperFromInt64x2 [a] x y mask) => (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) +(MaskedShiftAllLeftAndFillUpperFromInt64x4 [a] x y mask) => (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) +(MaskedShiftAllLeftAndFillUpperFromInt64x8 [a] x y mask) => (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) +(MaskedShiftAllLeftAndFillUpperFromUint16x8 [a] x y mask) => (VPSHLDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) +(MaskedShiftAllLeftAndFillUpperFromUint16x16 [a] x y mask) => (VPSHLDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) +(MaskedShiftAllLeftAndFillUpperFromUint16x32 [a] x y mask) => (VPSHLDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) +(MaskedShiftAllLeftAndFillUpperFromUint32x4 [a] x y mask) => (VPSHLDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) +(MaskedShiftAllLeftAndFillUpperFromUint32x8 [a] x y mask) => (VPSHLDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) +(MaskedShiftAllLeftAndFillUpperFromUint32x16 [a] x y mask) => (VPSHLDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) +(MaskedShiftAllLeftAndFillUpperFromUint64x2 [a] x y mask) => (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) +(MaskedShiftAllLeftAndFillUpperFromUint64x4 [a] x y mask) => (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) +(MaskedShiftAllLeftAndFillUpperFromUint64x8 [a] x y mask) => (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) +(MaskedShiftAllRightInt64x2 x y mask) => (VPSRLQMasked128 x y (VPMOVVec64x2ToM mask)) +(MaskedShiftAllRightInt64x4 x y mask) => (VPSRLQMasked256 x y (VPMOVVec64x4ToM mask)) +(MaskedShiftAllRightInt64x8 x y mask) => (VPSRLQMasked512 x y (VPMOVVec64x8ToM mask)) +(MaskedShiftAllRightUint64x2 x y mask) => 
(VPSRLQMasked128 x y (VPMOVVec64x2ToM mask)) +(MaskedShiftAllRightUint64x4 x y mask) => (VPSRLQMasked256 x y (VPMOVVec64x4ToM mask)) +(MaskedShiftAllRightUint64x8 x y mask) => (VPSRLQMasked512 x y (VPMOVVec64x8ToM mask)) +(MaskedShiftAllRightAndFillUpperFromInt16x8 [a] x y mask) => (VPSHRDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) +(MaskedShiftAllRightAndFillUpperFromInt16x16 [a] x y mask) => (VPSHRDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) +(MaskedShiftAllRightAndFillUpperFromInt16x32 [a] x y mask) => (VPSHRDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) +(MaskedShiftAllRightAndFillUpperFromInt32x4 [a] x y mask) => (VPSHRDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) +(MaskedShiftAllRightAndFillUpperFromInt32x8 [a] x y mask) => (VPSHRDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) +(MaskedShiftAllRightAndFillUpperFromInt32x16 [a] x y mask) => (VPSHRDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) +(MaskedShiftAllRightAndFillUpperFromInt64x2 [a] x y mask) => (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) +(MaskedShiftAllRightAndFillUpperFromInt64x4 [a] x y mask) => (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) +(MaskedShiftAllRightAndFillUpperFromInt64x8 [a] x y mask) => (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) +(MaskedShiftAllRightAndFillUpperFromUint16x8 [a] x y mask) => (VPSHRDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) +(MaskedShiftAllRightAndFillUpperFromUint16x16 [a] x y mask) => (VPSHRDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) +(MaskedShiftAllRightAndFillUpperFromUint16x32 [a] x y mask) => (VPSHRDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) +(MaskedShiftAllRightAndFillUpperFromUint32x4 [a] x y mask) => (VPSHRDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) +(MaskedShiftAllRightAndFillUpperFromUint32x8 [a] x y mask) => (VPSHRDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) +(MaskedShiftAllRightAndFillUpperFromUint32x16 [a] x y mask) => (VPSHRDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) +(MaskedShiftAllRightAndFillUpperFromUint64x2 [a] x y mask) => (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) +(MaskedShiftAllRightAndFillUpperFromUint64x4 [a] x y mask) => (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) +(MaskedShiftAllRightAndFillUpperFromUint64x8 [a] x y mask) => (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) +(MaskedShiftAllRightSignExtendedInt64x2 x y mask) => (VPSRAQMasked128 x y (VPMOVVec64x2ToM mask)) +(MaskedShiftAllRightSignExtendedInt64x4 x y mask) => (VPSRAQMasked256 x y (VPMOVVec64x4ToM mask)) +(MaskedShiftAllRightSignExtendedInt64x8 x y mask) => (VPSRAQMasked512 x y (VPMOVVec64x8ToM mask)) +(MaskedShiftLeftInt16x8 x y mask) => (VPSLLVWMasked128 x y (VPMOVVec16x8ToM mask)) +(MaskedShiftLeftInt16x16 x y mask) => (VPSLLVWMasked256 x y (VPMOVVec16x16ToM mask)) +(MaskedShiftLeftInt16x32 x y mask) => (VPSLLVWMasked512 x y (VPMOVVec16x32ToM mask)) +(MaskedShiftLeftInt32x4 x y mask) => (VPSLLVDMasked128 x y (VPMOVVec32x4ToM mask)) +(MaskedShiftLeftInt32x8 x y mask) => (VPSLLVDMasked256 x y (VPMOVVec32x8ToM mask)) +(MaskedShiftLeftInt32x16 x y mask) => (VPSLLVDMasked512 x y (VPMOVVec32x16ToM mask)) +(MaskedShiftLeftInt64x2 x y mask) => (VPSLLVQMasked128 x y (VPMOVVec64x2ToM mask)) +(MaskedShiftLeftInt64x4 x y mask) => (VPSLLVQMasked256 x y (VPMOVVec64x4ToM mask)) +(MaskedShiftLeftInt64x8 x y mask) => (VPSLLVQMasked512 x y (VPMOVVec64x8ToM mask)) +(MaskedShiftLeftUint16x8 x y mask) => (VPSLLVWMasked128 x y (VPMOVVec16x8ToM mask)) +(MaskedShiftLeftUint16x16 x y mask) => (VPSLLVWMasked256 x y (VPMOVVec16x16ToM mask)) +(MaskedShiftLeftUint16x32 x y mask) => (VPSLLVWMasked512 x y 
(VPMOVVec16x32ToM mask)) +(MaskedShiftLeftUint32x4 x y mask) => (VPSLLVDMasked128 x y (VPMOVVec32x4ToM mask)) +(MaskedShiftLeftUint32x8 x y mask) => (VPSLLVDMasked256 x y (VPMOVVec32x8ToM mask)) +(MaskedShiftLeftUint32x16 x y mask) => (VPSLLVDMasked512 x y (VPMOVVec32x16ToM mask)) +(MaskedShiftLeftUint64x2 x y mask) => (VPSLLVQMasked128 x y (VPMOVVec64x2ToM mask)) +(MaskedShiftLeftUint64x4 x y mask) => (VPSLLVQMasked256 x y (VPMOVVec64x4ToM mask)) +(MaskedShiftLeftUint64x8 x y mask) => (VPSLLVQMasked512 x y (VPMOVVec64x8ToM mask)) +(MaskedShiftLeftAndFillUpperFromInt16x8 x y z mask) => (VPSHLDVWMasked128 x y z (VPMOVVec16x8ToM mask)) +(MaskedShiftLeftAndFillUpperFromInt16x16 x y z mask) => (VPSHLDVWMasked256 x y z (VPMOVVec16x16ToM mask)) +(MaskedShiftLeftAndFillUpperFromInt16x32 x y z mask) => (VPSHLDVWMasked512 x y z (VPMOVVec16x32ToM mask)) +(MaskedShiftLeftAndFillUpperFromInt32x4 x y z mask) => (VPSHLDVDMasked128 x y z (VPMOVVec32x4ToM mask)) +(MaskedShiftLeftAndFillUpperFromInt32x8 x y z mask) => (VPSHLDVDMasked256 x y z (VPMOVVec32x8ToM mask)) +(MaskedShiftLeftAndFillUpperFromInt32x16 x y z mask) => (VPSHLDVDMasked512 x y z (VPMOVVec32x16ToM mask)) +(MaskedShiftLeftAndFillUpperFromInt64x2 x y z mask) => (VPSHLDVQMasked128 x y z (VPMOVVec64x2ToM mask)) +(MaskedShiftLeftAndFillUpperFromInt64x4 x y z mask) => (VPSHLDVQMasked256 x y z (VPMOVVec64x4ToM mask)) +(MaskedShiftLeftAndFillUpperFromInt64x8 x y z mask) => (VPSHLDVQMasked512 x y z (VPMOVVec64x8ToM mask)) +(MaskedShiftLeftAndFillUpperFromUint16x8 x y z mask) => (VPSHLDVWMasked128 x y z (VPMOVVec16x8ToM mask)) +(MaskedShiftLeftAndFillUpperFromUint16x16 x y z mask) => (VPSHLDVWMasked256 x y z (VPMOVVec16x16ToM mask)) +(MaskedShiftLeftAndFillUpperFromUint16x32 x y z mask) => (VPSHLDVWMasked512 x y z (VPMOVVec16x32ToM mask)) +(MaskedShiftLeftAndFillUpperFromUint32x4 x y z mask) => (VPSHLDVDMasked128 x y z (VPMOVVec32x4ToM mask)) +(MaskedShiftLeftAndFillUpperFromUint32x8 x y z mask) => (VPSHLDVDMasked256 x y z (VPMOVVec32x8ToM mask)) +(MaskedShiftLeftAndFillUpperFromUint32x16 x y z mask) => (VPSHLDVDMasked512 x y z (VPMOVVec32x16ToM mask)) +(MaskedShiftLeftAndFillUpperFromUint64x2 x y z mask) => (VPSHLDVQMasked128 x y z (VPMOVVec64x2ToM mask)) +(MaskedShiftLeftAndFillUpperFromUint64x4 x y z mask) => (VPSHLDVQMasked256 x y z (VPMOVVec64x4ToM mask)) +(MaskedShiftLeftAndFillUpperFromUint64x8 x y z mask) => (VPSHLDVQMasked512 x y z (VPMOVVec64x8ToM mask)) +(MaskedShiftRightInt16x8 x y mask) => (VPSRLVWMasked128 x y (VPMOVVec16x8ToM mask)) +(MaskedShiftRightInt16x16 x y mask) => (VPSRLVWMasked256 x y (VPMOVVec16x16ToM mask)) +(MaskedShiftRightInt16x32 x y mask) => (VPSRLVWMasked512 x y (VPMOVVec16x32ToM mask)) +(MaskedShiftRightInt32x4 x y mask) => (VPSRLVDMasked128 x y (VPMOVVec32x4ToM mask)) +(MaskedShiftRightInt32x8 x y mask) => (VPSRLVDMasked256 x y (VPMOVVec32x8ToM mask)) +(MaskedShiftRightInt32x16 x y mask) => (VPSRLVDMasked512 x y (VPMOVVec32x16ToM mask)) +(MaskedShiftRightInt64x2 x y mask) => (VPSRLVQMasked128 x y (VPMOVVec64x2ToM mask)) +(MaskedShiftRightInt64x4 x y mask) => (VPSRLVQMasked256 x y (VPMOVVec64x4ToM mask)) +(MaskedShiftRightInt64x8 x y mask) => (VPSRLVQMasked512 x y (VPMOVVec64x8ToM mask)) +(MaskedShiftRightUint16x8 x y mask) => (VPSRLVWMasked128 x y (VPMOVVec16x8ToM mask)) +(MaskedShiftRightUint16x16 x y mask) => (VPSRLVWMasked256 x y (VPMOVVec16x16ToM mask)) +(MaskedShiftRightUint16x32 x y mask) => (VPSRLVWMasked512 x y (VPMOVVec16x32ToM mask)) +(MaskedShiftRightUint32x4 x y mask) => (VPSRLVDMasked128 x y 
(VPMOVVec32x4ToM mask)) +(MaskedShiftRightUint32x8 x y mask) => (VPSRLVDMasked256 x y (VPMOVVec32x8ToM mask)) +(MaskedShiftRightUint32x16 x y mask) => (VPSRLVDMasked512 x y (VPMOVVec32x16ToM mask)) +(MaskedShiftRightUint64x2 x y mask) => (VPSRLVQMasked128 x y (VPMOVVec64x2ToM mask)) +(MaskedShiftRightUint64x4 x y mask) => (VPSRLVQMasked256 x y (VPMOVVec64x4ToM mask)) +(MaskedShiftRightUint64x8 x y mask) => (VPSRLVQMasked512 x y (VPMOVVec64x8ToM mask)) +(MaskedShiftRightAndFillUpperFromInt16x8 x y z mask) => (VPSHRDVWMasked128 x y z (VPMOVVec16x8ToM mask)) +(MaskedShiftRightAndFillUpperFromInt16x16 x y z mask) => (VPSHRDVWMasked256 x y z (VPMOVVec16x16ToM mask)) +(MaskedShiftRightAndFillUpperFromInt16x32 x y z mask) => (VPSHRDVWMasked512 x y z (VPMOVVec16x32ToM mask)) +(MaskedShiftRightAndFillUpperFromInt32x4 x y z mask) => (VPSHRDVDMasked128 x y z (VPMOVVec32x4ToM mask)) +(MaskedShiftRightAndFillUpperFromInt32x8 x y z mask) => (VPSHRDVDMasked256 x y z (VPMOVVec32x8ToM mask)) +(MaskedShiftRightAndFillUpperFromInt32x16 x y z mask) => (VPSHRDVDMasked512 x y z (VPMOVVec32x16ToM mask)) +(MaskedShiftRightAndFillUpperFromInt64x2 x y z mask) => (VPSHRDVQMasked128 x y z (VPMOVVec64x2ToM mask)) +(MaskedShiftRightAndFillUpperFromInt64x4 x y z mask) => (VPSHRDVQMasked256 x y z (VPMOVVec64x4ToM mask)) +(MaskedShiftRightAndFillUpperFromInt64x8 x y z mask) => (VPSHRDVQMasked512 x y z (VPMOVVec64x8ToM mask)) +(MaskedShiftRightAndFillUpperFromUint16x8 x y z mask) => (VPSHRDVWMasked128 x y z (VPMOVVec16x8ToM mask)) +(MaskedShiftRightAndFillUpperFromUint16x16 x y z mask) => (VPSHRDVWMasked256 x y z (VPMOVVec16x16ToM mask)) +(MaskedShiftRightAndFillUpperFromUint16x32 x y z mask) => (VPSHRDVWMasked512 x y z (VPMOVVec16x32ToM mask)) +(MaskedShiftRightAndFillUpperFromUint32x4 x y z mask) => (VPSHRDVDMasked128 x y z (VPMOVVec32x4ToM mask)) +(MaskedShiftRightAndFillUpperFromUint32x8 x y z mask) => (VPSHRDVDMasked256 x y z (VPMOVVec32x8ToM mask)) +(MaskedShiftRightAndFillUpperFromUint32x16 x y z mask) => (VPSHRDVDMasked512 x y z (VPMOVVec32x16ToM mask)) +(MaskedShiftRightAndFillUpperFromUint64x2 x y z mask) => (VPSHRDVQMasked128 x y z (VPMOVVec64x2ToM mask)) +(MaskedShiftRightAndFillUpperFromUint64x4 x y z mask) => (VPSHRDVQMasked256 x y z (VPMOVVec64x4ToM mask)) +(MaskedShiftRightAndFillUpperFromUint64x8 x y z mask) => (VPSHRDVQMasked512 x y z (VPMOVVec64x8ToM mask)) +(MaskedShiftRightSignExtendedInt16x8 x y mask) => (VPSRAVWMasked128 x y (VPMOVVec16x8ToM mask)) +(MaskedShiftRightSignExtendedInt16x16 x y mask) => (VPSRAVWMasked256 x y (VPMOVVec16x16ToM mask)) +(MaskedShiftRightSignExtendedInt16x32 x y mask) => (VPSRAVWMasked512 x y (VPMOVVec16x32ToM mask)) +(MaskedShiftRightSignExtendedInt32x4 x y mask) => (VPSRAVDMasked128 x y (VPMOVVec32x4ToM mask)) +(MaskedShiftRightSignExtendedInt32x8 x y mask) => (VPSRAVDMasked256 x y (VPMOVVec32x8ToM mask)) +(MaskedShiftRightSignExtendedInt32x16 x y mask) => (VPSRAVDMasked512 x y (VPMOVVec32x16ToM mask)) +(MaskedShiftRightSignExtendedInt64x2 x y mask) => (VPSRAVQMasked128 x y (VPMOVVec64x2ToM mask)) +(MaskedShiftRightSignExtendedInt64x4 x y mask) => (VPSRAVQMasked256 x y (VPMOVVec64x4ToM mask)) +(MaskedShiftRightSignExtendedInt64x8 x y mask) => (VPSRAVQMasked512 x y (VPMOVVec64x8ToM mask)) +(MaskedShiftRightSignExtendedUint16x8 x y mask) => (VPSRAVWMasked128 x y (VPMOVVec16x8ToM mask)) +(MaskedShiftRightSignExtendedUint16x16 x y mask) => (VPSRAVWMasked256 x y (VPMOVVec16x16ToM mask)) +(MaskedShiftRightSignExtendedUint16x32 x y mask) => (VPSRAVWMasked512 x y (VPMOVVec16x32ToM 
mask)) +(MaskedShiftRightSignExtendedUint32x4 x y mask) => (VPSRAVDMasked128 x y (VPMOVVec32x4ToM mask)) +(MaskedShiftRightSignExtendedUint32x8 x y mask) => (VPSRAVDMasked256 x y (VPMOVVec32x8ToM mask)) +(MaskedShiftRightSignExtendedUint32x16 x y mask) => (VPSRAVDMasked512 x y (VPMOVVec32x16ToM mask)) +(MaskedShiftRightSignExtendedUint64x2 x y mask) => (VPSRAVQMasked128 x y (VPMOVVec64x2ToM mask)) +(MaskedShiftRightSignExtendedUint64x4 x y mask) => (VPSRAVQMasked256 x y (VPMOVVec64x4ToM mask)) +(MaskedShiftRightSignExtendedUint64x8 x y mask) => (VPSRAVQMasked512 x y (VPMOVVec64x8ToM mask)) (MaskedSqrtFloat32x4 x mask) => (VSQRTPSMasked128 x (VPMOVVec32x4ToM mask)) (MaskedSqrtFloat32x8 x mask) => (VSQRTPSMasked256 x (VPMOVVec32x8ToM mask)) (MaskedSqrtFloat32x16 x mask) => (VSQRTPSMasked512 x (VPMOVVec32x16ToM mask)) @@ -1231,6 +1420,54 @@ (PopCountUint64x2 ...) => (VPOPCNTQ128 ...) (PopCountUint64x4 ...) => (VPOPCNTQ256 ...) (PopCountUint64x8 ...) => (VPOPCNTQ512 ...) +(RotateAllLeftInt32x4 [a] x) => (VPROLD128 [a] x) +(RotateAllLeftInt32x8 [a] x) => (VPROLD256 [a] x) +(RotateAllLeftInt32x16 [a] x) => (VPROLD512 [a] x) +(RotateAllLeftInt64x2 [a] x) => (VPROLQ128 [a] x) +(RotateAllLeftInt64x4 [a] x) => (VPROLQ256 [a] x) +(RotateAllLeftInt64x8 [a] x) => (VPROLQ512 [a] x) +(RotateAllLeftUint32x4 [a] x) => (VPROLD128 [a] x) +(RotateAllLeftUint32x8 [a] x) => (VPROLD256 [a] x) +(RotateAllLeftUint32x16 [a] x) => (VPROLD512 [a] x) +(RotateAllLeftUint64x2 [a] x) => (VPROLQ128 [a] x) +(RotateAllLeftUint64x4 [a] x) => (VPROLQ256 [a] x) +(RotateAllLeftUint64x8 [a] x) => (VPROLQ512 [a] x) +(RotateAllRightInt32x4 [a] x) => (VPRORD128 [a] x) +(RotateAllRightInt32x8 [a] x) => (VPRORD256 [a] x) +(RotateAllRightInt32x16 [a] x) => (VPRORD512 [a] x) +(RotateAllRightInt64x2 [a] x) => (VPRORQ128 [a] x) +(RotateAllRightInt64x4 [a] x) => (VPRORQ256 [a] x) +(RotateAllRightInt64x8 [a] x) => (VPRORQ512 [a] x) +(RotateAllRightUint32x4 [a] x) => (VPRORD128 [a] x) +(RotateAllRightUint32x8 [a] x) => (VPRORD256 [a] x) +(RotateAllRightUint32x16 [a] x) => (VPRORD512 [a] x) +(RotateAllRightUint64x2 [a] x) => (VPRORQ128 [a] x) +(RotateAllRightUint64x4 [a] x) => (VPRORQ256 [a] x) +(RotateAllRightUint64x8 [a] x) => (VPRORQ512 [a] x) +(RotateLeftInt32x4 ...) => (VPROLVD128 ...) +(RotateLeftInt32x8 ...) => (VPROLVD256 ...) +(RotateLeftInt32x16 ...) => (VPROLVD512 ...) +(RotateLeftInt64x2 ...) => (VPROLVQ128 ...) +(RotateLeftInt64x4 ...) => (VPROLVQ256 ...) +(RotateLeftInt64x8 ...) => (VPROLVQ512 ...) +(RotateLeftUint32x4 ...) => (VPROLVD128 ...) +(RotateLeftUint32x8 ...) => (VPROLVD256 ...) +(RotateLeftUint32x16 ...) => (VPROLVD512 ...) +(RotateLeftUint64x2 ...) => (VPROLVQ128 ...) +(RotateLeftUint64x4 ...) => (VPROLVQ256 ...) +(RotateLeftUint64x8 ...) => (VPROLVQ512 ...) +(RotateRightInt32x4 ...) => (VPRORVD128 ...) +(RotateRightInt32x8 ...) => (VPRORVD256 ...) +(RotateRightInt32x16 ...) => (VPRORVD512 ...) +(RotateRightInt64x2 ...) => (VPRORVQ128 ...) +(RotateRightInt64x4 ...) => (VPRORVQ256 ...) +(RotateRightInt64x8 ...) => (VPRORVQ512 ...) +(RotateRightUint32x4 ...) => (VPRORVD128 ...) +(RotateRightUint32x8 ...) => (VPRORVD256 ...) +(RotateRightUint32x16 ...) => (VPRORVD512 ...) +(RotateRightUint64x2 ...) => (VPRORVQ128 ...) +(RotateRightUint64x4 ...) => (VPRORVQ256 ...) +(RotateRightUint64x8 ...) => (VPRORVQ512 ...) 
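
Note: the rules above lower the new rotate operations. RotateAllLeft/RotateAllRight carry an immediate count and map to VPROLD/VPROLQ/VPRORD/VPRORQ; RotateLeft/RotateRight take a per-lane count vector and map to VPROLVD/VPROLVQ/VPRORVD/VPRORVQ; the Masked* forms additionally convert the vector mask to a K register via VPMOVVec*ToM. A minimal scalar sketch of what one 32-bit lane computes (illustrative only, not the simd package API; the mod-32 count reduction is an assumption based on the instruction descriptions):

// Scalar model of 32-bit rotate lanes, for illustration only.
package main

import (
	"fmt"
	"math/bits"
)

func main() {
	x := []uint32{0x80000001, 0x0000ff00, 0xdeadbeef, 1}

	// RotateAllLeft-style: one immediate count shared by every lane (VPROLD [a] x).
	const a = 4
	allLeft := make([]uint32, len(x))
	for i, v := range x {
		allLeft[i] = bits.RotateLeft32(v, a)
	}

	// RotateRight-style: per-lane counts taken from a second vector (VPRORVD x y).
	counts := []uint32{1, 8, 16, 31}
	varRight := make([]uint32, len(x))
	for i, v := range x {
		varRight[i] = bits.RotateLeft32(v, -int(counts[i]&31)) // rotate right = negative left rotate
	}

	fmt.Printf("%08x\n%08x\n", allLeft, varRight)
}
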
(RoundFloat32x4 x) => (VROUNDPS128 [0] x) (RoundFloat32x8 x) => (VROUNDPS256 [0] x) (RoundFloat64x2 x) => (VROUNDPD128 [0] x) @@ -1295,6 +1532,167 @@ (SetElemUint16x8 [a] x y) => (VPINSRW128 [a] x y) (SetElemUint32x4 [a] x y) => (VPINSRD128 [a] x y) (SetElemUint64x2 [a] x y) => (VPINSRQ128 [a] x y) +(ShiftAllLeftInt16x8 ...) => (VPSLLW128 ...) +(ShiftAllLeftInt16x16 ...) => (VPSLLW256 ...) +(ShiftAllLeftInt32x4 ...) => (VPSLLD128 ...) +(ShiftAllLeftInt32x8 ...) => (VPSLLD256 ...) +(ShiftAllLeftInt64x2 ...) => (VPSLLQ128 ...) +(ShiftAllLeftInt64x4 ...) => (VPSLLQ256 ...) +(ShiftAllLeftInt64x8 ...) => (VPSLLQ512 ...) +(ShiftAllLeftUint16x8 ...) => (VPSLLW128 ...) +(ShiftAllLeftUint16x16 ...) => (VPSLLW256 ...) +(ShiftAllLeftUint32x4 ...) => (VPSLLD128 ...) +(ShiftAllLeftUint32x8 ...) => (VPSLLD256 ...) +(ShiftAllLeftUint64x2 ...) => (VPSLLQ128 ...) +(ShiftAllLeftUint64x4 ...) => (VPSLLQ256 ...) +(ShiftAllLeftUint64x8 ...) => (VPSLLQ512 ...) +(ShiftAllLeftAndFillUpperFromInt16x8 [a] x y) => (VPSHLDW128 [a] x y) +(ShiftAllLeftAndFillUpperFromInt16x16 [a] x y) => (VPSHLDW256 [a] x y) +(ShiftAllLeftAndFillUpperFromInt16x32 [a] x y) => (VPSHLDW512 [a] x y) +(ShiftAllLeftAndFillUpperFromInt32x4 [a] x y) => (VPSHLDD128 [a] x y) +(ShiftAllLeftAndFillUpperFromInt32x8 [a] x y) => (VPSHLDD256 [a] x y) +(ShiftAllLeftAndFillUpperFromInt32x16 [a] x y) => (VPSHLDD512 [a] x y) +(ShiftAllLeftAndFillUpperFromInt64x2 [a] x y) => (VPSHLDQ128 [a] x y) +(ShiftAllLeftAndFillUpperFromInt64x4 [a] x y) => (VPSHLDQ256 [a] x y) +(ShiftAllLeftAndFillUpperFromInt64x8 [a] x y) => (VPSHLDQ512 [a] x y) +(ShiftAllLeftAndFillUpperFromUint16x8 [a] x y) => (VPSHLDW128 [a] x y) +(ShiftAllLeftAndFillUpperFromUint16x16 [a] x y) => (VPSHLDW256 [a] x y) +(ShiftAllLeftAndFillUpperFromUint16x32 [a] x y) => (VPSHLDW512 [a] x y) +(ShiftAllLeftAndFillUpperFromUint32x4 [a] x y) => (VPSHLDD128 [a] x y) +(ShiftAllLeftAndFillUpperFromUint32x8 [a] x y) => (VPSHLDD256 [a] x y) +(ShiftAllLeftAndFillUpperFromUint32x16 [a] x y) => (VPSHLDD512 [a] x y) +(ShiftAllLeftAndFillUpperFromUint64x2 [a] x y) => (VPSHLDQ128 [a] x y) +(ShiftAllLeftAndFillUpperFromUint64x4 [a] x y) => (VPSHLDQ256 [a] x y) +(ShiftAllLeftAndFillUpperFromUint64x8 [a] x y) => (VPSHLDQ512 [a] x y) +(ShiftAllRightInt16x8 ...) => (VPSRLW128 ...) +(ShiftAllRightInt16x16 ...) => (VPSRLW256 ...) +(ShiftAllRightInt32x4 ...) => (VPSRLD128 ...) +(ShiftAllRightInt32x8 ...) => (VPSRLD256 ...) +(ShiftAllRightInt64x2 ...) => (VPSRLQ128 ...) +(ShiftAllRightInt64x4 ...) => (VPSRLQ256 ...) +(ShiftAllRightInt64x8 ...) => (VPSRLQ512 ...) +(ShiftAllRightUint16x8 ...) => (VPSRLW128 ...) +(ShiftAllRightUint16x16 ...) => (VPSRLW256 ...) +(ShiftAllRightUint32x4 ...) => (VPSRLD128 ...) +(ShiftAllRightUint32x8 ...) => (VPSRLD256 ...) +(ShiftAllRightUint64x2 ...) => (VPSRLQ128 ...) +(ShiftAllRightUint64x4 ...) => (VPSRLQ256 ...) +(ShiftAllRightUint64x8 ...) => (VPSRLQ512 ...) 
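
The ShiftAll* rules just above apply one shift count to every lane (VPSLLW/D/Q, VPSRLW/D/Q and, for the sign-extended forms, VPSRAW/D/Q), while the per-lane Shift* and ShiftRightSignExtended* rules further below use the variable-count VPSLLV/VPSRLV/VPSRAV family. A scalar sketch of that difference, and of logical versus sign-extended (arithmetic) right shifts (illustrative only, not the simd package API):

// Scalar model contrasting a shared shift count with per-lane counts.
package main

import "fmt"

func main() {
	x := []int64{-8, 8, 1 << 40, -1}

	// ShiftAllRight-style: one count for every lane, vacated bits become zero (VPSRLQ).
	const s = 2
	logical := make([]int64, len(x))
	for i, v := range x {
		logical[i] = int64(uint64(v) >> s)
	}

	// ShiftRightSignExtended-style: per-lane counts, sign bit replicated (VPSRAVQ).
	counts := []uint{1, 2, 3, 4}
	arithmetic := make([]int64, len(x))
	for i, v := range x {
		arithmetic[i] = v >> counts[i]
	}

	fmt.Println(logical, arithmetic)
}
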
+(ShiftAllRightAndFillUpperFromInt16x8 [a] x y) => (VPSHRDW128 [a] x y) +(ShiftAllRightAndFillUpperFromInt16x16 [a] x y) => (VPSHRDW256 [a] x y) +(ShiftAllRightAndFillUpperFromInt16x32 [a] x y) => (VPSHRDW512 [a] x y) +(ShiftAllRightAndFillUpperFromInt32x4 [a] x y) => (VPSHRDD128 [a] x y) +(ShiftAllRightAndFillUpperFromInt32x8 [a] x y) => (VPSHRDD256 [a] x y) +(ShiftAllRightAndFillUpperFromInt32x16 [a] x y) => (VPSHRDD512 [a] x y) +(ShiftAllRightAndFillUpperFromInt64x2 [a] x y) => (VPSHRDQ128 [a] x y) +(ShiftAllRightAndFillUpperFromInt64x4 [a] x y) => (VPSHRDQ256 [a] x y) +(ShiftAllRightAndFillUpperFromInt64x8 [a] x y) => (VPSHRDQ512 [a] x y) +(ShiftAllRightAndFillUpperFromUint16x8 [a] x y) => (VPSHRDW128 [a] x y) +(ShiftAllRightAndFillUpperFromUint16x16 [a] x y) => (VPSHRDW256 [a] x y) +(ShiftAllRightAndFillUpperFromUint16x32 [a] x y) => (VPSHRDW512 [a] x y) +(ShiftAllRightAndFillUpperFromUint32x4 [a] x y) => (VPSHRDD128 [a] x y) +(ShiftAllRightAndFillUpperFromUint32x8 [a] x y) => (VPSHRDD256 [a] x y) +(ShiftAllRightAndFillUpperFromUint32x16 [a] x y) => (VPSHRDD512 [a] x y) +(ShiftAllRightAndFillUpperFromUint64x2 [a] x y) => (VPSHRDQ128 [a] x y) +(ShiftAllRightAndFillUpperFromUint64x4 [a] x y) => (VPSHRDQ256 [a] x y) +(ShiftAllRightAndFillUpperFromUint64x8 [a] x y) => (VPSHRDQ512 [a] x y) +(ShiftAllRightSignExtendedInt16x8 ...) => (VPSRAW128 ...) +(ShiftAllRightSignExtendedInt16x16 ...) => (VPSRAW256 ...) +(ShiftAllRightSignExtendedInt32x4 ...) => (VPSRAD128 ...) +(ShiftAllRightSignExtendedInt32x8 ...) => (VPSRAD256 ...) +(ShiftAllRightSignExtendedInt64x2 ...) => (VPSRAQ128 ...) +(ShiftAllRightSignExtendedInt64x4 ...) => (VPSRAQ256 ...) +(ShiftAllRightSignExtendedInt64x8 ...) => (VPSRAQ512 ...) +(ShiftLeftInt16x8 ...) => (VPSLLVW128 ...) +(ShiftLeftInt16x16 ...) => (VPSLLVW256 ...) +(ShiftLeftInt16x32 ...) => (VPSLLVW512 ...) +(ShiftLeftInt32x4 ...) => (VPSLLVD128 ...) +(ShiftLeftInt32x8 ...) => (VPSLLVD256 ...) +(ShiftLeftInt32x16 ...) => (VPSLLVD512 ...) +(ShiftLeftInt64x2 ...) => (VPSLLVQ128 ...) +(ShiftLeftInt64x4 ...) => (VPSLLVQ256 ...) +(ShiftLeftInt64x8 ...) => (VPSLLVQ512 ...) +(ShiftLeftUint16x8 ...) => (VPSLLVW128 ...) +(ShiftLeftUint16x16 ...) => (VPSLLVW256 ...) +(ShiftLeftUint16x32 ...) => (VPSLLVW512 ...) +(ShiftLeftUint32x4 ...) => (VPSLLVD128 ...) +(ShiftLeftUint32x8 ...) => (VPSLLVD256 ...) +(ShiftLeftUint32x16 ...) => (VPSLLVD512 ...) +(ShiftLeftUint64x2 ...) => (VPSLLVQ128 ...) +(ShiftLeftUint64x4 ...) => (VPSLLVQ256 ...) +(ShiftLeftUint64x8 ...) => (VPSLLVQ512 ...) +(ShiftLeftAndFillUpperFromInt16x8 ...) => (VPSHLDVW128 ...) +(ShiftLeftAndFillUpperFromInt16x16 ...) => (VPSHLDVW256 ...) +(ShiftLeftAndFillUpperFromInt16x32 ...) => (VPSHLDVW512 ...) +(ShiftLeftAndFillUpperFromInt32x4 ...) => (VPSHLDVD128 ...) +(ShiftLeftAndFillUpperFromInt32x8 ...) => (VPSHLDVD256 ...) +(ShiftLeftAndFillUpperFromInt32x16 ...) => (VPSHLDVD512 ...) +(ShiftLeftAndFillUpperFromInt64x2 ...) => (VPSHLDVQ128 ...) +(ShiftLeftAndFillUpperFromInt64x4 ...) => (VPSHLDVQ256 ...) +(ShiftLeftAndFillUpperFromInt64x8 ...) => (VPSHLDVQ512 ...) +(ShiftLeftAndFillUpperFromUint16x8 ...) => (VPSHLDVW128 ...) +(ShiftLeftAndFillUpperFromUint16x16 ...) => (VPSHLDVW256 ...) +(ShiftLeftAndFillUpperFromUint16x32 ...) => (VPSHLDVW512 ...) +(ShiftLeftAndFillUpperFromUint32x4 ...) => (VPSHLDVD128 ...) +(ShiftLeftAndFillUpperFromUint32x8 ...) => (VPSHLDVD256 ...) +(ShiftLeftAndFillUpperFromUint32x16 ...) => (VPSHLDVD512 ...) +(ShiftLeftAndFillUpperFromUint64x2 ...) => (VPSHLDVQ128 ...) 
+(ShiftLeftAndFillUpperFromUint64x4 ...) => (VPSHLDVQ256 ...) +(ShiftLeftAndFillUpperFromUint64x8 ...) => (VPSHLDVQ512 ...) +(ShiftRightInt16x8 ...) => (VPSRLVW128 ...) +(ShiftRightInt16x16 ...) => (VPSRLVW256 ...) +(ShiftRightInt16x32 ...) => (VPSRLVW512 ...) +(ShiftRightInt32x4 ...) => (VPSRLVD128 ...) +(ShiftRightInt32x8 ...) => (VPSRLVD256 ...) +(ShiftRightInt32x16 ...) => (VPSRLVD512 ...) +(ShiftRightInt64x2 ...) => (VPSRLVQ128 ...) +(ShiftRightInt64x4 ...) => (VPSRLVQ256 ...) +(ShiftRightInt64x8 ...) => (VPSRLVQ512 ...) +(ShiftRightUint16x8 ...) => (VPSRLVW128 ...) +(ShiftRightUint16x16 ...) => (VPSRLVW256 ...) +(ShiftRightUint16x32 ...) => (VPSRLVW512 ...) +(ShiftRightUint32x4 ...) => (VPSRLVD128 ...) +(ShiftRightUint32x8 ...) => (VPSRLVD256 ...) +(ShiftRightUint32x16 ...) => (VPSRLVD512 ...) +(ShiftRightUint64x2 ...) => (VPSRLVQ128 ...) +(ShiftRightUint64x4 ...) => (VPSRLVQ256 ...) +(ShiftRightUint64x8 ...) => (VPSRLVQ512 ...) +(ShiftRightAndFillUpperFromInt16x8 ...) => (VPSHRDVW128 ...) +(ShiftRightAndFillUpperFromInt16x16 ...) => (VPSHRDVW256 ...) +(ShiftRightAndFillUpperFromInt16x32 ...) => (VPSHRDVW512 ...) +(ShiftRightAndFillUpperFromInt32x4 ...) => (VPSHRDVD128 ...) +(ShiftRightAndFillUpperFromInt32x8 ...) => (VPSHRDVD256 ...) +(ShiftRightAndFillUpperFromInt32x16 ...) => (VPSHRDVD512 ...) +(ShiftRightAndFillUpperFromInt64x2 ...) => (VPSHRDVQ128 ...) +(ShiftRightAndFillUpperFromInt64x4 ...) => (VPSHRDVQ256 ...) +(ShiftRightAndFillUpperFromInt64x8 ...) => (VPSHRDVQ512 ...) +(ShiftRightAndFillUpperFromUint16x8 ...) => (VPSHRDVW128 ...) +(ShiftRightAndFillUpperFromUint16x16 ...) => (VPSHRDVW256 ...) +(ShiftRightAndFillUpperFromUint16x32 ...) => (VPSHRDVW512 ...) +(ShiftRightAndFillUpperFromUint32x4 ...) => (VPSHRDVD128 ...) +(ShiftRightAndFillUpperFromUint32x8 ...) => (VPSHRDVD256 ...) +(ShiftRightAndFillUpperFromUint32x16 ...) => (VPSHRDVD512 ...) +(ShiftRightAndFillUpperFromUint64x2 ...) => (VPSHRDVQ128 ...) +(ShiftRightAndFillUpperFromUint64x4 ...) => (VPSHRDVQ256 ...) +(ShiftRightAndFillUpperFromUint64x8 ...) => (VPSHRDVQ512 ...) +(ShiftRightSignExtendedInt16x8 ...) => (VPSRAVW128 ...) +(ShiftRightSignExtendedInt16x16 ...) => (VPSRAVW256 ...) +(ShiftRightSignExtendedInt16x32 ...) => (VPSRAVW512 ...) +(ShiftRightSignExtendedInt32x4 ...) => (VPSRAVD128 ...) +(ShiftRightSignExtendedInt32x8 ...) => (VPSRAVD256 ...) +(ShiftRightSignExtendedInt32x16 ...) => (VPSRAVD512 ...) +(ShiftRightSignExtendedInt64x2 ...) => (VPSRAVQ128 ...) +(ShiftRightSignExtendedInt64x4 ...) => (VPSRAVQ256 ...) +(ShiftRightSignExtendedInt64x8 ...) => (VPSRAVQ512 ...) +(ShiftRightSignExtendedUint16x8 ...) => (VPSRAVW128 ...) +(ShiftRightSignExtendedUint16x16 ...) => (VPSRAVW256 ...) +(ShiftRightSignExtendedUint16x32 ...) => (VPSRAVW512 ...) +(ShiftRightSignExtendedUint32x4 ...) => (VPSRAVD128 ...) +(ShiftRightSignExtendedUint32x8 ...) => (VPSRAVD256 ...) +(ShiftRightSignExtendedUint32x16 ...) => (VPSRAVD512 ...) +(ShiftRightSignExtendedUint64x2 ...) => (VPSRAVQ128 ...) +(ShiftRightSignExtendedUint64x4 ...) => (VPSRAVQ256 ...) +(ShiftRightSignExtendedUint64x8 ...) => (VPSRAVQ512 ...) (SignInt8x16 ...) => (VPSIGNB128 ...) (SignInt8x32 ...) => (VPSIGNB256 ...) (SignInt16x8 ...) => (VPSIGNW128 ...) 
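
The *AndFillUpperFrom rules in this file lower to the AVX-512 VBMI2 concatenate-and-shift ("funnel shift") instructions: the immediate forms map to VPSHLDW/D/Q and VPSHRDW/D/Q, and the variable forms map to VPSHLDVW/D/Q and VPSHRDVW/D/Q, whose per-lane counts come from a third vector operand (hence the three-input, resultInArg0 ops defined in simdAMD64ops.go below). Per lane, the two sources are concatenated and the combined value is shifted, so the bits vacated in the shifted operand are filled from the other source; the Masked* variants again route the mask through VPMOVVec*ToM. A minimal scalar sketch of one 32-bit lane, assuming counts are reduced modulo the lane width and with the operand order chosen for illustration (not the simd package API):

// Scalar model of the per-lane concatenate-and-shift operations.
package main

import "fmt"

// shldd models a VPSHLDD lane: shift x left by k and fill the vacated
// low bits from the high bits of y (upper 32 bits of the shifted concatenation).
func shldd(x, y uint32, k uint) uint32 {
	return uint32((uint64(x)<<32 | uint64(y)) << (k & 31) >> 32)
}

// shrdd models a VPSHRDD lane: shift x right by k and fill the vacated
// high bits from the low bits of y (lower 32 bits of the shifted concatenation).
func shrdd(x, y uint32, k uint) uint32 {
	return uint32((uint64(y)<<32 | uint64(x)) >> (k & 31))
}

func main() {
	fmt.Printf("%08x %08x\n", shldd(0x0000beef, 0xdead0000, 16), shrdd(0xdead0000, 0x0000beef, 16))
}
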
diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go index 93b136230d0..cbddbe0ff6e 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go @@ -233,6 +233,11 @@ func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp, {name: "VPOPCNTWMasked256", argLength: 2, reg: fpkfp, asm: "VPOPCNTW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPADDSWMasked256", argLength: 3, reg: fp2kfp, asm: "VPADDSW", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPSUBSWMasked256", argLength: 3, reg: fp2kfp, asm: "VPSUBSW", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSLLVWMasked256", argLength: 3, reg: fp2kfp, asm: "VPSLLVW", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHLDVWMasked256", argLength: 4, reg: fp3kfp, asm: "VPSHLDVW", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VPSRLVWMasked256", argLength: 3, reg: fp2kfp, asm: "VPSRLVW", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHRDVWMasked256", argLength: 4, reg: fp3kfp, asm: "VPSHRDVW", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VPSRAVWMasked256", argLength: 3, reg: fp2kfp, asm: "VPSRAVW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSUBWMasked256", argLength: 3, reg: fp2kfp, asm: "VPSUBW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPMAXSW256", argLength: 2, reg: fp21, asm: "VPMAXSW", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMINSW256", argLength: 2, reg: fp21, asm: "VPMINSW", commutative: true, typ: "Vec256", resultInArg0: false}, @@ -246,6 +251,14 @@ func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp, {name: "VPHADDSW256", argLength: 2, reg: fp21, asm: "VPHADDSW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPHSUBSW256", argLength: 2, reg: fp21, asm: "VPHSUBSW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSUBSW256", argLength: 2, reg: fp21, asm: "VPSUBSW", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSLLW256", argLength: 2, reg: fp21, asm: "VPSLLW", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRLW256", argLength: 2, reg: fp21, asm: "VPSRLW", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRAW256", argLength: 2, reg: fp21, asm: "VPSRAW", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSLLVW256", argLength: 2, reg: fp21, asm: "VPSLLVW", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHLDVW256", argLength: 3, reg: fp31, asm: "VPSHLDVW", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VPSRLVW256", argLength: 2, reg: fp21, asm: "VPSRLVW", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHRDVW256", argLength: 3, reg: fp31, asm: "VPSHRDVW", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VPSRAVW256", argLength: 2, reg: fp21, asm: "VPSRAVW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSIGNW256", argLength: 2, reg: fp21, asm: "VPSIGNW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSUBW256", argLength: 2, reg: fp21, asm: "VPSUBW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPABSW512", argLength: 1, reg: fp11, asm: "VPABSW", commutative: false, typ: "Vec512", resultInArg0: false}, @@ -260,6 +273,11 @@ 
func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp, {name: "VPOPCNTWMasked512", argLength: 2, reg: fpkfp, asm: "VPOPCNTW", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPADDSWMasked512", argLength: 3, reg: fp2kfp, asm: "VPADDSW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPSUBSWMasked512", argLength: 3, reg: fp2kfp, asm: "VPSUBSW", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSLLVWMasked512", argLength: 3, reg: fp2kfp, asm: "VPSLLVW", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHLDVWMasked512", argLength: 4, reg: fp3kfp, asm: "VPSHLDVW", commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VPSRLVWMasked512", argLength: 3, reg: fp2kfp, asm: "VPSRLVW", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHRDVWMasked512", argLength: 4, reg: fp3kfp, asm: "VPSHRDVW", commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VPSRAVWMasked512", argLength: 3, reg: fp2kfp, asm: "VPSRAVW", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSUBWMasked512", argLength: 3, reg: fp2kfp, asm: "VPSUBW", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPMAXSW512", argLength: 2, reg: fp21, asm: "VPMAXSW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMINSW512", argLength: 2, reg: fp21, asm: "VPMINSW", commutative: true, typ: "Vec512", resultInArg0: false}, @@ -269,6 +287,11 @@ func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp, {name: "VPOPCNTW512", argLength: 1, reg: fp11, asm: "VPOPCNTW", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPADDSW512", argLength: 2, reg: fp21, asm: "VPADDSW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPSUBSW512", argLength: 2, reg: fp21, asm: "VPSUBSW", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSLLVW512", argLength: 2, reg: fp21, asm: "VPSLLVW", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHLDVW512", argLength: 3, reg: fp31, asm: "VPSHLDVW", commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VPSRLVW512", argLength: 2, reg: fp21, asm: "VPSRLVW", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHRDVW512", argLength: 3, reg: fp31, asm: "VPSHRDVW", commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VPSRAVW512", argLength: 2, reg: fp21, asm: "VPSRAVW", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSUBW512", argLength: 2, reg: fp21, asm: "VPSUBW", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPABSW128", argLength: 1, reg: fp11, asm: "VPABSW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPADDW128", argLength: 2, reg: fp21, asm: "VPADDW", commutative: true, typ: "Vec128", resultInArg0: false}, @@ -284,6 +307,11 @@ func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp, {name: "VPOPCNTWMasked128", argLength: 2, reg: fpkfp, asm: "VPOPCNTW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPADDSWMasked128", argLength: 3, reg: fp2kfp, asm: "VPADDSW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPSUBSWMasked128", argLength: 3, reg: fp2kfp, asm: "VPSUBSW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSLLVWMasked128", argLength: 3, reg: fp2kfp, asm: "VPSLLVW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHLDVWMasked128", argLength: 4, reg: fp3kfp, 
asm: "VPSHLDVW", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPSRLVWMasked128", argLength: 3, reg: fp2kfp, asm: "VPSRLVW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHRDVWMasked128", argLength: 4, reg: fp3kfp, asm: "VPSHRDVW", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPSRAVWMasked128", argLength: 3, reg: fp2kfp, asm: "VPSRAVW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSUBWMasked128", argLength: 3, reg: fp2kfp, asm: "VPSUBW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMAXSW128", argLength: 2, reg: fp21, asm: "VPMAXSW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMINSW128", argLength: 2, reg: fp21, asm: "VPMINSW", commutative: true, typ: "Vec128", resultInArg0: false}, @@ -297,6 +325,14 @@ func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp, {name: "VPHADDSW128", argLength: 2, reg: fp21, asm: "VPHADDSW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPHSUBSW128", argLength: 2, reg: fp21, asm: "VPHSUBSW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSUBSW128", argLength: 2, reg: fp21, asm: "VPSUBSW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSLLW128", argLength: 2, reg: fp21, asm: "VPSLLW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRLW128", argLength: 2, reg: fp21, asm: "VPSRLW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRAW128", argLength: 2, reg: fp21, asm: "VPSRAW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSLLVW128", argLength: 2, reg: fp21, asm: "VPSLLVW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHLDVW128", argLength: 3, reg: fp31, asm: "VPSHLDVW", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPSRLVW128", argLength: 2, reg: fp21, asm: "VPSRLVW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHRDVW128", argLength: 3, reg: fp31, asm: "VPSHRDVW", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPSRAVW128", argLength: 2, reg: fp21, asm: "VPSRAVW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSIGNW128", argLength: 2, reg: fp21, asm: "VPSIGNW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSUBW128", argLength: 2, reg: fp21, asm: "VPSUBW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPABSD512", argLength: 1, reg: fp11, asm: "VPABSD", commutative: false, typ: "Vec512", resultInArg0: false}, @@ -313,8 +349,15 @@ func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp, {name: "VPORDMasked512", argLength: 3, reg: fp2kfp, asm: "VPORD", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPDPWSSDMasked512", argLength: 4, reg: fp3kfp, asm: "VPDPWSSD", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VPOPCNTDMasked512", argLength: 2, reg: fpkfp, asm: "VPOPCNTD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPROLVDMasked512", argLength: 3, reg: fp2kfp, asm: "VPROLVD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPRORVDMasked512", argLength: 3, reg: fp2kfp, asm: "VPRORVD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPDPWSSDSMasked512", argLength: 4, reg: fp3kfp, asm: "VPDPWSSDS", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VPDPBUSDSMasked512", argLength: 4, reg: fp3kfp, asm: "VPDPBUSDS", commutative: 
false, typ: "Vec512", resultInArg0: true}, + {name: "VPSLLVDMasked512", argLength: 3, reg: fp2kfp, asm: "VPSLLVD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHLDVDMasked512", argLength: 4, reg: fp3kfp, asm: "VPSHLDVD", commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VPSRLVDMasked512", argLength: 3, reg: fp2kfp, asm: "VPSRLVD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHRDVDMasked512", argLength: 4, reg: fp3kfp, asm: "VPSHRDVD", commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VPSRAVDMasked512", argLength: 3, reg: fp2kfp, asm: "VPSRAVD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSUBDMasked512", argLength: 3, reg: fp2kfp, asm: "VPSUBD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPDPBUSDMasked512", argLength: 4, reg: fp3kfp, asm: "VPDPBUSD", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VPXORDMasked512", argLength: 3, reg: fp2kfp, asm: "VPXORD", commutative: true, typ: "Vec512", resultInArg0: false}, @@ -324,8 +367,15 @@ func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp, {name: "VPORD512", argLength: 2, reg: fp21, asm: "VPORD", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPDPWSSD512", argLength: 3, reg: fp31, asm: "VPDPWSSD", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VPOPCNTD512", argLength: 1, reg: fp11, asm: "VPOPCNTD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPROLVD512", argLength: 2, reg: fp21, asm: "VPROLVD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPRORVD512", argLength: 2, reg: fp21, asm: "VPRORVD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPDPWSSDS512", argLength: 3, reg: fp31, asm: "VPDPWSSDS", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VPDPBUSDS512", argLength: 3, reg: fp31, asm: "VPDPBUSDS", commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VPSLLVD512", argLength: 2, reg: fp21, asm: "VPSLLVD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHLDVD512", argLength: 3, reg: fp31, asm: "VPSHLDVD", commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VPSRLVD512", argLength: 2, reg: fp21, asm: "VPSRLVD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHRDVD512", argLength: 3, reg: fp31, asm: "VPSHRDVD", commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VPSRAVD512", argLength: 2, reg: fp21, asm: "VPSRAVD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSUBD512", argLength: 2, reg: fp21, asm: "VPSUBD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPDPBUSD512", argLength: 3, reg: fp31, asm: "VPDPBUSD", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VPXORD512", argLength: 2, reg: fp21, asm: "VPXORD", commutative: true, typ: "Vec512", resultInArg0: false}, @@ -343,8 +393,15 @@ func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp, {name: "VPORDMasked128", argLength: 3, reg: fp2kfp, asm: "VPORD", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPDPWSSDMasked128", argLength: 4, reg: fp3kfp, asm: "VPDPWSSD", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPOPCNTDMasked128", argLength: 2, reg: fpkfp, asm: "VPOPCNTD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPROLVDMasked128", argLength: 3, reg: fp2kfp, asm: "VPROLVD", commutative: false, typ: "Vec128", 
resultInArg0: false}, + {name: "VPRORVDMasked128", argLength: 3, reg: fp2kfp, asm: "VPRORVD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPDPWSSDSMasked128", argLength: 4, reg: fp3kfp, asm: "VPDPWSSDS", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPDPBUSDSMasked128", argLength: 4, reg: fp3kfp, asm: "VPDPBUSDS", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPSLLVDMasked128", argLength: 3, reg: fp2kfp, asm: "VPSLLVD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHLDVDMasked128", argLength: 4, reg: fp3kfp, asm: "VPSHLDVD", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPSRLVDMasked128", argLength: 3, reg: fp2kfp, asm: "VPSRLVD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHRDVDMasked128", argLength: 4, reg: fp3kfp, asm: "VPSHRDVD", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPSRAVDMasked128", argLength: 3, reg: fp2kfp, asm: "VPSRAVD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSUBDMasked128", argLength: 3, reg: fp2kfp, asm: "VPSUBD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPDPBUSDMasked128", argLength: 4, reg: fp3kfp, asm: "VPDPBUSD", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPXORDMasked128", argLength: 3, reg: fp2kfp, asm: "VPXORD", commutative: true, typ: "Vec128", resultInArg0: false}, @@ -356,8 +413,18 @@ func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp, {name: "VPHADDD128", argLength: 2, reg: fp21, asm: "VPHADDD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPHSUBD128", argLength: 2, reg: fp21, asm: "VPHSUBD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPOPCNTD128", argLength: 1, reg: fp11, asm: "VPOPCNTD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPROLVD128", argLength: 2, reg: fp21, asm: "VPROLVD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPRORVD128", argLength: 2, reg: fp21, asm: "VPRORVD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPDPWSSDS128", argLength: 3, reg: fp31, asm: "VPDPWSSDS", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPDPBUSDS128", argLength: 3, reg: fp31, asm: "VPDPBUSDS", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPSLLD128", argLength: 2, reg: fp21, asm: "VPSLLD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRLD128", argLength: 2, reg: fp21, asm: "VPSRLD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRAD128", argLength: 2, reg: fp21, asm: "VPSRAD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSLLVD128", argLength: 2, reg: fp21, asm: "VPSLLVD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHLDVD128", argLength: 3, reg: fp31, asm: "VPSHLDVD", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPSRLVD128", argLength: 2, reg: fp21, asm: "VPSRLVD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHRDVD128", argLength: 3, reg: fp31, asm: "VPSHRDVD", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPSRAVD128", argLength: 2, reg: fp21, asm: "VPSRAVD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSIGND128", argLength: 2, reg: fp21, asm: "VPSIGND", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSUBD128", argLength: 2, reg: fp21, asm: "VPSUBD", commutative: false, typ: 
"Vec128", resultInArg0: false}, {name: "VPDPBUSD128", argLength: 3, reg: fp31, asm: "VPDPBUSD", commutative: false, typ: "Vec128", resultInArg0: true}, @@ -375,8 +442,15 @@ func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp, {name: "VPORDMasked256", argLength: 3, reg: fp2kfp, asm: "VPORD", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPDPWSSDMasked256", argLength: 4, reg: fp3kfp, asm: "VPDPWSSD", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPOPCNTDMasked256", argLength: 2, reg: fpkfp, asm: "VPOPCNTD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPROLVDMasked256", argLength: 3, reg: fp2kfp, asm: "VPROLVD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPRORVDMasked256", argLength: 3, reg: fp2kfp, asm: "VPRORVD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPDPWSSDSMasked256", argLength: 4, reg: fp3kfp, asm: "VPDPWSSDS", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPDPBUSDSMasked256", argLength: 4, reg: fp3kfp, asm: "VPDPBUSDS", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VPSLLVDMasked256", argLength: 3, reg: fp2kfp, asm: "VPSLLVD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHLDVDMasked256", argLength: 4, reg: fp3kfp, asm: "VPSHLDVD", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VPSRLVDMasked256", argLength: 3, reg: fp2kfp, asm: "VPSRLVD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHRDVDMasked256", argLength: 4, reg: fp3kfp, asm: "VPSHRDVD", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VPSRAVDMasked256", argLength: 3, reg: fp2kfp, asm: "VPSRAVD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSUBDMasked256", argLength: 3, reg: fp2kfp, asm: "VPSUBD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPDPBUSDMasked256", argLength: 4, reg: fp3kfp, asm: "VPDPBUSD", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPXORDMasked256", argLength: 3, reg: fp2kfp, asm: "VPXORD", commutative: true, typ: "Vec256", resultInArg0: false}, @@ -388,8 +462,18 @@ func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp, {name: "VPHADDD256", argLength: 2, reg: fp21, asm: "VPHADDD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPHSUBD256", argLength: 2, reg: fp21, asm: "VPHSUBD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPOPCNTD256", argLength: 1, reg: fp11, asm: "VPOPCNTD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPROLVD256", argLength: 2, reg: fp21, asm: "VPROLVD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPRORVD256", argLength: 2, reg: fp21, asm: "VPRORVD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPDPWSSDS256", argLength: 3, reg: fp31, asm: "VPDPWSSDS", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPDPBUSDS256", argLength: 3, reg: fp31, asm: "VPDPBUSDS", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VPSLLD256", argLength: 2, reg: fp21, asm: "VPSLLD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRLD256", argLength: 2, reg: fp21, asm: "VPSRLD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRAD256", argLength: 2, reg: fp21, asm: "VPSRAD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSLLVD256", argLength: 2, reg: fp21, asm: "VPSLLVD", commutative: 
false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHLDVD256", argLength: 3, reg: fp31, asm: "VPSHLDVD", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VPSRLVD256", argLength: 2, reg: fp21, asm: "VPSRLVD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHRDVD256", argLength: 3, reg: fp31, asm: "VPSHRDVD", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VPSRAVD256", argLength: 2, reg: fp21, asm: "VPSRAVD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSIGND256", argLength: 2, reg: fp21, asm: "VPSIGND", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSUBD256", argLength: 2, reg: fp21, asm: "VPSUBD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPDPBUSD256", argLength: 3, reg: fp31, asm: "VPDPBUSD", commutative: false, typ: "Vec256", resultInArg0: true}, @@ -406,12 +490,32 @@ func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp, {name: "VPMULLQMasked128", argLength: 3, reg: fp2kfp, asm: "VPMULLQ", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPORQMasked128", argLength: 3, reg: fp2kfp, asm: "VPORQ", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPOPCNTQMasked128", argLength: 2, reg: fpkfp, asm: "VPOPCNTQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPROLVQMasked128", argLength: 3, reg: fp2kfp, asm: "VPROLVQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPRORVQMasked128", argLength: 3, reg: fp2kfp, asm: "VPRORVQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSLLQMasked128", argLength: 3, reg: fp2kfp, asm: "VPSLLQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRLQMasked128", argLength: 3, reg: fp2kfp, asm: "VPSRLQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRAQMasked128", argLength: 3, reg: fp2kfp, asm: "VPSRAQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSLLVQMasked128", argLength: 3, reg: fp2kfp, asm: "VPSLLVQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHLDVQMasked128", argLength: 4, reg: fp3kfp, asm: "VPSHLDVQ", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPSRLVQMasked128", argLength: 3, reg: fp2kfp, asm: "VPSRLVQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHRDVQMasked128", argLength: 4, reg: fp3kfp, asm: "VPSHRDVQ", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPSRAVQMasked128", argLength: 3, reg: fp2kfp, asm: "VPSRAVQ", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSUBQMasked128", argLength: 3, reg: fp2kfp, asm: "VPSUBQ", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPXORQMasked128", argLength: 3, reg: fp2kfp, asm: "VPXORQ", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMAXSQ128", argLength: 2, reg: fp21, asm: "VPMAXSQ", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMINSQ128", argLength: 2, reg: fp21, asm: "VPMINSQ", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMULLQ128", argLength: 2, reg: fp21, asm: "VPMULLQ", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPOPCNTQ128", argLength: 1, reg: fp11, asm: "VPOPCNTQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPROLVQ128", argLength: 2, reg: fp21, asm: "VPROLVQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPRORVQ128", argLength: 2, reg: fp21, asm: 
"VPRORVQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSLLQ128", argLength: 2, reg: fp21, asm: "VPSLLQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRLQ128", argLength: 2, reg: fp21, asm: "VPSRLQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRAQ128", argLength: 2, reg: fp21, asm: "VPSRAQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSLLVQ128", argLength: 2, reg: fp21, asm: "VPSLLVQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHLDVQ128", argLength: 3, reg: fp31, asm: "VPSHLDVQ", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPSRLVQ128", argLength: 2, reg: fp21, asm: "VPSRLVQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHRDVQ128", argLength: 3, reg: fp31, asm: "VPSHRDVQ", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPSRAVQ128", argLength: 2, reg: fp21, asm: "VPSRAVQ", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSUBQ128", argLength: 2, reg: fp21, asm: "VPSUBQ", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPABSQ256", argLength: 1, reg: fp11, asm: "VPABSQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPADDQ256", argLength: 2, reg: fp21, asm: "VPADDQ", commutative: true, typ: "Vec256", resultInArg0: false}, @@ -427,12 +531,32 @@ func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp, {name: "VPMULLQMasked256", argLength: 3, reg: fp2kfp, asm: "VPMULLQ", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPORQMasked256", argLength: 3, reg: fp2kfp, asm: "VPORQ", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPOPCNTQMasked256", argLength: 2, reg: fpkfp, asm: "VPOPCNTQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPROLVQMasked256", argLength: 3, reg: fp2kfp, asm: "VPROLVQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPRORVQMasked256", argLength: 3, reg: fp2kfp, asm: "VPRORVQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSLLQMasked256", argLength: 3, reg: fp2kfp, asm: "VPSLLQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRLQMasked256", argLength: 3, reg: fp2kfp, asm: "VPSRLQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRAQMasked256", argLength: 3, reg: fp2kfp, asm: "VPSRAQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSLLVQMasked256", argLength: 3, reg: fp2kfp, asm: "VPSLLVQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHLDVQMasked256", argLength: 4, reg: fp3kfp, asm: "VPSHLDVQ", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VPSRLVQMasked256", argLength: 3, reg: fp2kfp, asm: "VPSRLVQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHRDVQMasked256", argLength: 4, reg: fp3kfp, asm: "VPSHRDVQ", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VPSRAVQMasked256", argLength: 3, reg: fp2kfp, asm: "VPSRAVQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSUBQMasked256", argLength: 3, reg: fp2kfp, asm: "VPSUBQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPXORQMasked256", argLength: 3, reg: fp2kfp, asm: "VPXORQ", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMAXSQ256", argLength: 2, reg: fp21, asm: "VPMAXSQ", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMINSQ256", argLength: 2, reg: 
fp21, asm: "VPMINSQ", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMULLQ256", argLength: 2, reg: fp21, asm: "VPMULLQ", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPOPCNTQ256", argLength: 1, reg: fp11, asm: "VPOPCNTQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPROLVQ256", argLength: 2, reg: fp21, asm: "VPROLVQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPRORVQ256", argLength: 2, reg: fp21, asm: "VPRORVQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSLLQ256", argLength: 2, reg: fp21, asm: "VPSLLQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRLQ256", argLength: 2, reg: fp21, asm: "VPSRLQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRAQ256", argLength: 2, reg: fp21, asm: "VPSRAQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSLLVQ256", argLength: 2, reg: fp21, asm: "VPSLLVQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHLDVQ256", argLength: 3, reg: fp31, asm: "VPSHLDVQ", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VPSRLVQ256", argLength: 2, reg: fp21, asm: "VPSRLVQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHRDVQ256", argLength: 3, reg: fp31, asm: "VPSHRDVQ", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VPSRAVQ256", argLength: 2, reg: fp21, asm: "VPSRAVQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSUBQ256", argLength: 2, reg: fp21, asm: "VPSUBQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPABSQ512", argLength: 1, reg: fp11, asm: "VPABSQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPADDQ512", argLength: 2, reg: fp21, asm: "VPADDQ", commutative: true, typ: "Vec512", resultInArg0: false}, @@ -448,6 +572,16 @@ func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp, {name: "VPMULLQMasked512", argLength: 3, reg: fp2kfp, asm: "VPMULLQ", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPORQMasked512", argLength: 3, reg: fp2kfp, asm: "VPORQ", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPOPCNTQMasked512", argLength: 2, reg: fpkfp, asm: "VPOPCNTQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPROLVQMasked512", argLength: 3, reg: fp2kfp, asm: "VPROLVQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPRORVQMasked512", argLength: 3, reg: fp2kfp, asm: "VPRORVQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSLLQMasked512", argLength: 3, reg: fp2kfp, asm: "VPSLLQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRLQMasked512", argLength: 3, reg: fp2kfp, asm: "VPSRLQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRAQMasked512", argLength: 3, reg: fp2kfp, asm: "VPSRAQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSLLVQMasked512", argLength: 3, reg: fp2kfp, asm: "VPSLLVQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHLDVQMasked512", argLength: 4, reg: fp3kfp, asm: "VPSHLDVQ", commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VPSRLVQMasked512", argLength: 3, reg: fp2kfp, asm: "VPSRLVQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHRDVQMasked512", argLength: 4, reg: fp3kfp, asm: "VPSHRDVQ", commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VPSRAVQMasked512", argLength: 3, reg: 
fp2kfp, asm: "VPSRAVQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSUBQMasked512", argLength: 3, reg: fp2kfp, asm: "VPSUBQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPXORQMasked512", argLength: 3, reg: fp2kfp, asm: "VPXORQ", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMAXSQ512", argLength: 2, reg: fp21, asm: "VPMAXSQ", commutative: true, typ: "Vec512", resultInArg0: false}, @@ -456,6 +590,16 @@ func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp, {name: "VPMULLQ512", argLength: 2, reg: fp21, asm: "VPMULLQ", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPORQ512", argLength: 2, reg: fp21, asm: "VPORQ", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPOPCNTQ512", argLength: 1, reg: fp11, asm: "VPOPCNTQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPROLVQ512", argLength: 2, reg: fp21, asm: "VPROLVQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPRORVQ512", argLength: 2, reg: fp21, asm: "VPRORVQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSLLQ512", argLength: 2, reg: fp21, asm: "VPSLLQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRLQ512", argLength: 2, reg: fp21, asm: "VPSRLQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRAQ512", argLength: 2, reg: fp21, asm: "VPSRAQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSLLVQ512", argLength: 2, reg: fp21, asm: "VPSLLVQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHLDVQ512", argLength: 3, reg: fp31, asm: "VPSHLDVQ", commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VPSRLVQ512", argLength: 2, reg: fp21, asm: "VPSRLVQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHRDVQ512", argLength: 3, reg: fp31, asm: "VPSHRDVQ", commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VPSRAVQ512", argLength: 2, reg: fp21, asm: "VPSRAVQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSUBQ512", argLength: 2, reg: fp21, asm: "VPSUBQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPXORQ512", argLength: 2, reg: fp21, asm: "VPXORQ", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPABSB128", argLength: 1, reg: fp11, asm: "VPABSB", commutative: false, typ: "Vec128", resultInArg0: false}, @@ -641,28 +785,88 @@ func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp, {name: "VCMPPDMasked512", argLength: 3, reg: fp2kk, asm: "VCMPPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPW256", argLength: 2, reg: fp2k, asm: "VPCMPW", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPWMasked256", argLength: 3, reg: fp2kk, asm: "VPCMPW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, + {name: "VPSHLDWMasked256", argLength: 3, reg: fp2kfp, asm: "VPSHLDW", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHRDWMasked256", argLength: 3, reg: fp2kfp, asm: "VPSHRDW", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHLDW256", argLength: 2, reg: fp21, asm: "VPSHLDW", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHRDW256", argLength: 2, reg: fp21, asm: "VPSHRDW", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPCMPW512", argLength: 2, reg: fp2k, 
asm: "VPCMPW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPWMasked512", argLength: 3, reg: fp2kk, asm: "VPCMPW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, + {name: "VPSHLDWMasked512", argLength: 3, reg: fp2kfp, asm: "VPSHLDW", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHRDWMasked512", argLength: 3, reg: fp2kfp, asm: "VPSHRDW", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHLDW512", argLength: 2, reg: fp21, asm: "VPSHLDW", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHRDW512", argLength: 2, reg: fp21, asm: "VPSHRDW", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPEXTRW128", argLength: 1, reg: fpgp, asm: "VPEXTRW", aux: "Int8", commutative: false, typ: "int16", resultInArg0: false}, {name: "VPCMPW128", argLength: 2, reg: fp2k, asm: "VPCMPW", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPWMasked128", argLength: 3, reg: fp2kk, asm: "VPCMPW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, + {name: "VPSHLDWMasked128", argLength: 3, reg: fp2kfp, asm: "VPSHLDW", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHRDWMasked128", argLength: 3, reg: fp2kfp, asm: "VPSHRDW", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPINSRW128", argLength: 2, reg: fpgpfp, asm: "VPINSRW", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHLDW128", argLength: 2, reg: fp21, asm: "VPSHLDW", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHRDW128", argLength: 2, reg: fp21, asm: "VPSHRDW", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPCMPD512", argLength: 2, reg: fp2k, asm: "VPCMPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPDMasked512", argLength: 3, reg: fp2kk, asm: "VPCMPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, + {name: "VPROLDMasked512", argLength: 2, reg: fpkfp, asm: "VPROLD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPRORDMasked512", argLength: 2, reg: fpkfp, asm: "VPRORD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHLDDMasked512", argLength: 3, reg: fp2kfp, asm: "VPSHLDD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHRDDMasked512", argLength: 3, reg: fp2kfp, asm: "VPSHRDD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPROLD512", argLength: 1, reg: fp11, asm: "VPROLD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPRORD512", argLength: 1, reg: fp11, asm: "VPRORD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHLDD512", argLength: 2, reg: fp21, asm: "VPSHLDD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHRDD512", argLength: 2, reg: fp21, asm: "VPSHRDD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPEXTRD128", argLength: 1, reg: fpgp, asm: "VPEXTRD", aux: "Int8", commutative: false, typ: "int32", resultInArg0: false}, {name: "VPCMPD128", argLength: 2, reg: fp2k, asm: "VPCMPD", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPDMasked128", argLength: 3, reg: fp2kk, asm: "VPCMPD", aux: "Int8", commutative: 
true, typ: "Mask", resultInArg0: false}, + {name: "VPROLDMasked128", argLength: 2, reg: fpkfp, asm: "VPROLD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPRORDMasked128", argLength: 2, reg: fpkfp, asm: "VPRORD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHLDDMasked128", argLength: 3, reg: fp2kfp, asm: "VPSHLDD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHRDDMasked128", argLength: 3, reg: fp2kfp, asm: "VPSHRDD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPROLD128", argLength: 1, reg: fp11, asm: "VPROLD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPRORD128", argLength: 1, reg: fp11, asm: "VPRORD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPINSRD128", argLength: 2, reg: fpgpfp, asm: "VPINSRD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHLDD128", argLength: 2, reg: fp21, asm: "VPSHLDD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHRDD128", argLength: 2, reg: fp21, asm: "VPSHRDD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPCMPD256", argLength: 2, reg: fp2k, asm: "VPCMPD", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPDMasked256", argLength: 3, reg: fp2kk, asm: "VPCMPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, + {name: "VPROLDMasked256", argLength: 2, reg: fpkfp, asm: "VPROLD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPRORDMasked256", argLength: 2, reg: fpkfp, asm: "VPRORD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHLDDMasked256", argLength: 3, reg: fp2kfp, asm: "VPSHLDD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHRDDMasked256", argLength: 3, reg: fp2kfp, asm: "VPSHRDD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPROLD256", argLength: 1, reg: fp11, asm: "VPROLD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPRORD256", argLength: 1, reg: fp11, asm: "VPRORD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHLDD256", argLength: 2, reg: fp21, asm: "VPSHLDD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHRDD256", argLength: 2, reg: fp21, asm: "VPSHRDD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPEXTRQ128", argLength: 1, reg: fpgp, asm: "VPEXTRQ", aux: "Int8", commutative: false, typ: "int64", resultInArg0: false}, {name: "VPCMPQ128", argLength: 2, reg: fp2k, asm: "VPCMPQ", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPQMasked128", argLength: 3, reg: fp2kk, asm: "VPCMPQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, + {name: "VPROLQMasked128", argLength: 2, reg: fpkfp, asm: "VPROLQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPRORQMasked128", argLength: 2, reg: fpkfp, asm: "VPRORQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHLDQMasked128", argLength: 3, reg: fp2kfp, asm: "VPSHLDQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHRDQMasked128", argLength: 3, reg: fp2kfp, asm: "VPSHRDQ", aux: "Int8", commutative: false, typ: 
"Vec128", resultInArg0: false}, + {name: "VPROLQ128", argLength: 1, reg: fp11, asm: "VPROLQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPRORQ128", argLength: 1, reg: fp11, asm: "VPRORQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPINSRQ128", argLength: 2, reg: fpgpfp, asm: "VPINSRQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHLDQ128", argLength: 2, reg: fp21, asm: "VPSHLDQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHRDQ128", argLength: 2, reg: fp21, asm: "VPSHRDQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPCMPQ256", argLength: 2, reg: fp2k, asm: "VPCMPQ", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPQMasked256", argLength: 3, reg: fp2kk, asm: "VPCMPQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, + {name: "VPROLQMasked256", argLength: 2, reg: fpkfp, asm: "VPROLQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPRORQMasked256", argLength: 2, reg: fpkfp, asm: "VPRORQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHLDQMasked256", argLength: 3, reg: fp2kfp, asm: "VPSHLDQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHRDQMasked256", argLength: 3, reg: fp2kfp, asm: "VPSHRDQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPROLQ256", argLength: 1, reg: fp11, asm: "VPROLQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPRORQ256", argLength: 1, reg: fp11, asm: "VPRORQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHLDQ256", argLength: 2, reg: fp21, asm: "VPSHLDQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHRDQ256", argLength: 2, reg: fp21, asm: "VPSHRDQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPCMPQ512", argLength: 2, reg: fp2k, asm: "VPCMPQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPQMasked512", argLength: 3, reg: fp2kk, asm: "VPCMPQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, + {name: "VPROLQMasked512", argLength: 2, reg: fpkfp, asm: "VPROLQ", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPRORQMasked512", argLength: 2, reg: fpkfp, asm: "VPRORQ", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHLDQMasked512", argLength: 3, reg: fp2kfp, asm: "VPSHLDQ", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHRDQMasked512", argLength: 3, reg: fp2kfp, asm: "VPSHRDQ", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPROLQ512", argLength: 1, reg: fp11, asm: "VPROLQ", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPRORQ512", argLength: 1, reg: fp11, asm: "VPRORQ", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHLDQ512", argLength: 2, reg: fp21, asm: "VPSHLDQ", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHRDQ512", argLength: 2, reg: fp21, asm: "VPSHRDQ", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPEXTRB128", argLength: 1, reg: fpgp, asm: "VPEXTRB", aux: "Int8", commutative: false, typ: "int8", resultInArg0: false}, {name: 
"VPCMPB128", argLength: 2, reg: fp2k, asm: "VPCMPB", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPBMasked128", argLength: 3, reg: fp2kk, asm: "VPCMPB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, diff --git a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go index 1c33483f424..0f3d3f8214c 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go +++ b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go @@ -345,6 +345,11 @@ func simdGenericOps() []opData { {name: "MaskedPopCountInt16x16", argLength: 2, commutative: false}, {name: "MaskedSaturatedAddInt16x16", argLength: 3, commutative: true}, {name: "MaskedSaturatedSubInt16x16", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftInt16x16", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftAndFillUpperFromInt16x16", argLength: 4, commutative: false}, + {name: "MaskedShiftRightInt16x16", argLength: 3, commutative: false}, + {name: "MaskedShiftRightAndFillUpperFromInt16x16", argLength: 4, commutative: false}, + {name: "MaskedShiftRightSignExtendedInt16x16", argLength: 3, commutative: false}, {name: "MaskedSubInt16x16", argLength: 3, commutative: false}, {name: "MaxInt16x16", argLength: 2, commutative: true}, {name: "MinInt16x16", argLength: 2, commutative: true}, @@ -360,6 +365,14 @@ func simdGenericOps() []opData { {name: "SaturatedPairwiseAddInt16x16", argLength: 2, commutative: false}, {name: "SaturatedPairwiseSubInt16x16", argLength: 2, commutative: false}, {name: "SaturatedSubInt16x16", argLength: 2, commutative: false}, + {name: "ShiftAllLeftInt16x16", argLength: 2, commutative: false}, + {name: "ShiftAllRightInt16x16", argLength: 2, commutative: false}, + {name: "ShiftAllRightSignExtendedInt16x16", argLength: 2, commutative: false}, + {name: "ShiftLeftInt16x16", argLength: 2, commutative: false}, + {name: "ShiftLeftAndFillUpperFromInt16x16", argLength: 3, commutative: false}, + {name: "ShiftRightInt16x16", argLength: 2, commutative: false}, + {name: "ShiftRightAndFillUpperFromInt16x16", argLength: 3, commutative: false}, + {name: "ShiftRightSignExtendedInt16x16", argLength: 2, commutative: false}, {name: "SignInt16x16", argLength: 2, commutative: false}, {name: "SubInt16x16", argLength: 2, commutative: false}, {name: "XorInt16x16", argLength: 2, commutative: true}, @@ -386,6 +399,11 @@ func simdGenericOps() []opData { {name: "MaskedPopCountInt16x32", argLength: 2, commutative: false}, {name: "MaskedSaturatedAddInt16x32", argLength: 3, commutative: true}, {name: "MaskedSaturatedSubInt16x32", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftInt16x32", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftAndFillUpperFromInt16x32", argLength: 4, commutative: false}, + {name: "MaskedShiftRightInt16x32", argLength: 3, commutative: false}, + {name: "MaskedShiftRightAndFillUpperFromInt16x32", argLength: 4, commutative: false}, + {name: "MaskedShiftRightSignExtendedInt16x32", argLength: 3, commutative: false}, {name: "MaskedSubInt16x32", argLength: 3, commutative: false}, {name: "MaxInt16x32", argLength: 2, commutative: true}, {name: "MinInt16x32", argLength: 2, commutative: true}, @@ -396,6 +414,11 @@ func simdGenericOps() []opData { {name: "PopCountInt16x32", argLength: 1, commutative: false}, {name: "SaturatedAddInt16x32", argLength: 2, commutative: true}, {name: "SaturatedSubInt16x32", argLength: 2, commutative: false}, + {name: "ShiftLeftInt16x32", argLength: 2, commutative: false}, + 
{name: "ShiftLeftAndFillUpperFromInt16x32", argLength: 3, commutative: false}, + {name: "ShiftRightInt16x32", argLength: 2, commutative: false}, + {name: "ShiftRightAndFillUpperFromInt16x32", argLength: 3, commutative: false}, + {name: "ShiftRightSignExtendedInt16x32", argLength: 2, commutative: false}, {name: "SubInt16x32", argLength: 2, commutative: false}, {name: "AbsoluteInt16x8", argLength: 1, commutative: false}, {name: "AddInt16x8", argLength: 2, commutative: true}, @@ -422,6 +445,11 @@ func simdGenericOps() []opData { {name: "MaskedPopCountInt16x8", argLength: 2, commutative: false}, {name: "MaskedSaturatedAddInt16x8", argLength: 3, commutative: true}, {name: "MaskedSaturatedSubInt16x8", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftInt16x8", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftAndFillUpperFromInt16x8", argLength: 4, commutative: false}, + {name: "MaskedShiftRightInt16x8", argLength: 3, commutative: false}, + {name: "MaskedShiftRightAndFillUpperFromInt16x8", argLength: 4, commutative: false}, + {name: "MaskedShiftRightSignExtendedInt16x8", argLength: 3, commutative: false}, {name: "MaskedSubInt16x8", argLength: 3, commutative: false}, {name: "MaxInt16x8", argLength: 2, commutative: true}, {name: "MinInt16x8", argLength: 2, commutative: true}, @@ -437,6 +465,14 @@ func simdGenericOps() []opData { {name: "SaturatedPairwiseAddInt16x8", argLength: 2, commutative: false}, {name: "SaturatedPairwiseSubInt16x8", argLength: 2, commutative: false}, {name: "SaturatedSubInt16x8", argLength: 2, commutative: false}, + {name: "ShiftAllLeftInt16x8", argLength: 2, commutative: false}, + {name: "ShiftAllRightInt16x8", argLength: 2, commutative: false}, + {name: "ShiftAllRightSignExtendedInt16x8", argLength: 2, commutative: false}, + {name: "ShiftLeftInt16x8", argLength: 2, commutative: false}, + {name: "ShiftLeftAndFillUpperFromInt16x8", argLength: 3, commutative: false}, + {name: "ShiftRightInt16x8", argLength: 2, commutative: false}, + {name: "ShiftRightAndFillUpperFromInt16x8", argLength: 3, commutative: false}, + {name: "ShiftRightSignExtendedInt16x8", argLength: 2, commutative: false}, {name: "SignInt16x8", argLength: 2, commutative: false}, {name: "SubInt16x8", argLength: 2, commutative: false}, {name: "XorInt16x8", argLength: 2, commutative: true}, @@ -465,8 +501,15 @@ func simdGenericOps() []opData { {name: "MaskedOrInt32x16", argLength: 3, commutative: true}, {name: "MaskedPairDotProdAccumulateInt32x16", argLength: 4, commutative: false}, {name: "MaskedPopCountInt32x16", argLength: 2, commutative: false}, + {name: "MaskedRotateLeftInt32x16", argLength: 3, commutative: false}, + {name: "MaskedRotateRightInt32x16", argLength: 3, commutative: false}, {name: "MaskedSaturatedPairDotProdAccumulateInt32x16", argLength: 4, commutative: false}, {name: "MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16", argLength: 4, commutative: false}, + {name: "MaskedShiftLeftInt32x16", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftAndFillUpperFromInt32x16", argLength: 4, commutative: false}, + {name: "MaskedShiftRightInt32x16", argLength: 3, commutative: false}, + {name: "MaskedShiftRightAndFillUpperFromInt32x16", argLength: 4, commutative: false}, + {name: "MaskedShiftRightSignExtendedInt32x16", argLength: 3, commutative: false}, {name: "MaskedSubInt32x16", argLength: 3, commutative: false}, {name: "MaskedUnsignedSignedQuadDotProdAccumulateInt32x16", argLength: 4, commutative: false}, {name: "MaskedXorInt32x16", argLength: 3, commutative: true}, @@ 
-477,8 +520,15 @@ func simdGenericOps() []opData { {name: "OrInt32x16", argLength: 2, commutative: true}, {name: "PairDotProdAccumulateInt32x16", argLength: 3, commutative: false}, {name: "PopCountInt32x16", argLength: 1, commutative: false}, + {name: "RotateLeftInt32x16", argLength: 2, commutative: false}, + {name: "RotateRightInt32x16", argLength: 2, commutative: false}, {name: "SaturatedPairDotProdAccumulateInt32x16", argLength: 3, commutative: false}, {name: "SaturatedUnsignedSignedQuadDotProdAccumulateInt32x16", argLength: 3, commutative: false}, + {name: "ShiftLeftInt32x16", argLength: 2, commutative: false}, + {name: "ShiftLeftAndFillUpperFromInt32x16", argLength: 3, commutative: false}, + {name: "ShiftRightInt32x16", argLength: 2, commutative: false}, + {name: "ShiftRightAndFillUpperFromInt32x16", argLength: 3, commutative: false}, + {name: "ShiftRightSignExtendedInt32x16", argLength: 2, commutative: false}, {name: "SubInt32x16", argLength: 2, commutative: false}, {name: "UnsignedSignedQuadDotProdAccumulateInt32x16", argLength: 3, commutative: false}, {name: "XorInt32x16", argLength: 2, commutative: true}, @@ -507,8 +557,15 @@ func simdGenericOps() []opData { {name: "MaskedOrInt32x4", argLength: 3, commutative: true}, {name: "MaskedPairDotProdAccumulateInt32x4", argLength: 4, commutative: false}, {name: "MaskedPopCountInt32x4", argLength: 2, commutative: false}, + {name: "MaskedRotateLeftInt32x4", argLength: 3, commutative: false}, + {name: "MaskedRotateRightInt32x4", argLength: 3, commutative: false}, {name: "MaskedSaturatedPairDotProdAccumulateInt32x4", argLength: 4, commutative: false}, {name: "MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4", argLength: 4, commutative: false}, + {name: "MaskedShiftLeftInt32x4", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftAndFillUpperFromInt32x4", argLength: 4, commutative: false}, + {name: "MaskedShiftRightInt32x4", argLength: 3, commutative: false}, + {name: "MaskedShiftRightAndFillUpperFromInt32x4", argLength: 4, commutative: false}, + {name: "MaskedShiftRightSignExtendedInt32x4", argLength: 3, commutative: false}, {name: "MaskedSubInt32x4", argLength: 3, commutative: false}, {name: "MaskedUnsignedSignedQuadDotProdAccumulateInt32x4", argLength: 4, commutative: false}, {name: "MaskedXorInt32x4", argLength: 3, commutative: true}, @@ -522,8 +579,18 @@ func simdGenericOps() []opData { {name: "PairwiseAddInt32x4", argLength: 2, commutative: false}, {name: "PairwiseSubInt32x4", argLength: 2, commutative: false}, {name: "PopCountInt32x4", argLength: 1, commutative: false}, + {name: "RotateLeftInt32x4", argLength: 2, commutative: false}, + {name: "RotateRightInt32x4", argLength: 2, commutative: false}, {name: "SaturatedPairDotProdAccumulateInt32x4", argLength: 3, commutative: false}, {name: "SaturatedUnsignedSignedQuadDotProdAccumulateInt32x4", argLength: 3, commutative: false}, + {name: "ShiftAllLeftInt32x4", argLength: 2, commutative: false}, + {name: "ShiftAllRightInt32x4", argLength: 2, commutative: false}, + {name: "ShiftAllRightSignExtendedInt32x4", argLength: 2, commutative: false}, + {name: "ShiftLeftInt32x4", argLength: 2, commutative: false}, + {name: "ShiftLeftAndFillUpperFromInt32x4", argLength: 3, commutative: false}, + {name: "ShiftRightInt32x4", argLength: 2, commutative: false}, + {name: "ShiftRightAndFillUpperFromInt32x4", argLength: 3, commutative: false}, + {name: "ShiftRightSignExtendedInt32x4", argLength: 2, commutative: false}, {name: "SignInt32x4", argLength: 2, commutative: false}, {name: 
"SubInt32x4", argLength: 2, commutative: false}, {name: "UnsignedSignedQuadDotProdAccumulateInt32x4", argLength: 3, commutative: false}, @@ -553,8 +620,15 @@ func simdGenericOps() []opData { {name: "MaskedOrInt32x8", argLength: 3, commutative: true}, {name: "MaskedPairDotProdAccumulateInt32x8", argLength: 4, commutative: false}, {name: "MaskedPopCountInt32x8", argLength: 2, commutative: false}, + {name: "MaskedRotateLeftInt32x8", argLength: 3, commutative: false}, + {name: "MaskedRotateRightInt32x8", argLength: 3, commutative: false}, {name: "MaskedSaturatedPairDotProdAccumulateInt32x8", argLength: 4, commutative: false}, {name: "MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8", argLength: 4, commutative: false}, + {name: "MaskedShiftLeftInt32x8", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftAndFillUpperFromInt32x8", argLength: 4, commutative: false}, + {name: "MaskedShiftRightInt32x8", argLength: 3, commutative: false}, + {name: "MaskedShiftRightAndFillUpperFromInt32x8", argLength: 4, commutative: false}, + {name: "MaskedShiftRightSignExtendedInt32x8", argLength: 3, commutative: false}, {name: "MaskedSubInt32x8", argLength: 3, commutative: false}, {name: "MaskedUnsignedSignedQuadDotProdAccumulateInt32x8", argLength: 4, commutative: false}, {name: "MaskedXorInt32x8", argLength: 3, commutative: true}, @@ -568,8 +642,18 @@ func simdGenericOps() []opData { {name: "PairwiseAddInt32x8", argLength: 2, commutative: false}, {name: "PairwiseSubInt32x8", argLength: 2, commutative: false}, {name: "PopCountInt32x8", argLength: 1, commutative: false}, + {name: "RotateLeftInt32x8", argLength: 2, commutative: false}, + {name: "RotateRightInt32x8", argLength: 2, commutative: false}, {name: "SaturatedPairDotProdAccumulateInt32x8", argLength: 3, commutative: false}, {name: "SaturatedUnsignedSignedQuadDotProdAccumulateInt32x8", argLength: 3, commutative: false}, + {name: "ShiftAllLeftInt32x8", argLength: 2, commutative: false}, + {name: "ShiftAllRightInt32x8", argLength: 2, commutative: false}, + {name: "ShiftAllRightSignExtendedInt32x8", argLength: 2, commutative: false}, + {name: "ShiftLeftInt32x8", argLength: 2, commutative: false}, + {name: "ShiftLeftAndFillUpperFromInt32x8", argLength: 3, commutative: false}, + {name: "ShiftRightInt32x8", argLength: 2, commutative: false}, + {name: "ShiftRightAndFillUpperFromInt32x8", argLength: 3, commutative: false}, + {name: "ShiftRightSignExtendedInt32x8", argLength: 2, commutative: false}, {name: "SignInt32x8", argLength: 2, commutative: false}, {name: "SubInt32x8", argLength: 2, commutative: false}, {name: "UnsignedSignedQuadDotProdAccumulateInt32x8", argLength: 3, commutative: false}, @@ -599,6 +683,16 @@ func simdGenericOps() []opData { {name: "MaskedNotEqualInt64x2", argLength: 3, commutative: true}, {name: "MaskedOrInt64x2", argLength: 3, commutative: true}, {name: "MaskedPopCountInt64x2", argLength: 2, commutative: false}, + {name: "MaskedRotateLeftInt64x2", argLength: 3, commutative: false}, + {name: "MaskedRotateRightInt64x2", argLength: 3, commutative: false}, + {name: "MaskedShiftAllLeftInt64x2", argLength: 3, commutative: false}, + {name: "MaskedShiftAllRightInt64x2", argLength: 3, commutative: false}, + {name: "MaskedShiftAllRightSignExtendedInt64x2", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftInt64x2", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftAndFillUpperFromInt64x2", argLength: 4, commutative: false}, + {name: "MaskedShiftRightInt64x2", argLength: 3, commutative: false}, + {name: 
"MaskedShiftRightAndFillUpperFromInt64x2", argLength: 4, commutative: false}, + {name: "MaskedShiftRightSignExtendedInt64x2", argLength: 3, commutative: false}, {name: "MaskedSubInt64x2", argLength: 3, commutative: false}, {name: "MaskedXorInt64x2", argLength: 3, commutative: true}, {name: "MaxInt64x2", argLength: 2, commutative: true}, @@ -608,6 +702,16 @@ func simdGenericOps() []opData { {name: "NotEqualInt64x2", argLength: 2, commutative: true}, {name: "OrInt64x2", argLength: 2, commutative: true}, {name: "PopCountInt64x2", argLength: 1, commutative: false}, + {name: "RotateLeftInt64x2", argLength: 2, commutative: false}, + {name: "RotateRightInt64x2", argLength: 2, commutative: false}, + {name: "ShiftAllLeftInt64x2", argLength: 2, commutative: false}, + {name: "ShiftAllRightInt64x2", argLength: 2, commutative: false}, + {name: "ShiftAllRightSignExtendedInt64x2", argLength: 2, commutative: false}, + {name: "ShiftLeftInt64x2", argLength: 2, commutative: false}, + {name: "ShiftLeftAndFillUpperFromInt64x2", argLength: 3, commutative: false}, + {name: "ShiftRightInt64x2", argLength: 2, commutative: false}, + {name: "ShiftRightAndFillUpperFromInt64x2", argLength: 3, commutative: false}, + {name: "ShiftRightSignExtendedInt64x2", argLength: 2, commutative: false}, {name: "SubInt64x2", argLength: 2, commutative: false}, {name: "XorInt64x2", argLength: 2, commutative: true}, {name: "AbsoluteInt64x4", argLength: 1, commutative: false}, @@ -635,6 +739,16 @@ func simdGenericOps() []opData { {name: "MaskedNotEqualInt64x4", argLength: 3, commutative: true}, {name: "MaskedOrInt64x4", argLength: 3, commutative: true}, {name: "MaskedPopCountInt64x4", argLength: 2, commutative: false}, + {name: "MaskedRotateLeftInt64x4", argLength: 3, commutative: false}, + {name: "MaskedRotateRightInt64x4", argLength: 3, commutative: false}, + {name: "MaskedShiftAllLeftInt64x4", argLength: 3, commutative: false}, + {name: "MaskedShiftAllRightInt64x4", argLength: 3, commutative: false}, + {name: "MaskedShiftAllRightSignExtendedInt64x4", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftInt64x4", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftAndFillUpperFromInt64x4", argLength: 4, commutative: false}, + {name: "MaskedShiftRightInt64x4", argLength: 3, commutative: false}, + {name: "MaskedShiftRightAndFillUpperFromInt64x4", argLength: 4, commutative: false}, + {name: "MaskedShiftRightSignExtendedInt64x4", argLength: 3, commutative: false}, {name: "MaskedSubInt64x4", argLength: 3, commutative: false}, {name: "MaskedXorInt64x4", argLength: 3, commutative: true}, {name: "MaxInt64x4", argLength: 2, commutative: true}, @@ -644,6 +758,16 @@ func simdGenericOps() []opData { {name: "NotEqualInt64x4", argLength: 2, commutative: true}, {name: "OrInt64x4", argLength: 2, commutative: true}, {name: "PopCountInt64x4", argLength: 1, commutative: false}, + {name: "RotateLeftInt64x4", argLength: 2, commutative: false}, + {name: "RotateRightInt64x4", argLength: 2, commutative: false}, + {name: "ShiftAllLeftInt64x4", argLength: 2, commutative: false}, + {name: "ShiftAllRightInt64x4", argLength: 2, commutative: false}, + {name: "ShiftAllRightSignExtendedInt64x4", argLength: 2, commutative: false}, + {name: "ShiftLeftInt64x4", argLength: 2, commutative: false}, + {name: "ShiftLeftAndFillUpperFromInt64x4", argLength: 3, commutative: false}, + {name: "ShiftRightInt64x4", argLength: 2, commutative: false}, + {name: "ShiftRightAndFillUpperFromInt64x4", argLength: 3, commutative: false}, + {name: 
"ShiftRightSignExtendedInt64x4", argLength: 2, commutative: false}, {name: "SubInt64x4", argLength: 2, commutative: false}, {name: "XorInt64x4", argLength: 2, commutative: true}, {name: "AbsoluteInt64x8", argLength: 1, commutative: false}, @@ -671,6 +795,16 @@ func simdGenericOps() []opData { {name: "MaskedNotEqualInt64x8", argLength: 3, commutative: true}, {name: "MaskedOrInt64x8", argLength: 3, commutative: true}, {name: "MaskedPopCountInt64x8", argLength: 2, commutative: false}, + {name: "MaskedRotateLeftInt64x8", argLength: 3, commutative: false}, + {name: "MaskedRotateRightInt64x8", argLength: 3, commutative: false}, + {name: "MaskedShiftAllLeftInt64x8", argLength: 3, commutative: false}, + {name: "MaskedShiftAllRightInt64x8", argLength: 3, commutative: false}, + {name: "MaskedShiftAllRightSignExtendedInt64x8", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftInt64x8", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftAndFillUpperFromInt64x8", argLength: 4, commutative: false}, + {name: "MaskedShiftRightInt64x8", argLength: 3, commutative: false}, + {name: "MaskedShiftRightAndFillUpperFromInt64x8", argLength: 4, commutative: false}, + {name: "MaskedShiftRightSignExtendedInt64x8", argLength: 3, commutative: false}, {name: "MaskedSubInt64x8", argLength: 3, commutative: false}, {name: "MaskedXorInt64x8", argLength: 3, commutative: true}, {name: "MaxInt64x8", argLength: 2, commutative: true}, @@ -680,6 +814,16 @@ func simdGenericOps() []opData { {name: "NotEqualInt64x8", argLength: 2, commutative: true}, {name: "OrInt64x8", argLength: 2, commutative: true}, {name: "PopCountInt64x8", argLength: 1, commutative: false}, + {name: "RotateLeftInt64x8", argLength: 2, commutative: false}, + {name: "RotateRightInt64x8", argLength: 2, commutative: false}, + {name: "ShiftAllLeftInt64x8", argLength: 2, commutative: false}, + {name: "ShiftAllRightInt64x8", argLength: 2, commutative: false}, + {name: "ShiftAllRightSignExtendedInt64x8", argLength: 2, commutative: false}, + {name: "ShiftLeftInt64x8", argLength: 2, commutative: false}, + {name: "ShiftLeftAndFillUpperFromInt64x8", argLength: 3, commutative: false}, + {name: "ShiftRightInt64x8", argLength: 2, commutative: false}, + {name: "ShiftRightAndFillUpperFromInt64x8", argLength: 3, commutative: false}, + {name: "ShiftRightSignExtendedInt64x8", argLength: 2, commutative: false}, {name: "SubInt64x8", argLength: 2, commutative: false}, {name: "XorInt64x8", argLength: 2, commutative: true}, {name: "AbsoluteInt8x16", argLength: 1, commutative: false}, @@ -799,6 +943,11 @@ func simdGenericOps() []opData { {name: "MaskedPopCountUint16x16", argLength: 2, commutative: false}, {name: "MaskedSaturatedAddUint16x16", argLength: 3, commutative: true}, {name: "MaskedSaturatedSubUint16x16", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftUint16x16", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftAndFillUpperFromUint16x16", argLength: 4, commutative: false}, + {name: "MaskedShiftRightUint16x16", argLength: 3, commutative: false}, + {name: "MaskedShiftRightAndFillUpperFromUint16x16", argLength: 4, commutative: false}, + {name: "MaskedShiftRightSignExtendedUint16x16", argLength: 3, commutative: false}, {name: "MaskedSubUint16x16", argLength: 3, commutative: false}, {name: "MaxUint16x16", argLength: 2, commutative: true}, {name: "MinUint16x16", argLength: 2, commutative: true}, @@ -810,6 +959,13 @@ func simdGenericOps() []opData { {name: "PopCountUint16x16", argLength: 1, commutative: false}, {name: "SaturatedAddUint16x16", 
argLength: 2, commutative: true}, {name: "SaturatedSubUint16x16", argLength: 2, commutative: false}, + {name: "ShiftAllLeftUint16x16", argLength: 2, commutative: false}, + {name: "ShiftAllRightUint16x16", argLength: 2, commutative: false}, + {name: "ShiftLeftUint16x16", argLength: 2, commutative: false}, + {name: "ShiftLeftAndFillUpperFromUint16x16", argLength: 3, commutative: false}, + {name: "ShiftRightUint16x16", argLength: 2, commutative: false}, + {name: "ShiftRightAndFillUpperFromUint16x16", argLength: 3, commutative: false}, + {name: "ShiftRightSignExtendedUint16x16", argLength: 2, commutative: false}, {name: "SubUint16x16", argLength: 2, commutative: false}, {name: "XorUint16x16", argLength: 2, commutative: true}, {name: "AddUint16x32", argLength: 2, commutative: true}, @@ -833,6 +989,11 @@ func simdGenericOps() []opData { {name: "MaskedPopCountUint16x32", argLength: 2, commutative: false}, {name: "MaskedSaturatedAddUint16x32", argLength: 3, commutative: true}, {name: "MaskedSaturatedSubUint16x32", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftUint16x32", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftAndFillUpperFromUint16x32", argLength: 4, commutative: false}, + {name: "MaskedShiftRightUint16x32", argLength: 3, commutative: false}, + {name: "MaskedShiftRightAndFillUpperFromUint16x32", argLength: 4, commutative: false}, + {name: "MaskedShiftRightSignExtendedUint16x32", argLength: 3, commutative: false}, {name: "MaskedSubUint16x32", argLength: 3, commutative: false}, {name: "MaxUint16x32", argLength: 2, commutative: true}, {name: "MinUint16x32", argLength: 2, commutative: true}, @@ -841,6 +1002,11 @@ func simdGenericOps() []opData { {name: "PopCountUint16x32", argLength: 1, commutative: false}, {name: "SaturatedAddUint16x32", argLength: 2, commutative: true}, {name: "SaturatedSubUint16x32", argLength: 2, commutative: false}, + {name: "ShiftLeftUint16x32", argLength: 2, commutative: false}, + {name: "ShiftLeftAndFillUpperFromUint16x32", argLength: 3, commutative: false}, + {name: "ShiftRightUint16x32", argLength: 2, commutative: false}, + {name: "ShiftRightAndFillUpperFromUint16x32", argLength: 3, commutative: false}, + {name: "ShiftRightSignExtendedUint16x32", argLength: 2, commutative: false}, {name: "SubUint16x32", argLength: 2, commutative: false}, {name: "AddUint16x8", argLength: 2, commutative: true}, {name: "AndUint16x8", argLength: 2, commutative: true}, @@ -865,6 +1031,11 @@ func simdGenericOps() []opData { {name: "MaskedPopCountUint16x8", argLength: 2, commutative: false}, {name: "MaskedSaturatedAddUint16x8", argLength: 3, commutative: true}, {name: "MaskedSaturatedSubUint16x8", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftUint16x8", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftAndFillUpperFromUint16x8", argLength: 4, commutative: false}, + {name: "MaskedShiftRightUint16x8", argLength: 3, commutative: false}, + {name: "MaskedShiftRightAndFillUpperFromUint16x8", argLength: 4, commutative: false}, + {name: "MaskedShiftRightSignExtendedUint16x8", argLength: 3, commutative: false}, {name: "MaskedSubUint16x8", argLength: 3, commutative: false}, {name: "MaxUint16x8", argLength: 2, commutative: true}, {name: "MinUint16x8", argLength: 2, commutative: true}, @@ -876,6 +1047,13 @@ func simdGenericOps() []opData { {name: "PopCountUint16x8", argLength: 1, commutative: false}, {name: "SaturatedAddUint16x8", argLength: 2, commutative: true}, {name: "SaturatedSubUint16x8", argLength: 2, commutative: false}, + {name: 
"ShiftAllLeftUint16x8", argLength: 2, commutative: false}, + {name: "ShiftAllRightUint16x8", argLength: 2, commutative: false}, + {name: "ShiftLeftUint16x8", argLength: 2, commutative: false}, + {name: "ShiftLeftAndFillUpperFromUint16x8", argLength: 3, commutative: false}, + {name: "ShiftRightUint16x8", argLength: 2, commutative: false}, + {name: "ShiftRightAndFillUpperFromUint16x8", argLength: 3, commutative: false}, + {name: "ShiftRightSignExtendedUint16x8", argLength: 2, commutative: false}, {name: "SubUint16x8", argLength: 2, commutative: false}, {name: "XorUint16x8", argLength: 2, commutative: true}, {name: "AddUint32x16", argLength: 2, commutative: true}, @@ -899,7 +1077,14 @@ func simdGenericOps() []opData { {name: "MaskedNotEqualUint32x16", argLength: 3, commutative: true}, {name: "MaskedOrUint32x16", argLength: 3, commutative: true}, {name: "MaskedPopCountUint32x16", argLength: 2, commutative: false}, + {name: "MaskedRotateLeftUint32x16", argLength: 3, commutative: false}, + {name: "MaskedRotateRightUint32x16", argLength: 3, commutative: false}, {name: "MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16", argLength: 4, commutative: false}, + {name: "MaskedShiftLeftUint32x16", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftAndFillUpperFromUint32x16", argLength: 4, commutative: false}, + {name: "MaskedShiftRightUint32x16", argLength: 3, commutative: false}, + {name: "MaskedShiftRightAndFillUpperFromUint32x16", argLength: 4, commutative: false}, + {name: "MaskedShiftRightSignExtendedUint32x16", argLength: 3, commutative: false}, {name: "MaskedSubUint32x16", argLength: 3, commutative: false}, {name: "MaskedUnsignedSignedQuadDotProdAccumulateUint32x16", argLength: 4, commutative: false}, {name: "MaskedXorUint32x16", argLength: 3, commutative: true}, @@ -908,7 +1093,14 @@ func simdGenericOps() []opData { {name: "NotEqualUint32x16", argLength: 2, commutative: true}, {name: "OrUint32x16", argLength: 2, commutative: true}, {name: "PopCountUint32x16", argLength: 1, commutative: false}, + {name: "RotateLeftUint32x16", argLength: 2, commutative: false}, + {name: "RotateRightUint32x16", argLength: 2, commutative: false}, {name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x16", argLength: 3, commutative: false}, + {name: "ShiftLeftUint32x16", argLength: 2, commutative: false}, + {name: "ShiftLeftAndFillUpperFromUint32x16", argLength: 3, commutative: false}, + {name: "ShiftRightUint32x16", argLength: 2, commutative: false}, + {name: "ShiftRightAndFillUpperFromUint32x16", argLength: 3, commutative: false}, + {name: "ShiftRightSignExtendedUint32x16", argLength: 2, commutative: false}, {name: "SubUint32x16", argLength: 2, commutative: false}, {name: "UnsignedSignedQuadDotProdAccumulateUint32x16", argLength: 3, commutative: false}, {name: "XorUint32x16", argLength: 2, commutative: true}, @@ -933,7 +1125,14 @@ func simdGenericOps() []opData { {name: "MaskedNotEqualUint32x4", argLength: 3, commutative: true}, {name: "MaskedOrUint32x4", argLength: 3, commutative: true}, {name: "MaskedPopCountUint32x4", argLength: 2, commutative: false}, + {name: "MaskedRotateLeftUint32x4", argLength: 3, commutative: false}, + {name: "MaskedRotateRightUint32x4", argLength: 3, commutative: false}, {name: "MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4", argLength: 4, commutative: false}, + {name: "MaskedShiftLeftUint32x4", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftAndFillUpperFromUint32x4", argLength: 4, commutative: false}, + {name: "MaskedShiftRightUint32x4", 
argLength: 3, commutative: false}, + {name: "MaskedShiftRightAndFillUpperFromUint32x4", argLength: 4, commutative: false}, + {name: "MaskedShiftRightSignExtendedUint32x4", argLength: 3, commutative: false}, {name: "MaskedSubUint32x4", argLength: 3, commutative: false}, {name: "MaskedUnsignedSignedQuadDotProdAccumulateUint32x4", argLength: 4, commutative: false}, {name: "MaskedXorUint32x4", argLength: 3, commutative: true}, @@ -945,7 +1144,16 @@ func simdGenericOps() []opData { {name: "PairwiseAddUint32x4", argLength: 2, commutative: false}, {name: "PairwiseSubUint32x4", argLength: 2, commutative: false}, {name: "PopCountUint32x4", argLength: 1, commutative: false}, + {name: "RotateLeftUint32x4", argLength: 2, commutative: false}, + {name: "RotateRightUint32x4", argLength: 2, commutative: false}, {name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x4", argLength: 3, commutative: false}, + {name: "ShiftAllLeftUint32x4", argLength: 2, commutative: false}, + {name: "ShiftAllRightUint32x4", argLength: 2, commutative: false}, + {name: "ShiftLeftUint32x4", argLength: 2, commutative: false}, + {name: "ShiftLeftAndFillUpperFromUint32x4", argLength: 3, commutative: false}, + {name: "ShiftRightUint32x4", argLength: 2, commutative: false}, + {name: "ShiftRightAndFillUpperFromUint32x4", argLength: 3, commutative: false}, + {name: "ShiftRightSignExtendedUint32x4", argLength: 2, commutative: false}, {name: "SubUint32x4", argLength: 2, commutative: false}, {name: "UnsignedSignedQuadDotProdAccumulateUint32x4", argLength: 3, commutative: false}, {name: "XorUint32x4", argLength: 2, commutative: true}, @@ -970,7 +1178,14 @@ func simdGenericOps() []opData { {name: "MaskedNotEqualUint32x8", argLength: 3, commutative: true}, {name: "MaskedOrUint32x8", argLength: 3, commutative: true}, {name: "MaskedPopCountUint32x8", argLength: 2, commutative: false}, + {name: "MaskedRotateLeftUint32x8", argLength: 3, commutative: false}, + {name: "MaskedRotateRightUint32x8", argLength: 3, commutative: false}, {name: "MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8", argLength: 4, commutative: false}, + {name: "MaskedShiftLeftUint32x8", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftAndFillUpperFromUint32x8", argLength: 4, commutative: false}, + {name: "MaskedShiftRightUint32x8", argLength: 3, commutative: false}, + {name: "MaskedShiftRightAndFillUpperFromUint32x8", argLength: 4, commutative: false}, + {name: "MaskedShiftRightSignExtendedUint32x8", argLength: 3, commutative: false}, {name: "MaskedSubUint32x8", argLength: 3, commutative: false}, {name: "MaskedUnsignedSignedQuadDotProdAccumulateUint32x8", argLength: 4, commutative: false}, {name: "MaskedXorUint32x8", argLength: 3, commutative: true}, @@ -982,7 +1197,16 @@ func simdGenericOps() []opData { {name: "PairwiseAddUint32x8", argLength: 2, commutative: false}, {name: "PairwiseSubUint32x8", argLength: 2, commutative: false}, {name: "PopCountUint32x8", argLength: 1, commutative: false}, + {name: "RotateLeftUint32x8", argLength: 2, commutative: false}, + {name: "RotateRightUint32x8", argLength: 2, commutative: false}, {name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x8", argLength: 3, commutative: false}, + {name: "ShiftAllLeftUint32x8", argLength: 2, commutative: false}, + {name: "ShiftAllRightUint32x8", argLength: 2, commutative: false}, + {name: "ShiftLeftUint32x8", argLength: 2, commutative: false}, + {name: "ShiftLeftAndFillUpperFromUint32x8", argLength: 3, commutative: false}, + {name: "ShiftRightUint32x8", argLength: 2, 
commutative: false}, + {name: "ShiftRightAndFillUpperFromUint32x8", argLength: 3, commutative: false}, + {name: "ShiftRightSignExtendedUint32x8", argLength: 2, commutative: false}, {name: "SubUint32x8", argLength: 2, commutative: false}, {name: "UnsignedSignedQuadDotProdAccumulateUint32x8", argLength: 3, commutative: false}, {name: "XorUint32x8", argLength: 2, commutative: true}, @@ -1008,6 +1232,15 @@ func simdGenericOps() []opData { {name: "MaskedNotEqualUint64x2", argLength: 3, commutative: true}, {name: "MaskedOrUint64x2", argLength: 3, commutative: true}, {name: "MaskedPopCountUint64x2", argLength: 2, commutative: false}, + {name: "MaskedRotateLeftUint64x2", argLength: 3, commutative: false}, + {name: "MaskedRotateRightUint64x2", argLength: 3, commutative: false}, + {name: "MaskedShiftAllLeftUint64x2", argLength: 3, commutative: false}, + {name: "MaskedShiftAllRightUint64x2", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftUint64x2", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftAndFillUpperFromUint64x2", argLength: 4, commutative: false}, + {name: "MaskedShiftRightUint64x2", argLength: 3, commutative: false}, + {name: "MaskedShiftRightAndFillUpperFromUint64x2", argLength: 4, commutative: false}, + {name: "MaskedShiftRightSignExtendedUint64x2", argLength: 3, commutative: false}, {name: "MaskedSubUint64x2", argLength: 3, commutative: false}, {name: "MaskedXorUint64x2", argLength: 3, commutative: true}, {name: "MaxUint64x2", argLength: 2, commutative: true}, @@ -1016,6 +1249,15 @@ func simdGenericOps() []opData { {name: "NotEqualUint64x2", argLength: 2, commutative: true}, {name: "OrUint64x2", argLength: 2, commutative: true}, {name: "PopCountUint64x2", argLength: 1, commutative: false}, + {name: "RotateLeftUint64x2", argLength: 2, commutative: false}, + {name: "RotateRightUint64x2", argLength: 2, commutative: false}, + {name: "ShiftAllLeftUint64x2", argLength: 2, commutative: false}, + {name: "ShiftAllRightUint64x2", argLength: 2, commutative: false}, + {name: "ShiftLeftUint64x2", argLength: 2, commutative: false}, + {name: "ShiftLeftAndFillUpperFromUint64x2", argLength: 3, commutative: false}, + {name: "ShiftRightUint64x2", argLength: 2, commutative: false}, + {name: "ShiftRightAndFillUpperFromUint64x2", argLength: 3, commutative: false}, + {name: "ShiftRightSignExtendedUint64x2", argLength: 2, commutative: false}, {name: "SubUint64x2", argLength: 2, commutative: false}, {name: "XorUint64x2", argLength: 2, commutative: true}, {name: "AddUint64x4", argLength: 2, commutative: true}, @@ -1040,6 +1282,15 @@ func simdGenericOps() []opData { {name: "MaskedNotEqualUint64x4", argLength: 3, commutative: true}, {name: "MaskedOrUint64x4", argLength: 3, commutative: true}, {name: "MaskedPopCountUint64x4", argLength: 2, commutative: false}, + {name: "MaskedRotateLeftUint64x4", argLength: 3, commutative: false}, + {name: "MaskedRotateRightUint64x4", argLength: 3, commutative: false}, + {name: "MaskedShiftAllLeftUint64x4", argLength: 3, commutative: false}, + {name: "MaskedShiftAllRightUint64x4", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftUint64x4", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftAndFillUpperFromUint64x4", argLength: 4, commutative: false}, + {name: "MaskedShiftRightUint64x4", argLength: 3, commutative: false}, + {name: "MaskedShiftRightAndFillUpperFromUint64x4", argLength: 4, commutative: false}, + {name: "MaskedShiftRightSignExtendedUint64x4", argLength: 3, commutative: false}, {name: "MaskedSubUint64x4", argLength: 3, 
commutative: false}, {name: "MaskedXorUint64x4", argLength: 3, commutative: true}, {name: "MaxUint64x4", argLength: 2, commutative: true}, @@ -1048,6 +1299,15 @@ func simdGenericOps() []opData { {name: "NotEqualUint64x4", argLength: 2, commutative: true}, {name: "OrUint64x4", argLength: 2, commutative: true}, {name: "PopCountUint64x4", argLength: 1, commutative: false}, + {name: "RotateLeftUint64x4", argLength: 2, commutative: false}, + {name: "RotateRightUint64x4", argLength: 2, commutative: false}, + {name: "ShiftAllLeftUint64x4", argLength: 2, commutative: false}, + {name: "ShiftAllRightUint64x4", argLength: 2, commutative: false}, + {name: "ShiftLeftUint64x4", argLength: 2, commutative: false}, + {name: "ShiftLeftAndFillUpperFromUint64x4", argLength: 3, commutative: false}, + {name: "ShiftRightUint64x4", argLength: 2, commutative: false}, + {name: "ShiftRightAndFillUpperFromUint64x4", argLength: 3, commutative: false}, + {name: "ShiftRightSignExtendedUint64x4", argLength: 2, commutative: false}, {name: "SubUint64x4", argLength: 2, commutative: false}, {name: "XorUint64x4", argLength: 2, commutative: true}, {name: "AddUint64x8", argLength: 2, commutative: true}, @@ -1072,6 +1332,15 @@ func simdGenericOps() []opData { {name: "MaskedNotEqualUint64x8", argLength: 3, commutative: true}, {name: "MaskedOrUint64x8", argLength: 3, commutative: true}, {name: "MaskedPopCountUint64x8", argLength: 2, commutative: false}, + {name: "MaskedRotateLeftUint64x8", argLength: 3, commutative: false}, + {name: "MaskedRotateRightUint64x8", argLength: 3, commutative: false}, + {name: "MaskedShiftAllLeftUint64x8", argLength: 3, commutative: false}, + {name: "MaskedShiftAllRightUint64x8", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftUint64x8", argLength: 3, commutative: false}, + {name: "MaskedShiftLeftAndFillUpperFromUint64x8", argLength: 4, commutative: false}, + {name: "MaskedShiftRightUint64x8", argLength: 3, commutative: false}, + {name: "MaskedShiftRightAndFillUpperFromUint64x8", argLength: 4, commutative: false}, + {name: "MaskedShiftRightSignExtendedUint64x8", argLength: 3, commutative: false}, {name: "MaskedSubUint64x8", argLength: 3, commutative: false}, {name: "MaskedXorUint64x8", argLength: 3, commutative: true}, {name: "MaxUint64x8", argLength: 2, commutative: true}, @@ -1080,6 +1349,15 @@ func simdGenericOps() []opData { {name: "NotEqualUint64x8", argLength: 2, commutative: true}, {name: "OrUint64x8", argLength: 2, commutative: true}, {name: "PopCountUint64x8", argLength: 1, commutative: false}, + {name: "RotateLeftUint64x8", argLength: 2, commutative: false}, + {name: "RotateRightUint64x8", argLength: 2, commutative: false}, + {name: "ShiftAllLeftUint64x8", argLength: 2, commutative: false}, + {name: "ShiftAllRightUint64x8", argLength: 2, commutative: false}, + {name: "ShiftLeftUint64x8", argLength: 2, commutative: false}, + {name: "ShiftLeftAndFillUpperFromUint64x8", argLength: 3, commutative: false}, + {name: "ShiftRightUint64x8", argLength: 2, commutative: false}, + {name: "ShiftRightAndFillUpperFromUint64x8", argLength: 3, commutative: false}, + {name: "ShiftRightSignExtendedUint64x8", argLength: 2, commutative: false}, {name: "SubUint64x8", argLength: 2, commutative: false}, {name: "XorUint64x8", argLength: 2, commutative: true}, {name: "AddUint8x16", argLength: 2, commutative: true}, @@ -1372,20 +1650,140 @@ func simdGenericOps() []opData { {name: "RoundWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"}, {name: 
"TruncSuppressExceptionWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"}, {name: "TruncWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllLeftAndFillUpperFromInt16x16", argLength: 3, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllRightAndFillUpperFromInt16x16", argLength: 3, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftAndFillUpperFromInt16x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightAndFillUpperFromInt16x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllLeftAndFillUpperFromInt16x32", argLength: 3, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllRightAndFillUpperFromInt16x32", argLength: 3, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftAndFillUpperFromInt16x32", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightAndFillUpperFromInt16x32", argLength: 2, commutative: false, aux: "Int8"}, {name: "GetElemInt16x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllLeftAndFillUpperFromInt16x8", argLength: 3, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllRightAndFillUpperFromInt16x8", argLength: 3, commutative: false, aux: "Int8"}, {name: "SetElemInt16x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftAndFillUpperFromInt16x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightAndFillUpperFromInt16x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRotateAllLeftInt32x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRotateAllRightInt32x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllLeftAndFillUpperFromInt32x16", argLength: 3, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllRightAndFillUpperFromInt32x16", argLength: 3, commutative: false, aux: "Int8"}, + {name: "RotateAllLeftInt32x16", argLength: 1, commutative: false, aux: "Int8"}, + {name: "RotateAllRightInt32x16", argLength: 1, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftAndFillUpperFromInt32x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightAndFillUpperFromInt32x16", argLength: 2, commutative: false, aux: "Int8"}, {name: "GetElemInt32x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "MaskedRotateAllLeftInt32x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRotateAllRightInt32x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllLeftAndFillUpperFromInt32x4", argLength: 3, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllRightAndFillUpperFromInt32x4", argLength: 3, commutative: false, aux: "Int8"}, + {name: "RotateAllLeftInt32x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "RotateAllRightInt32x4", argLength: 1, commutative: false, aux: "Int8"}, {name: "SetElemInt32x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftAndFillUpperFromInt32x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightAndFillUpperFromInt32x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRotateAllLeftInt32x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRotateAllRightInt32x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllLeftAndFillUpperFromInt32x8", argLength: 3, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllRightAndFillUpperFromInt32x8", argLength: 3, commutative: false, aux: "Int8"}, + {name: 
"RotateAllLeftInt32x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "RotateAllRightInt32x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftAndFillUpperFromInt32x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightAndFillUpperFromInt32x8", argLength: 2, commutative: false, aux: "Int8"}, {name: "GetElemInt64x2", argLength: 1, commutative: false, aux: "Int8"}, + {name: "MaskedRotateAllLeftInt64x2", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRotateAllRightInt64x2", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllLeftAndFillUpperFromInt64x2", argLength: 3, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllRightAndFillUpperFromInt64x2", argLength: 3, commutative: false, aux: "Int8"}, + {name: "RotateAllLeftInt64x2", argLength: 1, commutative: false, aux: "Int8"}, + {name: "RotateAllRightInt64x2", argLength: 1, commutative: false, aux: "Int8"}, {name: "SetElemInt64x2", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftAndFillUpperFromInt64x2", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightAndFillUpperFromInt64x2", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRotateAllLeftInt64x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRotateAllRightInt64x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllLeftAndFillUpperFromInt64x4", argLength: 3, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllRightAndFillUpperFromInt64x4", argLength: 3, commutative: false, aux: "Int8"}, + {name: "RotateAllLeftInt64x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "RotateAllRightInt64x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftAndFillUpperFromInt64x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightAndFillUpperFromInt64x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRotateAllLeftInt64x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRotateAllRightInt64x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllLeftAndFillUpperFromInt64x8", argLength: 3, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllRightAndFillUpperFromInt64x8", argLength: 3, commutative: false, aux: "Int8"}, + {name: "RotateAllLeftInt64x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "RotateAllRightInt64x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftAndFillUpperFromInt64x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightAndFillUpperFromInt64x8", argLength: 2, commutative: false, aux: "Int8"}, {name: "GetElemInt8x16", argLength: 1, commutative: false, aux: "Int8"}, {name: "SetElemInt8x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllLeftAndFillUpperFromUint16x16", argLength: 3, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllRightAndFillUpperFromUint16x16", argLength: 3, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftAndFillUpperFromUint16x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightAndFillUpperFromUint16x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllLeftAndFillUpperFromUint16x32", argLength: 3, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllRightAndFillUpperFromUint16x32", argLength: 3, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftAndFillUpperFromUint16x32", argLength: 2, commutative: false, aux: 
"Int8"}, + {name: "ShiftAllRightAndFillUpperFromUint16x32", argLength: 2, commutative: false, aux: "Int8"}, {name: "GetElemUint16x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllLeftAndFillUpperFromUint16x8", argLength: 3, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllRightAndFillUpperFromUint16x8", argLength: 3, commutative: false, aux: "Int8"}, {name: "SetElemUint16x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftAndFillUpperFromUint16x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightAndFillUpperFromUint16x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRotateAllLeftUint32x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRotateAllRightUint32x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllLeftAndFillUpperFromUint32x16", argLength: 3, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllRightAndFillUpperFromUint32x16", argLength: 3, commutative: false, aux: "Int8"}, + {name: "RotateAllLeftUint32x16", argLength: 1, commutative: false, aux: "Int8"}, + {name: "RotateAllRightUint32x16", argLength: 1, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftAndFillUpperFromUint32x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightAndFillUpperFromUint32x16", argLength: 2, commutative: false, aux: "Int8"}, {name: "GetElemUint32x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "MaskedRotateAllLeftUint32x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRotateAllRightUint32x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllLeftAndFillUpperFromUint32x4", argLength: 3, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllRightAndFillUpperFromUint32x4", argLength: 3, commutative: false, aux: "Int8"}, + {name: "RotateAllLeftUint32x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "RotateAllRightUint32x4", argLength: 1, commutative: false, aux: "Int8"}, {name: "SetElemUint32x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftAndFillUpperFromUint32x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightAndFillUpperFromUint32x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRotateAllLeftUint32x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRotateAllRightUint32x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllLeftAndFillUpperFromUint32x8", argLength: 3, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllRightAndFillUpperFromUint32x8", argLength: 3, commutative: false, aux: "Int8"}, + {name: "RotateAllLeftUint32x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "RotateAllRightUint32x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftAndFillUpperFromUint32x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightAndFillUpperFromUint32x8", argLength: 2, commutative: false, aux: "Int8"}, {name: "GetElemUint64x2", argLength: 1, commutative: false, aux: "Int8"}, + {name: "MaskedRotateAllLeftUint64x2", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRotateAllRightUint64x2", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllLeftAndFillUpperFromUint64x2", argLength: 3, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllRightAndFillUpperFromUint64x2", argLength: 3, commutative: false, aux: "Int8"}, + {name: "RotateAllLeftUint64x2", argLength: 1, 
commutative: false, aux: "Int8"}, + {name: "RotateAllRightUint64x2", argLength: 1, commutative: false, aux: "Int8"}, {name: "SetElemUint64x2", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftAndFillUpperFromUint64x2", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightAndFillUpperFromUint64x2", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRotateAllLeftUint64x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRotateAllRightUint64x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllLeftAndFillUpperFromUint64x4", argLength: 3, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllRightAndFillUpperFromUint64x4", argLength: 3, commutative: false, aux: "Int8"}, + {name: "RotateAllLeftUint64x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "RotateAllRightUint64x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftAndFillUpperFromUint64x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightAndFillUpperFromUint64x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRotateAllLeftUint64x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRotateAllRightUint64x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllLeftAndFillUpperFromUint64x8", argLength: 3, commutative: false, aux: "Int8"}, + {name: "MaskedShiftAllRightAndFillUpperFromUint64x8", argLength: 3, commutative: false, aux: "Int8"}, + {name: "RotateAllLeftUint64x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "RotateAllRightUint64x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftAndFillUpperFromUint64x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightAndFillUpperFromUint64x8", argLength: 2, commutative: false, aux: "Int8"}, {name: "GetElemUint8x16", argLength: 1, commutative: false, aux: "Int8"}, {name: "SetElemUint8x16", argLength: 2, commutative: false, aux: "Int8"}, } diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 7a1126d433f..2bdbd5156e1 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1426,6 +1426,11 @@ const ( OpAMD64VPOPCNTWMasked256 OpAMD64VPADDSWMasked256 OpAMD64VPSUBSWMasked256 + OpAMD64VPSLLVWMasked256 + OpAMD64VPSHLDVWMasked256 + OpAMD64VPSRLVWMasked256 + OpAMD64VPSHRDVWMasked256 + OpAMD64VPSRAVWMasked256 OpAMD64VPSUBWMasked256 OpAMD64VPMAXSW256 OpAMD64VPMINSW256 @@ -1439,6 +1444,14 @@ const ( OpAMD64VPHADDSW256 OpAMD64VPHSUBSW256 OpAMD64VPSUBSW256 + OpAMD64VPSLLW256 + OpAMD64VPSRLW256 + OpAMD64VPSRAW256 + OpAMD64VPSLLVW256 + OpAMD64VPSHLDVW256 + OpAMD64VPSRLVW256 + OpAMD64VPSHRDVW256 + OpAMD64VPSRAVW256 OpAMD64VPSIGNW256 OpAMD64VPSUBW256 OpAMD64VPABSW512 @@ -1453,6 +1466,11 @@ const ( OpAMD64VPOPCNTWMasked512 OpAMD64VPADDSWMasked512 OpAMD64VPSUBSWMasked512 + OpAMD64VPSLLVWMasked512 + OpAMD64VPSHLDVWMasked512 + OpAMD64VPSRLVWMasked512 + OpAMD64VPSHRDVWMasked512 + OpAMD64VPSRAVWMasked512 OpAMD64VPSUBWMasked512 OpAMD64VPMAXSW512 OpAMD64VPMINSW512 @@ -1462,6 +1480,11 @@ const ( OpAMD64VPOPCNTW512 OpAMD64VPADDSW512 OpAMD64VPSUBSW512 + OpAMD64VPSLLVW512 + OpAMD64VPSHLDVW512 + OpAMD64VPSRLVW512 + OpAMD64VPSHRDVW512 + OpAMD64VPSRAVW512 OpAMD64VPSUBW512 OpAMD64VPABSW128 OpAMD64VPADDW128 @@ -1477,6 +1500,11 @@ const ( OpAMD64VPOPCNTWMasked128 OpAMD64VPADDSWMasked128 OpAMD64VPSUBSWMasked128 + OpAMD64VPSLLVWMasked128 + OpAMD64VPSHLDVWMasked128 + OpAMD64VPSRLVWMasked128 + 
OpAMD64VPSHRDVWMasked128 + OpAMD64VPSRAVWMasked128 OpAMD64VPSUBWMasked128 OpAMD64VPMAXSW128 OpAMD64VPMINSW128 @@ -1490,6 +1518,14 @@ const ( OpAMD64VPHADDSW128 OpAMD64VPHSUBSW128 OpAMD64VPSUBSW128 + OpAMD64VPSLLW128 + OpAMD64VPSRLW128 + OpAMD64VPSRAW128 + OpAMD64VPSLLVW128 + OpAMD64VPSHLDVW128 + OpAMD64VPSRLVW128 + OpAMD64VPSHRDVW128 + OpAMD64VPSRAVW128 OpAMD64VPSIGNW128 OpAMD64VPSUBW128 OpAMD64VPABSD512 @@ -1506,8 +1542,15 @@ const ( OpAMD64VPORDMasked512 OpAMD64VPDPWSSDMasked512 OpAMD64VPOPCNTDMasked512 + OpAMD64VPROLVDMasked512 + OpAMD64VPRORVDMasked512 OpAMD64VPDPWSSDSMasked512 OpAMD64VPDPBUSDSMasked512 + OpAMD64VPSLLVDMasked512 + OpAMD64VPSHLDVDMasked512 + OpAMD64VPSRLVDMasked512 + OpAMD64VPSHRDVDMasked512 + OpAMD64VPSRAVDMasked512 OpAMD64VPSUBDMasked512 OpAMD64VPDPBUSDMasked512 OpAMD64VPXORDMasked512 @@ -1517,8 +1560,15 @@ const ( OpAMD64VPORD512 OpAMD64VPDPWSSD512 OpAMD64VPOPCNTD512 + OpAMD64VPROLVD512 + OpAMD64VPRORVD512 OpAMD64VPDPWSSDS512 OpAMD64VPDPBUSDS512 + OpAMD64VPSLLVD512 + OpAMD64VPSHLDVD512 + OpAMD64VPSRLVD512 + OpAMD64VPSHRDVD512 + OpAMD64VPSRAVD512 OpAMD64VPSUBD512 OpAMD64VPDPBUSD512 OpAMD64VPXORD512 @@ -1536,8 +1586,15 @@ const ( OpAMD64VPORDMasked128 OpAMD64VPDPWSSDMasked128 OpAMD64VPOPCNTDMasked128 + OpAMD64VPROLVDMasked128 + OpAMD64VPRORVDMasked128 OpAMD64VPDPWSSDSMasked128 OpAMD64VPDPBUSDSMasked128 + OpAMD64VPSLLVDMasked128 + OpAMD64VPSHLDVDMasked128 + OpAMD64VPSRLVDMasked128 + OpAMD64VPSHRDVDMasked128 + OpAMD64VPSRAVDMasked128 OpAMD64VPSUBDMasked128 OpAMD64VPDPBUSDMasked128 OpAMD64VPXORDMasked128 @@ -1549,8 +1606,18 @@ const ( OpAMD64VPHADDD128 OpAMD64VPHSUBD128 OpAMD64VPOPCNTD128 + OpAMD64VPROLVD128 + OpAMD64VPRORVD128 OpAMD64VPDPWSSDS128 OpAMD64VPDPBUSDS128 + OpAMD64VPSLLD128 + OpAMD64VPSRLD128 + OpAMD64VPSRAD128 + OpAMD64VPSLLVD128 + OpAMD64VPSHLDVD128 + OpAMD64VPSRLVD128 + OpAMD64VPSHRDVD128 + OpAMD64VPSRAVD128 OpAMD64VPSIGND128 OpAMD64VPSUBD128 OpAMD64VPDPBUSD128 @@ -1568,8 +1635,15 @@ const ( OpAMD64VPORDMasked256 OpAMD64VPDPWSSDMasked256 OpAMD64VPOPCNTDMasked256 + OpAMD64VPROLVDMasked256 + OpAMD64VPRORVDMasked256 OpAMD64VPDPWSSDSMasked256 OpAMD64VPDPBUSDSMasked256 + OpAMD64VPSLLVDMasked256 + OpAMD64VPSHLDVDMasked256 + OpAMD64VPSRLVDMasked256 + OpAMD64VPSHRDVDMasked256 + OpAMD64VPSRAVDMasked256 OpAMD64VPSUBDMasked256 OpAMD64VPDPBUSDMasked256 OpAMD64VPXORDMasked256 @@ -1581,8 +1655,18 @@ const ( OpAMD64VPHADDD256 OpAMD64VPHSUBD256 OpAMD64VPOPCNTD256 + OpAMD64VPROLVD256 + OpAMD64VPRORVD256 OpAMD64VPDPWSSDS256 OpAMD64VPDPBUSDS256 + OpAMD64VPSLLD256 + OpAMD64VPSRLD256 + OpAMD64VPSRAD256 + OpAMD64VPSLLVD256 + OpAMD64VPSHLDVD256 + OpAMD64VPSRLVD256 + OpAMD64VPSHRDVD256 + OpAMD64VPSRAVD256 OpAMD64VPSIGND256 OpAMD64VPSUBD256 OpAMD64VPDPBUSD256 @@ -1599,12 +1683,32 @@ const ( OpAMD64VPMULLQMasked128 OpAMD64VPORQMasked128 OpAMD64VPOPCNTQMasked128 + OpAMD64VPROLVQMasked128 + OpAMD64VPRORVQMasked128 + OpAMD64VPSLLQMasked128 + OpAMD64VPSRLQMasked128 + OpAMD64VPSRAQMasked128 + OpAMD64VPSLLVQMasked128 + OpAMD64VPSHLDVQMasked128 + OpAMD64VPSRLVQMasked128 + OpAMD64VPSHRDVQMasked128 + OpAMD64VPSRAVQMasked128 OpAMD64VPSUBQMasked128 OpAMD64VPXORQMasked128 OpAMD64VPMAXSQ128 OpAMD64VPMINSQ128 OpAMD64VPMULLQ128 OpAMD64VPOPCNTQ128 + OpAMD64VPROLVQ128 + OpAMD64VPRORVQ128 + OpAMD64VPSLLQ128 + OpAMD64VPSRLQ128 + OpAMD64VPSRAQ128 + OpAMD64VPSLLVQ128 + OpAMD64VPSHLDVQ128 + OpAMD64VPSRLVQ128 + OpAMD64VPSHRDVQ128 + OpAMD64VPSRAVQ128 OpAMD64VPSUBQ128 OpAMD64VPABSQ256 OpAMD64VPADDQ256 @@ -1620,12 +1724,32 @@ const ( OpAMD64VPMULLQMasked256 OpAMD64VPORQMasked256 OpAMD64VPOPCNTQMasked256 + 
OpAMD64VPROLVQMasked256 + OpAMD64VPRORVQMasked256 + OpAMD64VPSLLQMasked256 + OpAMD64VPSRLQMasked256 + OpAMD64VPSRAQMasked256 + OpAMD64VPSLLVQMasked256 + OpAMD64VPSHLDVQMasked256 + OpAMD64VPSRLVQMasked256 + OpAMD64VPSHRDVQMasked256 + OpAMD64VPSRAVQMasked256 OpAMD64VPSUBQMasked256 OpAMD64VPXORQMasked256 OpAMD64VPMAXSQ256 OpAMD64VPMINSQ256 OpAMD64VPMULLQ256 OpAMD64VPOPCNTQ256 + OpAMD64VPROLVQ256 + OpAMD64VPRORVQ256 + OpAMD64VPSLLQ256 + OpAMD64VPSRLQ256 + OpAMD64VPSRAQ256 + OpAMD64VPSLLVQ256 + OpAMD64VPSHLDVQ256 + OpAMD64VPSRLVQ256 + OpAMD64VPSHRDVQ256 + OpAMD64VPSRAVQ256 OpAMD64VPSUBQ256 OpAMD64VPABSQ512 OpAMD64VPADDQ512 @@ -1641,6 +1765,16 @@ const ( OpAMD64VPMULLQMasked512 OpAMD64VPORQMasked512 OpAMD64VPOPCNTQMasked512 + OpAMD64VPROLVQMasked512 + OpAMD64VPRORVQMasked512 + OpAMD64VPSLLQMasked512 + OpAMD64VPSRLQMasked512 + OpAMD64VPSRAQMasked512 + OpAMD64VPSLLVQMasked512 + OpAMD64VPSHLDVQMasked512 + OpAMD64VPSRLVQMasked512 + OpAMD64VPSHRDVQMasked512 + OpAMD64VPSRAVQMasked512 OpAMD64VPSUBQMasked512 OpAMD64VPXORQMasked512 OpAMD64VPMAXSQ512 @@ -1649,6 +1783,16 @@ const ( OpAMD64VPMULLQ512 OpAMD64VPORQ512 OpAMD64VPOPCNTQ512 + OpAMD64VPROLVQ512 + OpAMD64VPRORVQ512 + OpAMD64VPSLLQ512 + OpAMD64VPSRLQ512 + OpAMD64VPSRAQ512 + OpAMD64VPSLLVQ512 + OpAMD64VPSHLDVQ512 + OpAMD64VPSRLVQ512 + OpAMD64VPSHRDVQ512 + OpAMD64VPSRAVQ512 OpAMD64VPSUBQ512 OpAMD64VPXORQ512 OpAMD64VPABSB128 @@ -1834,28 +1978,88 @@ const ( OpAMD64VCMPPDMasked512 OpAMD64VPCMPW256 OpAMD64VPCMPWMasked256 + OpAMD64VPSHLDWMasked256 + OpAMD64VPSHRDWMasked256 + OpAMD64VPSHLDW256 + OpAMD64VPSHRDW256 OpAMD64VPCMPW512 OpAMD64VPCMPWMasked512 + OpAMD64VPSHLDWMasked512 + OpAMD64VPSHRDWMasked512 + OpAMD64VPSHLDW512 + OpAMD64VPSHRDW512 OpAMD64VPEXTRW128 OpAMD64VPCMPW128 OpAMD64VPCMPWMasked128 + OpAMD64VPSHLDWMasked128 + OpAMD64VPSHRDWMasked128 OpAMD64VPINSRW128 + OpAMD64VPSHLDW128 + OpAMD64VPSHRDW128 OpAMD64VPCMPD512 OpAMD64VPCMPDMasked512 + OpAMD64VPROLDMasked512 + OpAMD64VPRORDMasked512 + OpAMD64VPSHLDDMasked512 + OpAMD64VPSHRDDMasked512 + OpAMD64VPROLD512 + OpAMD64VPRORD512 + OpAMD64VPSHLDD512 + OpAMD64VPSHRDD512 OpAMD64VPEXTRD128 OpAMD64VPCMPD128 OpAMD64VPCMPDMasked128 + OpAMD64VPROLDMasked128 + OpAMD64VPRORDMasked128 + OpAMD64VPSHLDDMasked128 + OpAMD64VPSHRDDMasked128 + OpAMD64VPROLD128 + OpAMD64VPRORD128 OpAMD64VPINSRD128 + OpAMD64VPSHLDD128 + OpAMD64VPSHRDD128 OpAMD64VPCMPD256 OpAMD64VPCMPDMasked256 + OpAMD64VPROLDMasked256 + OpAMD64VPRORDMasked256 + OpAMD64VPSHLDDMasked256 + OpAMD64VPSHRDDMasked256 + OpAMD64VPROLD256 + OpAMD64VPRORD256 + OpAMD64VPSHLDD256 + OpAMD64VPSHRDD256 OpAMD64VPEXTRQ128 OpAMD64VPCMPQ128 OpAMD64VPCMPQMasked128 + OpAMD64VPROLQMasked128 + OpAMD64VPRORQMasked128 + OpAMD64VPSHLDQMasked128 + OpAMD64VPSHRDQMasked128 + OpAMD64VPROLQ128 + OpAMD64VPRORQ128 OpAMD64VPINSRQ128 + OpAMD64VPSHLDQ128 + OpAMD64VPSHRDQ128 OpAMD64VPCMPQ256 OpAMD64VPCMPQMasked256 + OpAMD64VPROLQMasked256 + OpAMD64VPRORQMasked256 + OpAMD64VPSHLDQMasked256 + OpAMD64VPSHRDQMasked256 + OpAMD64VPROLQ256 + OpAMD64VPRORQ256 + OpAMD64VPSHLDQ256 + OpAMD64VPSHRDQ256 OpAMD64VPCMPQ512 OpAMD64VPCMPQMasked512 + OpAMD64VPROLQMasked512 + OpAMD64VPRORQMasked512 + OpAMD64VPSHLDQMasked512 + OpAMD64VPSHRDQMasked512 + OpAMD64VPROLQ512 + OpAMD64VPRORQ512 + OpAMD64VPSHLDQ512 + OpAMD64VPSHRDQ512 OpAMD64VPEXTRB128 OpAMD64VPCMPB128 OpAMD64VPCMPBMasked128 @@ -4456,6 +4660,11 @@ const ( OpMaskedPopCountInt16x16 OpMaskedSaturatedAddInt16x16 OpMaskedSaturatedSubInt16x16 + OpMaskedShiftLeftInt16x16 + OpMaskedShiftLeftAndFillUpperFromInt16x16 + OpMaskedShiftRightInt16x16 + 
OpMaskedShiftRightAndFillUpperFromInt16x16 + OpMaskedShiftRightSignExtendedInt16x16 OpMaskedSubInt16x16 OpMaxInt16x16 OpMinInt16x16 @@ -4471,6 +4680,14 @@ const ( OpSaturatedPairwiseAddInt16x16 OpSaturatedPairwiseSubInt16x16 OpSaturatedSubInt16x16 + OpShiftAllLeftInt16x16 + OpShiftAllRightInt16x16 + OpShiftAllRightSignExtendedInt16x16 + OpShiftLeftInt16x16 + OpShiftLeftAndFillUpperFromInt16x16 + OpShiftRightInt16x16 + OpShiftRightAndFillUpperFromInt16x16 + OpShiftRightSignExtendedInt16x16 OpSignInt16x16 OpSubInt16x16 OpXorInt16x16 @@ -4497,6 +4714,11 @@ const ( OpMaskedPopCountInt16x32 OpMaskedSaturatedAddInt16x32 OpMaskedSaturatedSubInt16x32 + OpMaskedShiftLeftInt16x32 + OpMaskedShiftLeftAndFillUpperFromInt16x32 + OpMaskedShiftRightInt16x32 + OpMaskedShiftRightAndFillUpperFromInt16x32 + OpMaskedShiftRightSignExtendedInt16x32 OpMaskedSubInt16x32 OpMaxInt16x32 OpMinInt16x32 @@ -4507,6 +4729,11 @@ const ( OpPopCountInt16x32 OpSaturatedAddInt16x32 OpSaturatedSubInt16x32 + OpShiftLeftInt16x32 + OpShiftLeftAndFillUpperFromInt16x32 + OpShiftRightInt16x32 + OpShiftRightAndFillUpperFromInt16x32 + OpShiftRightSignExtendedInt16x32 OpSubInt16x32 OpAbsoluteInt16x8 OpAddInt16x8 @@ -4533,6 +4760,11 @@ const ( OpMaskedPopCountInt16x8 OpMaskedSaturatedAddInt16x8 OpMaskedSaturatedSubInt16x8 + OpMaskedShiftLeftInt16x8 + OpMaskedShiftLeftAndFillUpperFromInt16x8 + OpMaskedShiftRightInt16x8 + OpMaskedShiftRightAndFillUpperFromInt16x8 + OpMaskedShiftRightSignExtendedInt16x8 OpMaskedSubInt16x8 OpMaxInt16x8 OpMinInt16x8 @@ -4548,6 +4780,14 @@ const ( OpSaturatedPairwiseAddInt16x8 OpSaturatedPairwiseSubInt16x8 OpSaturatedSubInt16x8 + OpShiftAllLeftInt16x8 + OpShiftAllRightInt16x8 + OpShiftAllRightSignExtendedInt16x8 + OpShiftLeftInt16x8 + OpShiftLeftAndFillUpperFromInt16x8 + OpShiftRightInt16x8 + OpShiftRightAndFillUpperFromInt16x8 + OpShiftRightSignExtendedInt16x8 OpSignInt16x8 OpSubInt16x8 OpXorInt16x8 @@ -4576,8 +4816,15 @@ const ( OpMaskedOrInt32x16 OpMaskedPairDotProdAccumulateInt32x16 OpMaskedPopCountInt32x16 + OpMaskedRotateLeftInt32x16 + OpMaskedRotateRightInt32x16 OpMaskedSaturatedPairDotProdAccumulateInt32x16 OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16 + OpMaskedShiftLeftInt32x16 + OpMaskedShiftLeftAndFillUpperFromInt32x16 + OpMaskedShiftRightInt32x16 + OpMaskedShiftRightAndFillUpperFromInt32x16 + OpMaskedShiftRightSignExtendedInt32x16 OpMaskedSubInt32x16 OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x16 OpMaskedXorInt32x16 @@ -4588,8 +4835,15 @@ const ( OpOrInt32x16 OpPairDotProdAccumulateInt32x16 OpPopCountInt32x16 + OpRotateLeftInt32x16 + OpRotateRightInt32x16 OpSaturatedPairDotProdAccumulateInt32x16 OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16 + OpShiftLeftInt32x16 + OpShiftLeftAndFillUpperFromInt32x16 + OpShiftRightInt32x16 + OpShiftRightAndFillUpperFromInt32x16 + OpShiftRightSignExtendedInt32x16 OpSubInt32x16 OpUnsignedSignedQuadDotProdAccumulateInt32x16 OpXorInt32x16 @@ -4618,8 +4872,15 @@ const ( OpMaskedOrInt32x4 OpMaskedPairDotProdAccumulateInt32x4 OpMaskedPopCountInt32x4 + OpMaskedRotateLeftInt32x4 + OpMaskedRotateRightInt32x4 OpMaskedSaturatedPairDotProdAccumulateInt32x4 OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4 + OpMaskedShiftLeftInt32x4 + OpMaskedShiftLeftAndFillUpperFromInt32x4 + OpMaskedShiftRightInt32x4 + OpMaskedShiftRightAndFillUpperFromInt32x4 + OpMaskedShiftRightSignExtendedInt32x4 OpMaskedSubInt32x4 OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x4 OpMaskedXorInt32x4 @@ -4633,8 +4894,18 @@ const ( OpPairwiseAddInt32x4 
OpPairwiseSubInt32x4 OpPopCountInt32x4 + OpRotateLeftInt32x4 + OpRotateRightInt32x4 OpSaturatedPairDotProdAccumulateInt32x4 OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4 + OpShiftAllLeftInt32x4 + OpShiftAllRightInt32x4 + OpShiftAllRightSignExtendedInt32x4 + OpShiftLeftInt32x4 + OpShiftLeftAndFillUpperFromInt32x4 + OpShiftRightInt32x4 + OpShiftRightAndFillUpperFromInt32x4 + OpShiftRightSignExtendedInt32x4 OpSignInt32x4 OpSubInt32x4 OpUnsignedSignedQuadDotProdAccumulateInt32x4 @@ -4664,8 +4935,15 @@ const ( OpMaskedOrInt32x8 OpMaskedPairDotProdAccumulateInt32x8 OpMaskedPopCountInt32x8 + OpMaskedRotateLeftInt32x8 + OpMaskedRotateRightInt32x8 OpMaskedSaturatedPairDotProdAccumulateInt32x8 OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8 + OpMaskedShiftLeftInt32x8 + OpMaskedShiftLeftAndFillUpperFromInt32x8 + OpMaskedShiftRightInt32x8 + OpMaskedShiftRightAndFillUpperFromInt32x8 + OpMaskedShiftRightSignExtendedInt32x8 OpMaskedSubInt32x8 OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x8 OpMaskedXorInt32x8 @@ -4679,8 +4957,18 @@ const ( OpPairwiseAddInt32x8 OpPairwiseSubInt32x8 OpPopCountInt32x8 + OpRotateLeftInt32x8 + OpRotateRightInt32x8 OpSaturatedPairDotProdAccumulateInt32x8 OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8 + OpShiftAllLeftInt32x8 + OpShiftAllRightInt32x8 + OpShiftAllRightSignExtendedInt32x8 + OpShiftLeftInt32x8 + OpShiftLeftAndFillUpperFromInt32x8 + OpShiftRightInt32x8 + OpShiftRightAndFillUpperFromInt32x8 + OpShiftRightSignExtendedInt32x8 OpSignInt32x8 OpSubInt32x8 OpUnsignedSignedQuadDotProdAccumulateInt32x8 @@ -4710,6 +4998,16 @@ const ( OpMaskedNotEqualInt64x2 OpMaskedOrInt64x2 OpMaskedPopCountInt64x2 + OpMaskedRotateLeftInt64x2 + OpMaskedRotateRightInt64x2 + OpMaskedShiftAllLeftInt64x2 + OpMaskedShiftAllRightInt64x2 + OpMaskedShiftAllRightSignExtendedInt64x2 + OpMaskedShiftLeftInt64x2 + OpMaskedShiftLeftAndFillUpperFromInt64x2 + OpMaskedShiftRightInt64x2 + OpMaskedShiftRightAndFillUpperFromInt64x2 + OpMaskedShiftRightSignExtendedInt64x2 OpMaskedSubInt64x2 OpMaskedXorInt64x2 OpMaxInt64x2 @@ -4719,6 +5017,16 @@ const ( OpNotEqualInt64x2 OpOrInt64x2 OpPopCountInt64x2 + OpRotateLeftInt64x2 + OpRotateRightInt64x2 + OpShiftAllLeftInt64x2 + OpShiftAllRightInt64x2 + OpShiftAllRightSignExtendedInt64x2 + OpShiftLeftInt64x2 + OpShiftLeftAndFillUpperFromInt64x2 + OpShiftRightInt64x2 + OpShiftRightAndFillUpperFromInt64x2 + OpShiftRightSignExtendedInt64x2 OpSubInt64x2 OpXorInt64x2 OpAbsoluteInt64x4 @@ -4746,6 +5054,16 @@ const ( OpMaskedNotEqualInt64x4 OpMaskedOrInt64x4 OpMaskedPopCountInt64x4 + OpMaskedRotateLeftInt64x4 + OpMaskedRotateRightInt64x4 + OpMaskedShiftAllLeftInt64x4 + OpMaskedShiftAllRightInt64x4 + OpMaskedShiftAllRightSignExtendedInt64x4 + OpMaskedShiftLeftInt64x4 + OpMaskedShiftLeftAndFillUpperFromInt64x4 + OpMaskedShiftRightInt64x4 + OpMaskedShiftRightAndFillUpperFromInt64x4 + OpMaskedShiftRightSignExtendedInt64x4 OpMaskedSubInt64x4 OpMaskedXorInt64x4 OpMaxInt64x4 @@ -4755,6 +5073,16 @@ const ( OpNotEqualInt64x4 OpOrInt64x4 OpPopCountInt64x4 + OpRotateLeftInt64x4 + OpRotateRightInt64x4 + OpShiftAllLeftInt64x4 + OpShiftAllRightInt64x4 + OpShiftAllRightSignExtendedInt64x4 + OpShiftLeftInt64x4 + OpShiftLeftAndFillUpperFromInt64x4 + OpShiftRightInt64x4 + OpShiftRightAndFillUpperFromInt64x4 + OpShiftRightSignExtendedInt64x4 OpSubInt64x4 OpXorInt64x4 OpAbsoluteInt64x8 @@ -4782,6 +5110,16 @@ const ( OpMaskedNotEqualInt64x8 OpMaskedOrInt64x8 OpMaskedPopCountInt64x8 + OpMaskedRotateLeftInt64x8 + OpMaskedRotateRightInt64x8 + OpMaskedShiftAllLeftInt64x8 + 
OpMaskedShiftAllRightInt64x8 + OpMaskedShiftAllRightSignExtendedInt64x8 + OpMaskedShiftLeftInt64x8 + OpMaskedShiftLeftAndFillUpperFromInt64x8 + OpMaskedShiftRightInt64x8 + OpMaskedShiftRightAndFillUpperFromInt64x8 + OpMaskedShiftRightSignExtendedInt64x8 OpMaskedSubInt64x8 OpMaskedXorInt64x8 OpMaxInt64x8 @@ -4791,6 +5129,16 @@ const ( OpNotEqualInt64x8 OpOrInt64x8 OpPopCountInt64x8 + OpRotateLeftInt64x8 + OpRotateRightInt64x8 + OpShiftAllLeftInt64x8 + OpShiftAllRightInt64x8 + OpShiftAllRightSignExtendedInt64x8 + OpShiftLeftInt64x8 + OpShiftLeftAndFillUpperFromInt64x8 + OpShiftRightInt64x8 + OpShiftRightAndFillUpperFromInt64x8 + OpShiftRightSignExtendedInt64x8 OpSubInt64x8 OpXorInt64x8 OpAbsoluteInt8x16 @@ -4910,6 +5258,11 @@ const ( OpMaskedPopCountUint16x16 OpMaskedSaturatedAddUint16x16 OpMaskedSaturatedSubUint16x16 + OpMaskedShiftLeftUint16x16 + OpMaskedShiftLeftAndFillUpperFromUint16x16 + OpMaskedShiftRightUint16x16 + OpMaskedShiftRightAndFillUpperFromUint16x16 + OpMaskedShiftRightSignExtendedUint16x16 OpMaskedSubUint16x16 OpMaxUint16x16 OpMinUint16x16 @@ -4921,6 +5274,13 @@ const ( OpPopCountUint16x16 OpSaturatedAddUint16x16 OpSaturatedSubUint16x16 + OpShiftAllLeftUint16x16 + OpShiftAllRightUint16x16 + OpShiftLeftUint16x16 + OpShiftLeftAndFillUpperFromUint16x16 + OpShiftRightUint16x16 + OpShiftRightAndFillUpperFromUint16x16 + OpShiftRightSignExtendedUint16x16 OpSubUint16x16 OpXorUint16x16 OpAddUint16x32 @@ -4944,6 +5304,11 @@ const ( OpMaskedPopCountUint16x32 OpMaskedSaturatedAddUint16x32 OpMaskedSaturatedSubUint16x32 + OpMaskedShiftLeftUint16x32 + OpMaskedShiftLeftAndFillUpperFromUint16x32 + OpMaskedShiftRightUint16x32 + OpMaskedShiftRightAndFillUpperFromUint16x32 + OpMaskedShiftRightSignExtendedUint16x32 OpMaskedSubUint16x32 OpMaxUint16x32 OpMinUint16x32 @@ -4952,6 +5317,11 @@ const ( OpPopCountUint16x32 OpSaturatedAddUint16x32 OpSaturatedSubUint16x32 + OpShiftLeftUint16x32 + OpShiftLeftAndFillUpperFromUint16x32 + OpShiftRightUint16x32 + OpShiftRightAndFillUpperFromUint16x32 + OpShiftRightSignExtendedUint16x32 OpSubUint16x32 OpAddUint16x8 OpAndUint16x8 @@ -4976,6 +5346,11 @@ const ( OpMaskedPopCountUint16x8 OpMaskedSaturatedAddUint16x8 OpMaskedSaturatedSubUint16x8 + OpMaskedShiftLeftUint16x8 + OpMaskedShiftLeftAndFillUpperFromUint16x8 + OpMaskedShiftRightUint16x8 + OpMaskedShiftRightAndFillUpperFromUint16x8 + OpMaskedShiftRightSignExtendedUint16x8 OpMaskedSubUint16x8 OpMaxUint16x8 OpMinUint16x8 @@ -4987,6 +5362,13 @@ const ( OpPopCountUint16x8 OpSaturatedAddUint16x8 OpSaturatedSubUint16x8 + OpShiftAllLeftUint16x8 + OpShiftAllRightUint16x8 + OpShiftLeftUint16x8 + OpShiftLeftAndFillUpperFromUint16x8 + OpShiftRightUint16x8 + OpShiftRightAndFillUpperFromUint16x8 + OpShiftRightSignExtendedUint16x8 OpSubUint16x8 OpXorUint16x8 OpAddUint32x16 @@ -5010,7 +5392,14 @@ const ( OpMaskedNotEqualUint32x16 OpMaskedOrUint32x16 OpMaskedPopCountUint32x16 + OpMaskedRotateLeftUint32x16 + OpMaskedRotateRightUint32x16 OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16 + OpMaskedShiftLeftUint32x16 + OpMaskedShiftLeftAndFillUpperFromUint32x16 + OpMaskedShiftRightUint32x16 + OpMaskedShiftRightAndFillUpperFromUint32x16 + OpMaskedShiftRightSignExtendedUint32x16 OpMaskedSubUint32x16 OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x16 OpMaskedXorUint32x16 @@ -5019,7 +5408,14 @@ const ( OpNotEqualUint32x16 OpOrUint32x16 OpPopCountUint32x16 + OpRotateLeftUint32x16 + OpRotateRightUint32x16 OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16 + OpShiftLeftUint32x16 + OpShiftLeftAndFillUpperFromUint32x16 
+ OpShiftRightUint32x16 + OpShiftRightAndFillUpperFromUint32x16 + OpShiftRightSignExtendedUint32x16 OpSubUint32x16 OpUnsignedSignedQuadDotProdAccumulateUint32x16 OpXorUint32x16 @@ -5044,7 +5440,14 @@ const ( OpMaskedNotEqualUint32x4 OpMaskedOrUint32x4 OpMaskedPopCountUint32x4 + OpMaskedRotateLeftUint32x4 + OpMaskedRotateRightUint32x4 OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4 + OpMaskedShiftLeftUint32x4 + OpMaskedShiftLeftAndFillUpperFromUint32x4 + OpMaskedShiftRightUint32x4 + OpMaskedShiftRightAndFillUpperFromUint32x4 + OpMaskedShiftRightSignExtendedUint32x4 OpMaskedSubUint32x4 OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x4 OpMaskedXorUint32x4 @@ -5056,7 +5459,16 @@ const ( OpPairwiseAddUint32x4 OpPairwiseSubUint32x4 OpPopCountUint32x4 + OpRotateLeftUint32x4 + OpRotateRightUint32x4 OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4 + OpShiftAllLeftUint32x4 + OpShiftAllRightUint32x4 + OpShiftLeftUint32x4 + OpShiftLeftAndFillUpperFromUint32x4 + OpShiftRightUint32x4 + OpShiftRightAndFillUpperFromUint32x4 + OpShiftRightSignExtendedUint32x4 OpSubUint32x4 OpUnsignedSignedQuadDotProdAccumulateUint32x4 OpXorUint32x4 @@ -5081,7 +5493,14 @@ const ( OpMaskedNotEqualUint32x8 OpMaskedOrUint32x8 OpMaskedPopCountUint32x8 + OpMaskedRotateLeftUint32x8 + OpMaskedRotateRightUint32x8 OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8 + OpMaskedShiftLeftUint32x8 + OpMaskedShiftLeftAndFillUpperFromUint32x8 + OpMaskedShiftRightUint32x8 + OpMaskedShiftRightAndFillUpperFromUint32x8 + OpMaskedShiftRightSignExtendedUint32x8 OpMaskedSubUint32x8 OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x8 OpMaskedXorUint32x8 @@ -5093,7 +5512,16 @@ const ( OpPairwiseAddUint32x8 OpPairwiseSubUint32x8 OpPopCountUint32x8 + OpRotateLeftUint32x8 + OpRotateRightUint32x8 OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8 + OpShiftAllLeftUint32x8 + OpShiftAllRightUint32x8 + OpShiftLeftUint32x8 + OpShiftLeftAndFillUpperFromUint32x8 + OpShiftRightUint32x8 + OpShiftRightAndFillUpperFromUint32x8 + OpShiftRightSignExtendedUint32x8 OpSubUint32x8 OpUnsignedSignedQuadDotProdAccumulateUint32x8 OpXorUint32x8 @@ -5119,6 +5547,15 @@ const ( OpMaskedNotEqualUint64x2 OpMaskedOrUint64x2 OpMaskedPopCountUint64x2 + OpMaskedRotateLeftUint64x2 + OpMaskedRotateRightUint64x2 + OpMaskedShiftAllLeftUint64x2 + OpMaskedShiftAllRightUint64x2 + OpMaskedShiftLeftUint64x2 + OpMaskedShiftLeftAndFillUpperFromUint64x2 + OpMaskedShiftRightUint64x2 + OpMaskedShiftRightAndFillUpperFromUint64x2 + OpMaskedShiftRightSignExtendedUint64x2 OpMaskedSubUint64x2 OpMaskedXorUint64x2 OpMaxUint64x2 @@ -5127,6 +5564,15 @@ const ( OpNotEqualUint64x2 OpOrUint64x2 OpPopCountUint64x2 + OpRotateLeftUint64x2 + OpRotateRightUint64x2 + OpShiftAllLeftUint64x2 + OpShiftAllRightUint64x2 + OpShiftLeftUint64x2 + OpShiftLeftAndFillUpperFromUint64x2 + OpShiftRightUint64x2 + OpShiftRightAndFillUpperFromUint64x2 + OpShiftRightSignExtendedUint64x2 OpSubUint64x2 OpXorUint64x2 OpAddUint64x4 @@ -5151,6 +5597,15 @@ const ( OpMaskedNotEqualUint64x4 OpMaskedOrUint64x4 OpMaskedPopCountUint64x4 + OpMaskedRotateLeftUint64x4 + OpMaskedRotateRightUint64x4 + OpMaskedShiftAllLeftUint64x4 + OpMaskedShiftAllRightUint64x4 + OpMaskedShiftLeftUint64x4 + OpMaskedShiftLeftAndFillUpperFromUint64x4 + OpMaskedShiftRightUint64x4 + OpMaskedShiftRightAndFillUpperFromUint64x4 + OpMaskedShiftRightSignExtendedUint64x4 OpMaskedSubUint64x4 OpMaskedXorUint64x4 OpMaxUint64x4 @@ -5159,6 +5614,15 @@ const ( OpNotEqualUint64x4 OpOrUint64x4 OpPopCountUint64x4 + OpRotateLeftUint64x4 + 
OpRotateRightUint64x4 + OpShiftAllLeftUint64x4 + OpShiftAllRightUint64x4 + OpShiftLeftUint64x4 + OpShiftLeftAndFillUpperFromUint64x4 + OpShiftRightUint64x4 + OpShiftRightAndFillUpperFromUint64x4 + OpShiftRightSignExtendedUint64x4 OpSubUint64x4 OpXorUint64x4 OpAddUint64x8 @@ -5183,6 +5647,15 @@ const ( OpMaskedNotEqualUint64x8 OpMaskedOrUint64x8 OpMaskedPopCountUint64x8 + OpMaskedRotateLeftUint64x8 + OpMaskedRotateRightUint64x8 + OpMaskedShiftAllLeftUint64x8 + OpMaskedShiftAllRightUint64x8 + OpMaskedShiftLeftUint64x8 + OpMaskedShiftLeftAndFillUpperFromUint64x8 + OpMaskedShiftRightUint64x8 + OpMaskedShiftRightAndFillUpperFromUint64x8 + OpMaskedShiftRightSignExtendedUint64x8 OpMaskedSubUint64x8 OpMaskedXorUint64x8 OpMaxUint64x8 @@ -5191,6 +5664,15 @@ const ( OpNotEqualUint64x8 OpOrUint64x8 OpPopCountUint64x8 + OpRotateLeftUint64x8 + OpRotateRightUint64x8 + OpShiftAllLeftUint64x8 + OpShiftAllRightUint64x8 + OpShiftLeftUint64x8 + OpShiftLeftAndFillUpperFromUint64x8 + OpShiftRightUint64x8 + OpShiftRightAndFillUpperFromUint64x8 + OpShiftRightSignExtendedUint64x8 OpSubUint64x8 OpXorUint64x8 OpAddUint8x16 @@ -5483,20 +5965,140 @@ const ( OpRoundWithPrecisionFloat64x8 OpTruncSuppressExceptionWithPrecisionFloat64x8 OpTruncWithPrecisionFloat64x8 + OpMaskedShiftAllLeftAndFillUpperFromInt16x16 + OpMaskedShiftAllRightAndFillUpperFromInt16x16 + OpShiftAllLeftAndFillUpperFromInt16x16 + OpShiftAllRightAndFillUpperFromInt16x16 + OpMaskedShiftAllLeftAndFillUpperFromInt16x32 + OpMaskedShiftAllRightAndFillUpperFromInt16x32 + OpShiftAllLeftAndFillUpperFromInt16x32 + OpShiftAllRightAndFillUpperFromInt16x32 OpGetElemInt16x8 + OpMaskedShiftAllLeftAndFillUpperFromInt16x8 + OpMaskedShiftAllRightAndFillUpperFromInt16x8 OpSetElemInt16x8 + OpShiftAllLeftAndFillUpperFromInt16x8 + OpShiftAllRightAndFillUpperFromInt16x8 + OpMaskedRotateAllLeftInt32x16 + OpMaskedRotateAllRightInt32x16 + OpMaskedShiftAllLeftAndFillUpperFromInt32x16 + OpMaskedShiftAllRightAndFillUpperFromInt32x16 + OpRotateAllLeftInt32x16 + OpRotateAllRightInt32x16 + OpShiftAllLeftAndFillUpperFromInt32x16 + OpShiftAllRightAndFillUpperFromInt32x16 OpGetElemInt32x4 + OpMaskedRotateAllLeftInt32x4 + OpMaskedRotateAllRightInt32x4 + OpMaskedShiftAllLeftAndFillUpperFromInt32x4 + OpMaskedShiftAllRightAndFillUpperFromInt32x4 + OpRotateAllLeftInt32x4 + OpRotateAllRightInt32x4 OpSetElemInt32x4 + OpShiftAllLeftAndFillUpperFromInt32x4 + OpShiftAllRightAndFillUpperFromInt32x4 + OpMaskedRotateAllLeftInt32x8 + OpMaskedRotateAllRightInt32x8 + OpMaskedShiftAllLeftAndFillUpperFromInt32x8 + OpMaskedShiftAllRightAndFillUpperFromInt32x8 + OpRotateAllLeftInt32x8 + OpRotateAllRightInt32x8 + OpShiftAllLeftAndFillUpperFromInt32x8 + OpShiftAllRightAndFillUpperFromInt32x8 OpGetElemInt64x2 + OpMaskedRotateAllLeftInt64x2 + OpMaskedRotateAllRightInt64x2 + OpMaskedShiftAllLeftAndFillUpperFromInt64x2 + OpMaskedShiftAllRightAndFillUpperFromInt64x2 + OpRotateAllLeftInt64x2 + OpRotateAllRightInt64x2 OpSetElemInt64x2 + OpShiftAllLeftAndFillUpperFromInt64x2 + OpShiftAllRightAndFillUpperFromInt64x2 + OpMaskedRotateAllLeftInt64x4 + OpMaskedRotateAllRightInt64x4 + OpMaskedShiftAllLeftAndFillUpperFromInt64x4 + OpMaskedShiftAllRightAndFillUpperFromInt64x4 + OpRotateAllLeftInt64x4 + OpRotateAllRightInt64x4 + OpShiftAllLeftAndFillUpperFromInt64x4 + OpShiftAllRightAndFillUpperFromInt64x4 + OpMaskedRotateAllLeftInt64x8 + OpMaskedRotateAllRightInt64x8 + OpMaskedShiftAllLeftAndFillUpperFromInt64x8 + OpMaskedShiftAllRightAndFillUpperFromInt64x8 + OpRotateAllLeftInt64x8 + OpRotateAllRightInt64x8 + 
OpShiftAllLeftAndFillUpperFromInt64x8 + OpShiftAllRightAndFillUpperFromInt64x8 OpGetElemInt8x16 OpSetElemInt8x16 + OpMaskedShiftAllLeftAndFillUpperFromUint16x16 + OpMaskedShiftAllRightAndFillUpperFromUint16x16 + OpShiftAllLeftAndFillUpperFromUint16x16 + OpShiftAllRightAndFillUpperFromUint16x16 + OpMaskedShiftAllLeftAndFillUpperFromUint16x32 + OpMaskedShiftAllRightAndFillUpperFromUint16x32 + OpShiftAllLeftAndFillUpperFromUint16x32 + OpShiftAllRightAndFillUpperFromUint16x32 OpGetElemUint16x8 + OpMaskedShiftAllLeftAndFillUpperFromUint16x8 + OpMaskedShiftAllRightAndFillUpperFromUint16x8 OpSetElemUint16x8 + OpShiftAllLeftAndFillUpperFromUint16x8 + OpShiftAllRightAndFillUpperFromUint16x8 + OpMaskedRotateAllLeftUint32x16 + OpMaskedRotateAllRightUint32x16 + OpMaskedShiftAllLeftAndFillUpperFromUint32x16 + OpMaskedShiftAllRightAndFillUpperFromUint32x16 + OpRotateAllLeftUint32x16 + OpRotateAllRightUint32x16 + OpShiftAllLeftAndFillUpperFromUint32x16 + OpShiftAllRightAndFillUpperFromUint32x16 OpGetElemUint32x4 + OpMaskedRotateAllLeftUint32x4 + OpMaskedRotateAllRightUint32x4 + OpMaskedShiftAllLeftAndFillUpperFromUint32x4 + OpMaskedShiftAllRightAndFillUpperFromUint32x4 + OpRotateAllLeftUint32x4 + OpRotateAllRightUint32x4 OpSetElemUint32x4 + OpShiftAllLeftAndFillUpperFromUint32x4 + OpShiftAllRightAndFillUpperFromUint32x4 + OpMaskedRotateAllLeftUint32x8 + OpMaskedRotateAllRightUint32x8 + OpMaskedShiftAllLeftAndFillUpperFromUint32x8 + OpMaskedShiftAllRightAndFillUpperFromUint32x8 + OpRotateAllLeftUint32x8 + OpRotateAllRightUint32x8 + OpShiftAllLeftAndFillUpperFromUint32x8 + OpShiftAllRightAndFillUpperFromUint32x8 OpGetElemUint64x2 + OpMaskedRotateAllLeftUint64x2 + OpMaskedRotateAllRightUint64x2 + OpMaskedShiftAllLeftAndFillUpperFromUint64x2 + OpMaskedShiftAllRightAndFillUpperFromUint64x2 + OpRotateAllLeftUint64x2 + OpRotateAllRightUint64x2 OpSetElemUint64x2 + OpShiftAllLeftAndFillUpperFromUint64x2 + OpShiftAllRightAndFillUpperFromUint64x2 + OpMaskedRotateAllLeftUint64x4 + OpMaskedRotateAllRightUint64x4 + OpMaskedShiftAllLeftAndFillUpperFromUint64x4 + OpMaskedShiftAllRightAndFillUpperFromUint64x4 + OpRotateAllLeftUint64x4 + OpRotateAllRightUint64x4 + OpShiftAllLeftAndFillUpperFromUint64x4 + OpShiftAllRightAndFillUpperFromUint64x4 + OpMaskedRotateAllLeftUint64x8 + OpMaskedRotateAllRightUint64x8 + OpMaskedShiftAllLeftAndFillUpperFromUint64x8 + OpMaskedShiftAllRightAndFillUpperFromUint64x8 + OpRotateAllLeftUint64x8 + OpRotateAllRightUint64x8 + OpShiftAllLeftAndFillUpperFromUint64x8 + OpShiftAllRightAndFillUpperFromUint64x8 OpGetElemUint8x16 OpSetElemUint8x16 ) @@ -21551,6 +22153,85 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSLLVWMasked256", + argLen: 3, + asm: x86.AVPSLLVW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDVWMasked256", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHLDVW, + reg: regInfo{ + inputs: []inputInfo{ + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 
+ }, + }, + }, + { + name: "VPSRLVWMasked256", + argLen: 3, + asm: x86.AVPSRLVW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDVWMasked256", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHRDVW, + reg: regInfo{ + inputs: []inputInfo{ + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAVWMasked256", + argLen: 3, + asm: x86.AVPSRAVW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPSUBWMasked256", argLen: 3, @@ -21738,6 +22419,122 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSLLW256", + argLen: 2, + asm: x86.AVPSLLW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLW256", + argLen: 2, + asm: x86.AVPSRLW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAW256", + argLen: 2, + asm: x86.AVPSRAW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSLLVW256", + argLen: 2, + asm: x86.AVPSLLVW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDVW256", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHLDVW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLVW256", + argLen: 2, + asm: x86.AVPSRLVW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 
X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDVW256", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHRDVW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAVW256", + argLen: 2, + asm: x86.AVPSRAVW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPSIGNW256", argLen: 2, @@ -21948,6 +22745,85 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSLLVWMasked512", + argLen: 3, + asm: x86.AVPSLLVW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDVWMasked512", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHLDVW, + reg: regInfo{ + inputs: []inputInfo{ + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLVWMasked512", + argLen: 3, + asm: x86.AVPSRLVW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDVWMasked512", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHRDVW, + reg: regInfo{ + inputs: []inputInfo{ + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAVWMasked512", + argLen: 3, + asm: x86.AVPSRAVW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPSUBWMasked512", argLen: 3, @@ -22079,6 +22955,80 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSLLVW512", + argLen: 2, + asm: x86.AVPSLLVW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 
X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDVW512", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHLDVW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLVW512", + argLen: 2, + asm: x86.AVPSRLVW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDVW512", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHRDVW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAVW512", + argLen: 2, + asm: x86.AVPSRAVW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPSUBW512", argLen: 2, @@ -22304,6 +23254,85 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSLLVWMasked128", + argLen: 3, + asm: x86.AVPSLLVW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDVWMasked128", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHLDVW, + reg: regInfo{ + inputs: []inputInfo{ + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLVWMasked128", + argLen: 3, + asm: x86.AVPSRLVW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDVWMasked128", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHRDVW, + reg: regInfo{ + inputs: []inputInfo{ + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 
X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAVWMasked128", + argLen: 3, + asm: x86.AVPSRAVW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPSUBWMasked128", argLen: 3, @@ -22491,6 +23520,122 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSLLW128", + argLen: 2, + asm: x86.AVPSLLW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLW128", + argLen: 2, + asm: x86.AVPSRLW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAW128", + argLen: 2, + asm: x86.AVPSRAW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSLLVW128", + argLen: 2, + asm: x86.AVPSLLVW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDVW128", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHLDVW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLVW128", + argLen: 2, + asm: x86.AVPSRLVW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDVW128", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHRDVW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAVW128", + argLen: 2, + asm: x86.AVPSRAVW, + reg: regInfo{ + inputs: 
[]inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPSIGNW128", argLen: 2, @@ -22732,6 +23877,36 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPROLVDMasked512", + argLen: 3, + asm: x86.AVPROLVD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPRORVDMasked512", + argLen: 3, + asm: x86.AVPRORVD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPDPWSSDSMasked512", argLen: 4, @@ -22766,6 +23941,85 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSLLVDMasked512", + argLen: 3, + asm: x86.AVPSLLVD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDVDMasked512", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHLDVD, + reg: regInfo{ + inputs: []inputInfo{ + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLVDMasked512", + argLen: 3, + asm: x86.AVPSRLVD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDVDMasked512", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHRDVD, + reg: regInfo{ + inputs: []inputInfo{ + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAVDMasked512", + argLen: 3, + asm: x86.AVPSRAVD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, 
{ name: "VPSUBDMasked512", argLen: 3, @@ -22903,6 +24157,34 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPROLVD512", + argLen: 2, + asm: x86.AVPROLVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPRORVD512", + argLen: 2, + asm: x86.AVPRORVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPDPWSSDS512", argLen: 3, @@ -22935,6 +24217,80 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSLLVD512", + argLen: 2, + asm: x86.AVPSLLVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDVD512", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHLDVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLVD512", + argLen: 2, + asm: x86.AVPSRLVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDVD512", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHRDVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAVD512", + argLen: 2, + asm: x86.AVPSRAVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPSUBD512", argLen: 2, @@ -23193,6 +24549,36 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPROLVDMasked128", + argLen: 3, + asm: x86.AVPROLVD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPRORVDMasked128", + argLen: 3, + asm: x86.AVPRORVD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // 
K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPDPWSSDSMasked128", argLen: 4, @@ -23227,6 +24613,85 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSLLVDMasked128", + argLen: 3, + asm: x86.AVPSLLVD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDVDMasked128", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHLDVD, + reg: regInfo{ + inputs: []inputInfo{ + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLVDMasked128", + argLen: 3, + asm: x86.AVPSRLVD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDVDMasked128", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHRDVD, + reg: regInfo{ + inputs: []inputInfo{ + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAVDMasked128", + argLen: 3, + asm: x86.AVPSRAVD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPSUBDMasked128", argLen: 3, @@ -23392,6 +24857,34 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPROLVD128", + argLen: 2, + asm: x86.AVPROLVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPRORVD128", + argLen: 2, + asm: x86.AVPRORVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPDPWSSDS128", argLen: 3, @@ -23424,6 +24917,122 @@ var opcodeTable = [...]opInfo{ 
}, }, }, + { + name: "VPSLLD128", + argLen: 2, + asm: x86.AVPSLLD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLD128", + argLen: 2, + asm: x86.AVPSRLD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAD128", + argLen: 2, + asm: x86.AVPSRAD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSLLVD128", + argLen: 2, + asm: x86.AVPSLLVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDVD128", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHLDVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLVD128", + argLen: 2, + asm: x86.AVPSRLVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDVD128", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHRDVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAVD128", + argLen: 2, + asm: x86.AVPSRAVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPSIGND128", argLen: 2, @@ -23681,6 +25290,36 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPROLVDMasked256", + argLen: 3, + asm: x86.AVPROLVD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 
X11 X12 X13 X14 + }, + }, + }, + { + name: "VPRORVDMasked256", + argLen: 3, + asm: x86.AVPRORVD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPDPWSSDSMasked256", argLen: 4, @@ -23715,6 +25354,85 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSLLVDMasked256", + argLen: 3, + asm: x86.AVPSLLVD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDVDMasked256", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHLDVD, + reg: regInfo{ + inputs: []inputInfo{ + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLVDMasked256", + argLen: 3, + asm: x86.AVPSRLVD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDVDMasked256", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHRDVD, + reg: regInfo{ + inputs: []inputInfo{ + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAVDMasked256", + argLen: 3, + asm: x86.AVPSRAVD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPSUBDMasked256", argLen: 3, @@ -23880,6 +25598,34 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPROLVD256", + argLen: 2, + asm: x86.AVPROLVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPRORVD256", + argLen: 2, + asm: x86.AVPRORVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 
2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPDPWSSDS256", argLen: 3, @@ -23912,6 +25658,122 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSLLD256", + argLen: 2, + asm: x86.AVPSLLD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLD256", + argLen: 2, + asm: x86.AVPSRLD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAD256", + argLen: 2, + asm: x86.AVPSRAD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSLLVD256", + argLen: 2, + asm: x86.AVPSLLVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDVD256", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHLDVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLVD256", + argLen: 2, + asm: x86.AVPSRLVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDVD256", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHRDVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAVD256", + argLen: 2, + asm: x86.AVPSRAVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPSIGND256", argLen: 2, @@ -24154,6 +26016,160 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPROLVQMasked128", + argLen: 3, + asm: x86.AVPROLVQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 
X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPRORVQMasked128", + argLen: 3, + asm: x86.AVPRORVQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSLLQMasked128", + argLen: 3, + asm: x86.AVPSLLQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLQMasked128", + argLen: 3, + asm: x86.AVPSRLQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAQMasked128", + argLen: 3, + asm: x86.AVPSRAQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSLLVQMasked128", + argLen: 3, + asm: x86.AVPSLLVQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDVQMasked128", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHLDVQ, + reg: regInfo{ + inputs: []inputInfo{ + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLVQMasked128", + argLen: 3, + asm: x86.AVPSRLVQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDVQMasked128", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHRDVQ, + reg: regInfo{ + inputs: []inputInfo{ + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 
X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAVQMasked128", + argLen: 3, + asm: x86.AVPSRAVQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPSUBQMasked128", argLen: 3, @@ -24243,6 +26259,150 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPROLVQ128", + argLen: 2, + asm: x86.AVPROLVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPRORVQ128", + argLen: 2, + asm: x86.AVPRORVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSLLQ128", + argLen: 2, + asm: x86.AVPSLLQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLQ128", + argLen: 2, + asm: x86.AVPSRLQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAQ128", + argLen: 2, + asm: x86.AVPSRAQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSLLVQ128", + argLen: 2, + asm: x86.AVPSLLVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDVQ128", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHLDVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLVQ128", + argLen: 2, + asm: x86.AVPSRLVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // 
X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDVQ128", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHRDVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAVQ128", + argLen: 2, + asm: x86.AVPSRAVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPSUBQ128", argLen: 2, @@ -24469,6 +26629,160 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPROLVQMasked256", + argLen: 3, + asm: x86.AVPROLVQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPRORVQMasked256", + argLen: 3, + asm: x86.AVPRORVQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSLLQMasked256", + argLen: 3, + asm: x86.AVPSLLQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLQMasked256", + argLen: 3, + asm: x86.AVPSRLQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAQMasked256", + argLen: 3, + asm: x86.AVPSRAQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSLLVQMasked256", + argLen: 3, + asm: x86.AVPSLLVQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDVQMasked256", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHLDVQ, + reg: regInfo{ + 
inputs: []inputInfo{ + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLVQMasked256", + argLen: 3, + asm: x86.AVPSRLVQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDVQMasked256", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHRDVQ, + reg: regInfo{ + inputs: []inputInfo{ + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAVQMasked256", + argLen: 3, + asm: x86.AVPSRAVQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPSUBQMasked256", argLen: 3, @@ -24558,6 +26872,150 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPROLVQ256", + argLen: 2, + asm: x86.AVPROLVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPRORVQ256", + argLen: 2, + asm: x86.AVPRORVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSLLQ256", + argLen: 2, + asm: x86.AVPSLLQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLQ256", + argLen: 2, + asm: x86.AVPSRLQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAQ256", + argLen: 2, + asm: x86.AVPSRAQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 
2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSLLVQ256", + argLen: 2, + asm: x86.AVPSLLVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDVQ256", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHLDVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLVQ256", + argLen: 2, + asm: x86.AVPSRLVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDVQ256", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHRDVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAVQ256", + argLen: 2, + asm: x86.AVPSRAVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPSUBQ256", argLen: 2, @@ -24784,6 +27242,160 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPROLVQMasked512", + argLen: 3, + asm: x86.AVPROLVQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPRORVQMasked512", + argLen: 3, + asm: x86.AVPRORVQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSLLQMasked512", + argLen: 3, + asm: x86.AVPSLLQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLQMasked512", + argLen: 3, + asm: x86.AVPSRLQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAQMasked512", + argLen: 3, + asm: x86.AVPSRAQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSLLVQMasked512", + argLen: 3, + asm: x86.AVPSLLVQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDVQMasked512", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHLDVQ, + reg: regInfo{ + inputs: []inputInfo{ + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLVQMasked512", + argLen: 3, + asm: x86.AVPSRLVQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDVQMasked512", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHRDVQ, + reg: regInfo{ + inputs: []inputInfo{ + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAVQMasked512", + argLen: 3, + asm: x86.AVPSRAVQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPSUBQMasked512", argLen: 3, @@ -24903,6 +27515,150 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPROLVQ512", + argLen: 2, + asm: x86.AVPROLVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPRORVQ512", + argLen: 2, + asm: x86.AVPRORVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 
X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSLLQ512", + argLen: 2, + asm: x86.AVPSLLQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLQ512", + argLen: 2, + asm: x86.AVPSRLQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAQ512", + argLen: 2, + asm: x86.AVPSRAQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSLLVQ512", + argLen: 2, + asm: x86.AVPSLLVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDVQ512", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHLDVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLVQ512", + argLen: 2, + asm: x86.AVPSRLVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDVQ512", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHRDVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAVQ512", + argLen: 2, + asm: x86.AVPSRAVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPSUBQ512", argLen: 2, @@ -27697,6 +30453,68 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSHLDWMasked256", + auxType: auxInt8, + argLen: 3, + asm: x86.AVPSHLDW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 
X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDWMasked256", + auxType: auxInt8, + argLen: 3, + asm: x86.AVPSHRDW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDW256", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSHLDW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDW256", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSHRDW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPCMPW512", auxType: auxInt8, @@ -27730,6 +30548,68 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSHLDWMasked512", + auxType: auxInt8, + argLen: 3, + asm: x86.AVPSHLDW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDWMasked512", + auxType: auxInt8, + argLen: 3, + asm: x86.AVPSHRDW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDW512", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSHLDW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDW512", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSHRDW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPEXTRW128", auxType: auxInt8, @@ -27776,6 +30656,38 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSHLDWMasked128", + auxType: auxInt8, + argLen: 3, + asm: x86.AVPSHLDW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 
X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDWMasked128", + auxType: auxInt8, + argLen: 3, + asm: x86.AVPSHRDW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPINSRW128", auxType: auxInt8, @@ -27791,6 +30703,36 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSHLDW128", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSHLDW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDW128", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSHRDW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPCMPD512", auxType: auxInt8, @@ -27824,6 +30766,126 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPROLDMasked512", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPROLD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPRORDMasked512", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPRORD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDDMasked512", + auxType: auxInt8, + argLen: 3, + asm: x86.AVPSHLDD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDDMasked512", + auxType: auxInt8, + argLen: 3, + asm: x86.AVPSHRDD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPROLD512", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPROLD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPRORD512", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPRORD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 
2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDD512", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSHLDD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDD512", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSHRDD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPEXTRD128", auxType: auxInt8, @@ -27870,6 +30932,96 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPROLDMasked128", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPROLD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPRORDMasked128", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPRORD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDDMasked128", + auxType: auxInt8, + argLen: 3, + asm: x86.AVPSHLDD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDDMasked128", + auxType: auxInt8, + argLen: 3, + asm: x86.AVPSHRDD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPROLD128", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPROLD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPRORD128", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPRORD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPINSRD128", auxType: auxInt8, @@ -27885,6 +31037,36 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSHLDD128", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSHLDD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, 
// X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDD128", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSHRDD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPCMPD256", auxType: auxInt8, @@ -27917,6 +31099,126 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPROLDMasked256", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPROLD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPRORDMasked256", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPRORD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDDMasked256", + auxType: auxInt8, + argLen: 3, + asm: x86.AVPSHLDD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDDMasked256", + auxType: auxInt8, + argLen: 3, + asm: x86.AVPSHRDD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPROLD256", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPROLD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPRORD256", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPRORD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDD256", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSHLDD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDD256", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSHRDD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 
X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPEXTRQ128", auxType: auxInt8, @@ -27963,6 +31265,96 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPROLQMasked128", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPROLQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPRORQMasked128", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPRORQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDQMasked128", + auxType: auxInt8, + argLen: 3, + asm: x86.AVPSHLDQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDQMasked128", + auxType: auxInt8, + argLen: 3, + asm: x86.AVPSHRDQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPROLQ128", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPROLQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPRORQ128", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPRORQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPINSRQ128", auxType: auxInt8, @@ -27978,6 +31370,36 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSHLDQ128", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSHLDQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDQ128", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSHRDQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPCMPQ256", auxType: auxInt8, @@ -28010,6 +31432,126 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPROLQMasked256", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPROLQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 
X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPRORQMasked256", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPRORQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDQMasked256", + auxType: auxInt8, + argLen: 3, + asm: x86.AVPSHLDQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDQMasked256", + auxType: auxInt8, + argLen: 3, + asm: x86.AVPSHRDQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPROLQ256", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPROLQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPRORQ256", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPRORQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDQ256", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSHLDQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDQ256", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSHRDQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPCMPQ512", auxType: auxInt8, @@ -28043,6 +31585,126 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPROLQMasked512", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPROLQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPRORQMasked512", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPRORQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: 
"VPSHLDQMasked512", + auxType: auxInt8, + argLen: 3, + asm: x86.AVPSHLDQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDQMasked512", + auxType: auxInt8, + argLen: 3, + asm: x86.AVPSHRDQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPROLQ512", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPROLQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPRORQ512", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPRORQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHLDQ512", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSHLDQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSHRDQ512", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSHRDQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPEXTRB128", auxType: auxInt8, @@ -57559,6 +61221,31 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, + { + name: "MaskedShiftLeftInt16x16", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftLeftAndFillUpperFromInt16x16", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightInt16x16", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftRightAndFillUpperFromInt16x16", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightSignExtendedInt16x16", + argLen: 3, + generic: true, + }, { name: "MaskedSubInt16x16", argLen: 3, @@ -57641,6 +61328,46 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "ShiftAllLeftInt16x16", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightInt16x16", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightSignExtendedInt16x16", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftInt16x16", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftAndFillUpperFromInt16x16", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightInt16x16", + argLen: 2, + generic: true, + }, + { + name: "ShiftRightAndFillUpperFromInt16x16", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightSignExtendedInt16x16", + argLen: 2, + generic: true, + }, { name: "SignInt16x16", argLen: 2, @@ -57782,6 
+61509,31 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, + { + name: "MaskedShiftLeftInt16x32", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftLeftAndFillUpperFromInt16x32", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightInt16x32", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftRightAndFillUpperFromInt16x32", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightSignExtendedInt16x32", + argLen: 3, + generic: true, + }, { name: "MaskedSubInt16x32", argLen: 3, @@ -57838,6 +61590,31 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "ShiftLeftInt16x32", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftAndFillUpperFromInt16x32", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightInt16x32", + argLen: 2, + generic: true, + }, + { + name: "ShiftRightAndFillUpperFromInt16x32", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightSignExtendedInt16x32", + argLen: 2, + generic: true, + }, { name: "SubInt16x32", argLen: 2, @@ -57979,6 +61756,31 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, + { + name: "MaskedShiftLeftInt16x8", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftLeftAndFillUpperFromInt16x8", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightInt16x8", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftRightAndFillUpperFromInt16x8", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightSignExtendedInt16x8", + argLen: 3, + generic: true, + }, { name: "MaskedSubInt16x8", argLen: 3, @@ -58061,6 +61863,46 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "ShiftAllLeftInt16x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightInt16x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightSignExtendedInt16x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftInt16x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftAndFillUpperFromInt16x8", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightInt16x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftRightAndFillUpperFromInt16x8", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightSignExtendedInt16x8", + argLen: 2, + generic: true, + }, { name: "SignInt16x8", argLen: 2, @@ -58213,6 +62055,16 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "MaskedRotateLeftInt32x16", + argLen: 3, + generic: true, + }, + { + name: "MaskedRotateRightInt32x16", + argLen: 3, + generic: true, + }, { name: "MaskedSaturatedPairDotProdAccumulateInt32x16", argLen: 4, @@ -58223,6 +62075,31 @@ var opcodeTable = [...]opInfo{ argLen: 4, generic: true, }, + { + name: "MaskedShiftLeftInt32x16", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftLeftAndFillUpperFromInt32x16", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightInt32x16", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftRightAndFillUpperFromInt32x16", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightSignExtendedInt32x16", + argLen: 3, + generic: true, + }, { name: "MaskedSubInt32x16", argLen: 3, @@ -58279,6 +62156,16 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "RotateLeftInt32x16", + argLen: 2, + generic: true, + }, + { + name: "RotateRightInt32x16", + argLen: 2, + generic: true, + }, { name: "SaturatedPairDotProdAccumulateInt32x16", argLen: 3, @@ -58289,6 +62176,31 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, + { + name: "ShiftLeftInt32x16", + argLen: 
2, + generic: true, + }, + { + name: "ShiftLeftAndFillUpperFromInt32x16", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightInt32x16", + argLen: 2, + generic: true, + }, + { + name: "ShiftRightAndFillUpperFromInt32x16", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightSignExtendedInt32x16", + argLen: 2, + generic: true, + }, { name: "SubInt32x16", argLen: 2, @@ -58441,6 +62353,16 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "MaskedRotateLeftInt32x4", + argLen: 3, + generic: true, + }, + { + name: "MaskedRotateRightInt32x4", + argLen: 3, + generic: true, + }, { name: "MaskedSaturatedPairDotProdAccumulateInt32x4", argLen: 4, @@ -58451,6 +62373,31 @@ var opcodeTable = [...]opInfo{ argLen: 4, generic: true, }, + { + name: "MaskedShiftLeftInt32x4", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftLeftAndFillUpperFromInt32x4", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightInt32x4", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftRightAndFillUpperFromInt32x4", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightSignExtendedInt32x4", + argLen: 3, + generic: true, + }, { name: "MaskedSubInt32x4", argLen: 3, @@ -58523,6 +62470,16 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "RotateLeftInt32x4", + argLen: 2, + generic: true, + }, + { + name: "RotateRightInt32x4", + argLen: 2, + generic: true, + }, { name: "SaturatedPairDotProdAccumulateInt32x4", argLen: 3, @@ -58533,6 +62490,46 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, + { + name: "ShiftAllLeftInt32x4", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightInt32x4", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightSignExtendedInt32x4", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftInt32x4", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftAndFillUpperFromInt32x4", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightInt32x4", + argLen: 2, + generic: true, + }, + { + name: "ShiftRightAndFillUpperFromInt32x4", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightSignExtendedInt32x4", + argLen: 2, + generic: true, + }, { name: "SignInt32x4", argLen: 2, @@ -58690,6 +62687,16 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "MaskedRotateLeftInt32x8", + argLen: 3, + generic: true, + }, + { + name: "MaskedRotateRightInt32x8", + argLen: 3, + generic: true, + }, { name: "MaskedSaturatedPairDotProdAccumulateInt32x8", argLen: 4, @@ -58700,6 +62707,31 @@ var opcodeTable = [...]opInfo{ argLen: 4, generic: true, }, + { + name: "MaskedShiftLeftInt32x8", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftLeftAndFillUpperFromInt32x8", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightInt32x8", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftRightAndFillUpperFromInt32x8", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightSignExtendedInt32x8", + argLen: 3, + generic: true, + }, { name: "MaskedSubInt32x8", argLen: 3, @@ -58772,6 +62804,16 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "RotateLeftInt32x8", + argLen: 2, + generic: true, + }, + { + name: "RotateRightInt32x8", + argLen: 2, + generic: true, + }, { name: "SaturatedPairDotProdAccumulateInt32x8", argLen: 3, @@ -58782,6 +62824,46 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, + { + name: "ShiftAllLeftInt32x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightInt32x8", + argLen: 2, + generic: 
true, + }, + { + name: "ShiftAllRightSignExtendedInt32x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftInt32x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftAndFillUpperFromInt32x8", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightInt32x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftRightAndFillUpperFromInt32x8", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightSignExtendedInt32x8", + argLen: 2, + generic: true, + }, { name: "SignInt32x8", argLen: 2, @@ -58940,6 +63022,56 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "MaskedRotateLeftInt64x2", + argLen: 3, + generic: true, + }, + { + name: "MaskedRotateRightInt64x2", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllLeftInt64x2", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightInt64x2", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightSignExtendedInt64x2", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftLeftInt64x2", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftLeftAndFillUpperFromInt64x2", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightInt64x2", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftRightAndFillUpperFromInt64x2", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightSignExtendedInt64x2", + argLen: 3, + generic: true, + }, { name: "MaskedSubInt64x2", argLen: 3, @@ -58992,6 +63124,56 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "RotateLeftInt64x2", + argLen: 2, + generic: true, + }, + { + name: "RotateRightInt64x2", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllLeftInt64x2", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightInt64x2", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightSignExtendedInt64x2", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftInt64x2", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftAndFillUpperFromInt64x2", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightInt64x2", + argLen: 2, + generic: true, + }, + { + name: "ShiftRightAndFillUpperFromInt64x2", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightSignExtendedInt64x2", + argLen: 2, + generic: true, + }, { name: "SubInt64x2", argLen: 2, @@ -59140,6 +63322,56 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "MaskedRotateLeftInt64x4", + argLen: 3, + generic: true, + }, + { + name: "MaskedRotateRightInt64x4", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllLeftInt64x4", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightInt64x4", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightSignExtendedInt64x4", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftLeftInt64x4", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftLeftAndFillUpperFromInt64x4", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightInt64x4", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftRightAndFillUpperFromInt64x4", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightSignExtendedInt64x4", + argLen: 3, + generic: true, + }, { name: "MaskedSubInt64x4", argLen: 3, @@ -59192,6 +63424,56 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "RotateLeftInt64x4", + argLen: 2, + generic: true, + }, + { + name: "RotateRightInt64x4", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllLeftInt64x4", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightInt64x4", + 
argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightSignExtendedInt64x4", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftInt64x4", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftAndFillUpperFromInt64x4", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightInt64x4", + argLen: 2, + generic: true, + }, + { + name: "ShiftRightAndFillUpperFromInt64x4", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightSignExtendedInt64x4", + argLen: 2, + generic: true, + }, { name: "SubInt64x4", argLen: 2, @@ -59340,6 +63622,56 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "MaskedRotateLeftInt64x8", + argLen: 3, + generic: true, + }, + { + name: "MaskedRotateRightInt64x8", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllLeftInt64x8", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightInt64x8", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightSignExtendedInt64x8", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftLeftInt64x8", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftLeftAndFillUpperFromInt64x8", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightInt64x8", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftRightAndFillUpperFromInt64x8", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightSignExtendedInt64x8", + argLen: 3, + generic: true, + }, { name: "MaskedSubInt64x8", argLen: 3, @@ -59392,6 +63724,56 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "RotateLeftInt64x8", + argLen: 2, + generic: true, + }, + { + name: "RotateRightInt64x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllLeftInt64x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightInt64x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightSignExtendedInt64x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftInt64x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftAndFillUpperFromInt64x8", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightInt64x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftRightAndFillUpperFromInt64x8", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightSignExtendedInt64x8", + argLen: 2, + generic: true, + }, { name: "SubInt64x8", argLen: 2, @@ -60042,6 +64424,31 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, + { + name: "MaskedShiftLeftUint16x16", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftLeftAndFillUpperFromUint16x16", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightUint16x16", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftRightAndFillUpperFromUint16x16", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightSignExtendedUint16x16", + argLen: 3, + generic: true, + }, { name: "MaskedSubUint16x16", argLen: 3, @@ -60103,6 +64510,41 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "ShiftAllLeftUint16x16", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightUint16x16", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftUint16x16", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftAndFillUpperFromUint16x16", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightUint16x16", + argLen: 2, + generic: true, + }, + { + name: "ShiftRightAndFillUpperFromUint16x16", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightSignExtendedUint16x16", + argLen: 2, + generic: true, + }, { name: "SubUint16x16", argLen: 2, @@ -60230,6 +64672,31 @@ var 
opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, + { + name: "MaskedShiftLeftUint16x32", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftLeftAndFillUpperFromUint16x32", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightUint16x32", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftRightAndFillUpperFromUint16x32", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightSignExtendedUint16x32", + argLen: 3, + generic: true, + }, { name: "MaskedSubUint16x32", argLen: 3, @@ -60275,6 +64742,31 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "ShiftLeftUint16x32", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftAndFillUpperFromUint16x32", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightUint16x32", + argLen: 2, + generic: true, + }, + { + name: "ShiftRightAndFillUpperFromUint16x32", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightSignExtendedUint16x32", + argLen: 2, + generic: true, + }, { name: "SubUint16x32", argLen: 2, @@ -60407,6 +64899,31 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, + { + name: "MaskedShiftLeftUint16x8", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftLeftAndFillUpperFromUint16x8", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightUint16x8", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftRightAndFillUpperFromUint16x8", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightSignExtendedUint16x8", + argLen: 3, + generic: true, + }, { name: "MaskedSubUint16x8", argLen: 3, @@ -60468,6 +64985,41 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "ShiftAllLeftUint16x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightUint16x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftUint16x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftAndFillUpperFromUint16x8", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightUint16x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftRightAndFillUpperFromUint16x8", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightSignExtendedUint16x8", + argLen: 2, + generic: true, + }, { name: "SubUint16x8", argLen: 2, @@ -60594,11 +65146,46 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "MaskedRotateLeftUint32x16", + argLen: 3, + generic: true, + }, + { + name: "MaskedRotateRightUint32x16", + argLen: 3, + generic: true, + }, { name: "MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16", argLen: 4, generic: true, }, + { + name: "MaskedShiftLeftUint32x16", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftLeftAndFillUpperFromUint32x16", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightUint32x16", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftRightAndFillUpperFromUint32x16", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightSignExtendedUint32x16", + argLen: 3, + generic: true, + }, { name: "MaskedSubUint32x16", argLen: 3, @@ -60644,11 +65231,46 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "RotateLeftUint32x16", + argLen: 2, + generic: true, + }, + { + name: "RotateRightUint32x16", + argLen: 2, + generic: true, + }, { name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x16", argLen: 3, generic: true, }, + { + name: "ShiftLeftUint32x16", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftAndFillUpperFromUint32x16", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightUint32x16", + argLen: 2, + 
generic: true, + }, + { + name: "ShiftRightAndFillUpperFromUint32x16", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightSignExtendedUint32x16", + argLen: 2, + generic: true, + }, { name: "SubUint32x16", argLen: 2, @@ -60780,11 +65402,46 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "MaskedRotateLeftUint32x4", + argLen: 3, + generic: true, + }, + { + name: "MaskedRotateRightUint32x4", + argLen: 3, + generic: true, + }, { name: "MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4", argLen: 4, generic: true, }, + { + name: "MaskedShiftLeftUint32x4", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftLeftAndFillUpperFromUint32x4", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightUint32x4", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftRightAndFillUpperFromUint32x4", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightSignExtendedUint32x4", + argLen: 3, + generic: true, + }, { name: "MaskedSubUint32x4", argLen: 3, @@ -60846,11 +65503,56 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "RotateLeftUint32x4", + argLen: 2, + generic: true, + }, + { + name: "RotateRightUint32x4", + argLen: 2, + generic: true, + }, { name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x4", argLen: 3, generic: true, }, + { + name: "ShiftAllLeftUint32x4", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightUint32x4", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftUint32x4", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftAndFillUpperFromUint32x4", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightUint32x4", + argLen: 2, + generic: true, + }, + { + name: "ShiftRightAndFillUpperFromUint32x4", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightSignExtendedUint32x4", + argLen: 2, + generic: true, + }, { name: "SubUint32x4", argLen: 2, @@ -60982,11 +65684,46 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "MaskedRotateLeftUint32x8", + argLen: 3, + generic: true, + }, + { + name: "MaskedRotateRightUint32x8", + argLen: 3, + generic: true, + }, { name: "MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8", argLen: 4, generic: true, }, + { + name: "MaskedShiftLeftUint32x8", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftLeftAndFillUpperFromUint32x8", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightUint32x8", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftRightAndFillUpperFromUint32x8", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightSignExtendedUint32x8", + argLen: 3, + generic: true, + }, { name: "MaskedSubUint32x8", argLen: 3, @@ -61048,11 +65785,56 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "RotateLeftUint32x8", + argLen: 2, + generic: true, + }, + { + name: "RotateRightUint32x8", + argLen: 2, + generic: true, + }, { name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x8", argLen: 3, generic: true, }, + { + name: "ShiftAllLeftUint32x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightUint32x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftUint32x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftAndFillUpperFromUint32x8", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightUint32x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftRightAndFillUpperFromUint32x8", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightSignExtendedUint32x8", + argLen: 2, + generic: true, + }, { name: 
"SubUint32x8", argLen: 2, @@ -61190,6 +65972,51 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "MaskedRotateLeftUint64x2", + argLen: 3, + generic: true, + }, + { + name: "MaskedRotateRightUint64x2", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllLeftUint64x2", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightUint64x2", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftLeftUint64x2", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftLeftAndFillUpperFromUint64x2", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightUint64x2", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftRightAndFillUpperFromUint64x2", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightSignExtendedUint64x2", + argLen: 3, + generic: true, + }, { name: "MaskedSubUint64x2", argLen: 3, @@ -61236,6 +66063,51 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "RotateLeftUint64x2", + argLen: 2, + generic: true, + }, + { + name: "RotateRightUint64x2", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllLeftUint64x2", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightUint64x2", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftUint64x2", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftAndFillUpperFromUint64x2", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightUint64x2", + argLen: 2, + generic: true, + }, + { + name: "ShiftRightAndFillUpperFromUint64x2", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightSignExtendedUint64x2", + argLen: 2, + generic: true, + }, { name: "SubUint64x2", argLen: 2, @@ -61368,6 +66240,51 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "MaskedRotateLeftUint64x4", + argLen: 3, + generic: true, + }, + { + name: "MaskedRotateRightUint64x4", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllLeftUint64x4", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightUint64x4", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftLeftUint64x4", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftLeftAndFillUpperFromUint64x4", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightUint64x4", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftRightAndFillUpperFromUint64x4", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightSignExtendedUint64x4", + argLen: 3, + generic: true, + }, { name: "MaskedSubUint64x4", argLen: 3, @@ -61414,6 +66331,51 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "RotateLeftUint64x4", + argLen: 2, + generic: true, + }, + { + name: "RotateRightUint64x4", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllLeftUint64x4", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightUint64x4", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftUint64x4", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftAndFillUpperFromUint64x4", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightUint64x4", + argLen: 2, + generic: true, + }, + { + name: "ShiftRightAndFillUpperFromUint64x4", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightSignExtendedUint64x4", + argLen: 2, + generic: true, + }, { name: "SubUint64x4", argLen: 2, @@ -61546,6 +66508,51 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "MaskedRotateLeftUint64x8", + argLen: 3, + generic: true, + }, + { + name: "MaskedRotateRightUint64x8", + argLen: 3, + generic: true, + }, + { + name: 
"MaskedShiftAllLeftUint64x8", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightUint64x8", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftLeftUint64x8", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftLeftAndFillUpperFromUint64x8", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightUint64x8", + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftRightAndFillUpperFromUint64x8", + argLen: 4, + generic: true, + }, + { + name: "MaskedShiftRightSignExtendedUint64x8", + argLen: 3, + generic: true, + }, { name: "MaskedSubUint64x8", argLen: 3, @@ -61592,6 +66599,51 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "RotateLeftUint64x8", + argLen: 2, + generic: true, + }, + { + name: "RotateRightUint64x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllLeftUint64x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightUint64x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftUint64x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftLeftAndFillUpperFromUint64x8", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightUint64x8", + argLen: 2, + generic: true, + }, + { + name: "ShiftRightAndFillUpperFromUint64x8", + argLen: 3, + generic: true, + }, + { + name: "ShiftRightSignExtendedUint64x8", + argLen: 2, + generic: true, + }, { name: "SubUint64x8", argLen: 2, @@ -63293,42 +68345,402 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "MaskedShiftAllLeftAndFillUpperFromInt16x16", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightAndFillUpperFromInt16x16", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "ShiftAllLeftAndFillUpperFromInt16x16", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightAndFillUpperFromInt16x16", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedShiftAllLeftAndFillUpperFromInt16x32", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightAndFillUpperFromInt16x32", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "ShiftAllLeftAndFillUpperFromInt16x32", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightAndFillUpperFromInt16x32", + auxType: auxInt8, + argLen: 2, + generic: true, + }, { name: "GetElemInt16x8", auxType: auxInt8, argLen: 1, generic: true, }, + { + name: "MaskedShiftAllLeftAndFillUpperFromInt16x8", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightAndFillUpperFromInt16x8", + auxType: auxInt8, + argLen: 3, + generic: true, + }, { name: "SetElemInt16x8", auxType: auxInt8, argLen: 2, generic: true, }, + { + name: "ShiftAllLeftAndFillUpperFromInt16x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightAndFillUpperFromInt16x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRotateAllLeftInt32x16", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRotateAllRightInt32x16", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedShiftAllLeftAndFillUpperFromInt32x16", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightAndFillUpperFromInt32x16", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "RotateAllLeftInt32x16", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "RotateAllRightInt32x16", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + 
{ + name: "ShiftAllLeftAndFillUpperFromInt32x16", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightAndFillUpperFromInt32x16", + auxType: auxInt8, + argLen: 2, + generic: true, + }, { name: "GetElemInt32x4", auxType: auxInt8, argLen: 1, generic: true, }, + { + name: "MaskedRotateAllLeftInt32x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRotateAllRightInt32x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedShiftAllLeftAndFillUpperFromInt32x4", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightAndFillUpperFromInt32x4", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "RotateAllLeftInt32x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "RotateAllRightInt32x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, { name: "SetElemInt32x4", auxType: auxInt8, argLen: 2, generic: true, }, + { + name: "ShiftAllLeftAndFillUpperFromInt32x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightAndFillUpperFromInt32x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRotateAllLeftInt32x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRotateAllRightInt32x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedShiftAllLeftAndFillUpperFromInt32x8", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightAndFillUpperFromInt32x8", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "RotateAllLeftInt32x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "RotateAllRightInt32x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "ShiftAllLeftAndFillUpperFromInt32x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightAndFillUpperFromInt32x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, { name: "GetElemInt64x2", auxType: auxInt8, argLen: 1, generic: true, }, + { + name: "MaskedRotateAllLeftInt64x2", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRotateAllRightInt64x2", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedShiftAllLeftAndFillUpperFromInt64x2", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightAndFillUpperFromInt64x2", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "RotateAllLeftInt64x2", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "RotateAllRightInt64x2", + auxType: auxInt8, + argLen: 1, + generic: true, + }, { name: "SetElemInt64x2", auxType: auxInt8, argLen: 2, generic: true, }, + { + name: "ShiftAllLeftAndFillUpperFromInt64x2", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightAndFillUpperFromInt64x2", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRotateAllLeftInt64x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRotateAllRightInt64x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedShiftAllLeftAndFillUpperFromInt64x4", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightAndFillUpperFromInt64x4", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "RotateAllLeftInt64x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "RotateAllRightInt64x4", + auxType: auxInt8, + argLen: 1, + generic: true, + 
}, + { + name: "ShiftAllLeftAndFillUpperFromInt64x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightAndFillUpperFromInt64x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRotateAllLeftInt64x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRotateAllRightInt64x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedShiftAllLeftAndFillUpperFromInt64x8", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightAndFillUpperFromInt64x8", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "RotateAllLeftInt64x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "RotateAllRightInt64x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "ShiftAllLeftAndFillUpperFromInt64x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightAndFillUpperFromInt64x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, { name: "GetElemInt8x16", auxType: auxInt8, @@ -63341,42 +68753,402 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "MaskedShiftAllLeftAndFillUpperFromUint16x16", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightAndFillUpperFromUint16x16", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "ShiftAllLeftAndFillUpperFromUint16x16", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightAndFillUpperFromUint16x16", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedShiftAllLeftAndFillUpperFromUint16x32", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightAndFillUpperFromUint16x32", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "ShiftAllLeftAndFillUpperFromUint16x32", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightAndFillUpperFromUint16x32", + auxType: auxInt8, + argLen: 2, + generic: true, + }, { name: "GetElemUint16x8", auxType: auxInt8, argLen: 1, generic: true, }, + { + name: "MaskedShiftAllLeftAndFillUpperFromUint16x8", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightAndFillUpperFromUint16x8", + auxType: auxInt8, + argLen: 3, + generic: true, + }, { name: "SetElemUint16x8", auxType: auxInt8, argLen: 2, generic: true, }, + { + name: "ShiftAllLeftAndFillUpperFromUint16x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightAndFillUpperFromUint16x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRotateAllLeftUint32x16", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRotateAllRightUint32x16", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedShiftAllLeftAndFillUpperFromUint32x16", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightAndFillUpperFromUint32x16", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "RotateAllLeftUint32x16", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "RotateAllRightUint32x16", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "ShiftAllLeftAndFillUpperFromUint32x16", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightAndFillUpperFromUint32x16", + auxType: auxInt8, + argLen: 2, + generic: true, + }, { name: "GetElemUint32x4", auxType: auxInt8, argLen: 1, generic: true, }, + { 
+ name: "MaskedRotateAllLeftUint32x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRotateAllRightUint32x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedShiftAllLeftAndFillUpperFromUint32x4", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightAndFillUpperFromUint32x4", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "RotateAllLeftUint32x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "RotateAllRightUint32x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, { name: "SetElemUint32x4", auxType: auxInt8, argLen: 2, generic: true, }, + { + name: "ShiftAllLeftAndFillUpperFromUint32x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightAndFillUpperFromUint32x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRotateAllLeftUint32x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRotateAllRightUint32x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedShiftAllLeftAndFillUpperFromUint32x8", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightAndFillUpperFromUint32x8", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "RotateAllLeftUint32x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "RotateAllRightUint32x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "ShiftAllLeftAndFillUpperFromUint32x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightAndFillUpperFromUint32x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, { name: "GetElemUint64x2", auxType: auxInt8, argLen: 1, generic: true, }, + { + name: "MaskedRotateAllLeftUint64x2", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRotateAllRightUint64x2", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedShiftAllLeftAndFillUpperFromUint64x2", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightAndFillUpperFromUint64x2", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "RotateAllLeftUint64x2", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "RotateAllRightUint64x2", + auxType: auxInt8, + argLen: 1, + generic: true, + }, { name: "SetElemUint64x2", auxType: auxInt8, argLen: 2, generic: true, }, + { + name: "ShiftAllLeftAndFillUpperFromUint64x2", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightAndFillUpperFromUint64x2", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRotateAllLeftUint64x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRotateAllRightUint64x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedShiftAllLeftAndFillUpperFromUint64x4", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightAndFillUpperFromUint64x4", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "RotateAllLeftUint64x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "RotateAllRightUint64x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "ShiftAllLeftAndFillUpperFromUint64x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightAndFillUpperFromUint64x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: 
"MaskedRotateAllLeftUint64x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRotateAllRightUint64x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedShiftAllLeftAndFillUpperFromUint64x8", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "MaskedShiftAllRightAndFillUpperFromUint64x8", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "RotateAllLeftUint64x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "RotateAllRightUint64x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "ShiftAllLeftAndFillUpperFromUint64x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightAndFillUpperFromUint64x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, { name: "GetElemUint8x16", auxType: auxInt8, diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index 668024a00fb..d7aa0339e7c 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -2862,6 +2862,102 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpMaskedPopCountUint8x32(v) case OpMaskedPopCountUint8x64: return rewriteValueAMD64_OpMaskedPopCountUint8x64(v) + case OpMaskedRotateAllLeftInt32x16: + return rewriteValueAMD64_OpMaskedRotateAllLeftInt32x16(v) + case OpMaskedRotateAllLeftInt32x4: + return rewriteValueAMD64_OpMaskedRotateAllLeftInt32x4(v) + case OpMaskedRotateAllLeftInt32x8: + return rewriteValueAMD64_OpMaskedRotateAllLeftInt32x8(v) + case OpMaskedRotateAllLeftInt64x2: + return rewriteValueAMD64_OpMaskedRotateAllLeftInt64x2(v) + case OpMaskedRotateAllLeftInt64x4: + return rewriteValueAMD64_OpMaskedRotateAllLeftInt64x4(v) + case OpMaskedRotateAllLeftInt64x8: + return rewriteValueAMD64_OpMaskedRotateAllLeftInt64x8(v) + case OpMaskedRotateAllLeftUint32x16: + return rewriteValueAMD64_OpMaskedRotateAllLeftUint32x16(v) + case OpMaskedRotateAllLeftUint32x4: + return rewriteValueAMD64_OpMaskedRotateAllLeftUint32x4(v) + case OpMaskedRotateAllLeftUint32x8: + return rewriteValueAMD64_OpMaskedRotateAllLeftUint32x8(v) + case OpMaskedRotateAllLeftUint64x2: + return rewriteValueAMD64_OpMaskedRotateAllLeftUint64x2(v) + case OpMaskedRotateAllLeftUint64x4: + return rewriteValueAMD64_OpMaskedRotateAllLeftUint64x4(v) + case OpMaskedRotateAllLeftUint64x8: + return rewriteValueAMD64_OpMaskedRotateAllLeftUint64x8(v) + case OpMaskedRotateAllRightInt32x16: + return rewriteValueAMD64_OpMaskedRotateAllRightInt32x16(v) + case OpMaskedRotateAllRightInt32x4: + return rewriteValueAMD64_OpMaskedRotateAllRightInt32x4(v) + case OpMaskedRotateAllRightInt32x8: + return rewriteValueAMD64_OpMaskedRotateAllRightInt32x8(v) + case OpMaskedRotateAllRightInt64x2: + return rewriteValueAMD64_OpMaskedRotateAllRightInt64x2(v) + case OpMaskedRotateAllRightInt64x4: + return rewriteValueAMD64_OpMaskedRotateAllRightInt64x4(v) + case OpMaskedRotateAllRightInt64x8: + return rewriteValueAMD64_OpMaskedRotateAllRightInt64x8(v) + case OpMaskedRotateAllRightUint32x16: + return rewriteValueAMD64_OpMaskedRotateAllRightUint32x16(v) + case OpMaskedRotateAllRightUint32x4: + return rewriteValueAMD64_OpMaskedRotateAllRightUint32x4(v) + case OpMaskedRotateAllRightUint32x8: + return rewriteValueAMD64_OpMaskedRotateAllRightUint32x8(v) + case OpMaskedRotateAllRightUint64x2: + return rewriteValueAMD64_OpMaskedRotateAllRightUint64x2(v) + case OpMaskedRotateAllRightUint64x4: + return 
rewriteValueAMD64_OpMaskedRotateAllRightUint64x4(v) + case OpMaskedRotateAllRightUint64x8: + return rewriteValueAMD64_OpMaskedRotateAllRightUint64x8(v) + case OpMaskedRotateLeftInt32x16: + return rewriteValueAMD64_OpMaskedRotateLeftInt32x16(v) + case OpMaskedRotateLeftInt32x4: + return rewriteValueAMD64_OpMaskedRotateLeftInt32x4(v) + case OpMaskedRotateLeftInt32x8: + return rewriteValueAMD64_OpMaskedRotateLeftInt32x8(v) + case OpMaskedRotateLeftInt64x2: + return rewriteValueAMD64_OpMaskedRotateLeftInt64x2(v) + case OpMaskedRotateLeftInt64x4: + return rewriteValueAMD64_OpMaskedRotateLeftInt64x4(v) + case OpMaskedRotateLeftInt64x8: + return rewriteValueAMD64_OpMaskedRotateLeftInt64x8(v) + case OpMaskedRotateLeftUint32x16: + return rewriteValueAMD64_OpMaskedRotateLeftUint32x16(v) + case OpMaskedRotateLeftUint32x4: + return rewriteValueAMD64_OpMaskedRotateLeftUint32x4(v) + case OpMaskedRotateLeftUint32x8: + return rewriteValueAMD64_OpMaskedRotateLeftUint32x8(v) + case OpMaskedRotateLeftUint64x2: + return rewriteValueAMD64_OpMaskedRotateLeftUint64x2(v) + case OpMaskedRotateLeftUint64x4: + return rewriteValueAMD64_OpMaskedRotateLeftUint64x4(v) + case OpMaskedRotateLeftUint64x8: + return rewriteValueAMD64_OpMaskedRotateLeftUint64x8(v) + case OpMaskedRotateRightInt32x16: + return rewriteValueAMD64_OpMaskedRotateRightInt32x16(v) + case OpMaskedRotateRightInt32x4: + return rewriteValueAMD64_OpMaskedRotateRightInt32x4(v) + case OpMaskedRotateRightInt32x8: + return rewriteValueAMD64_OpMaskedRotateRightInt32x8(v) + case OpMaskedRotateRightInt64x2: + return rewriteValueAMD64_OpMaskedRotateRightInt64x2(v) + case OpMaskedRotateRightInt64x4: + return rewriteValueAMD64_OpMaskedRotateRightInt64x4(v) + case OpMaskedRotateRightInt64x8: + return rewriteValueAMD64_OpMaskedRotateRightInt64x8(v) + case OpMaskedRotateRightUint32x16: + return rewriteValueAMD64_OpMaskedRotateRightUint32x16(v) + case OpMaskedRotateRightUint32x4: + return rewriteValueAMD64_OpMaskedRotateRightUint32x4(v) + case OpMaskedRotateRightUint32x8: + return rewriteValueAMD64_OpMaskedRotateRightUint32x8(v) + case OpMaskedRotateRightUint64x2: + return rewriteValueAMD64_OpMaskedRotateRightUint64x2(v) + case OpMaskedRotateRightUint64x4: + return rewriteValueAMD64_OpMaskedRotateRightUint64x4(v) + case OpMaskedRotateRightUint64x8: + return rewriteValueAMD64_OpMaskedRotateRightUint64x8(v) case OpMaskedRoundSuppressExceptionWithPrecisionFloat32x16: return rewriteValueAMD64_OpMaskedRoundSuppressExceptionWithPrecisionFloat32x16(v) case OpMaskedRoundSuppressExceptionWithPrecisionFloat32x4: @@ -2958,6 +3054,288 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4(v) case OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8: return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8(v) + case OpMaskedShiftAllLeftAndFillUpperFromInt16x16: + return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt16x16(v) + case OpMaskedShiftAllLeftAndFillUpperFromInt16x32: + return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt16x32(v) + case OpMaskedShiftAllLeftAndFillUpperFromInt16x8: + return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt16x8(v) + case OpMaskedShiftAllLeftAndFillUpperFromInt32x16: + return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt32x16(v) + case OpMaskedShiftAllLeftAndFillUpperFromInt32x4: + return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt32x4(v) + case 
OpMaskedShiftAllLeftAndFillUpperFromInt32x8: + return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt32x8(v) + case OpMaskedShiftAllLeftAndFillUpperFromInt64x2: + return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt64x2(v) + case OpMaskedShiftAllLeftAndFillUpperFromInt64x4: + return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt64x4(v) + case OpMaskedShiftAllLeftAndFillUpperFromInt64x8: + return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt64x8(v) + case OpMaskedShiftAllLeftAndFillUpperFromUint16x16: + return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint16x16(v) + case OpMaskedShiftAllLeftAndFillUpperFromUint16x32: + return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint16x32(v) + case OpMaskedShiftAllLeftAndFillUpperFromUint16x8: + return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint16x8(v) + case OpMaskedShiftAllLeftAndFillUpperFromUint32x16: + return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint32x16(v) + case OpMaskedShiftAllLeftAndFillUpperFromUint32x4: + return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint32x4(v) + case OpMaskedShiftAllLeftAndFillUpperFromUint32x8: + return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint32x8(v) + case OpMaskedShiftAllLeftAndFillUpperFromUint64x2: + return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint64x2(v) + case OpMaskedShiftAllLeftAndFillUpperFromUint64x4: + return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint64x4(v) + case OpMaskedShiftAllLeftAndFillUpperFromUint64x8: + return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint64x8(v) + case OpMaskedShiftAllLeftInt64x2: + return rewriteValueAMD64_OpMaskedShiftAllLeftInt64x2(v) + case OpMaskedShiftAllLeftInt64x4: + return rewriteValueAMD64_OpMaskedShiftAllLeftInt64x4(v) + case OpMaskedShiftAllLeftInt64x8: + return rewriteValueAMD64_OpMaskedShiftAllLeftInt64x8(v) + case OpMaskedShiftAllLeftUint64x2: + return rewriteValueAMD64_OpMaskedShiftAllLeftUint64x2(v) + case OpMaskedShiftAllLeftUint64x4: + return rewriteValueAMD64_OpMaskedShiftAllLeftUint64x4(v) + case OpMaskedShiftAllLeftUint64x8: + return rewriteValueAMD64_OpMaskedShiftAllLeftUint64x8(v) + case OpMaskedShiftAllRightAndFillUpperFromInt16x16: + return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt16x16(v) + case OpMaskedShiftAllRightAndFillUpperFromInt16x32: + return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt16x32(v) + case OpMaskedShiftAllRightAndFillUpperFromInt16x8: + return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt16x8(v) + case OpMaskedShiftAllRightAndFillUpperFromInt32x16: + return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt32x16(v) + case OpMaskedShiftAllRightAndFillUpperFromInt32x4: + return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt32x4(v) + case OpMaskedShiftAllRightAndFillUpperFromInt32x8: + return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt32x8(v) + case OpMaskedShiftAllRightAndFillUpperFromInt64x2: + return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt64x2(v) + case OpMaskedShiftAllRightAndFillUpperFromInt64x4: + return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt64x4(v) + case OpMaskedShiftAllRightAndFillUpperFromInt64x8: + return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt64x8(v) + case OpMaskedShiftAllRightAndFillUpperFromUint16x16: + return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint16x16(v) + case 
OpMaskedShiftAllRightAndFillUpperFromUint16x32: + return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint16x32(v) + case OpMaskedShiftAllRightAndFillUpperFromUint16x8: + return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint16x8(v) + case OpMaskedShiftAllRightAndFillUpperFromUint32x16: + return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint32x16(v) + case OpMaskedShiftAllRightAndFillUpperFromUint32x4: + return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint32x4(v) + case OpMaskedShiftAllRightAndFillUpperFromUint32x8: + return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint32x8(v) + case OpMaskedShiftAllRightAndFillUpperFromUint64x2: + return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint64x2(v) + case OpMaskedShiftAllRightAndFillUpperFromUint64x4: + return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint64x4(v) + case OpMaskedShiftAllRightAndFillUpperFromUint64x8: + return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint64x8(v) + case OpMaskedShiftAllRightInt64x2: + return rewriteValueAMD64_OpMaskedShiftAllRightInt64x2(v) + case OpMaskedShiftAllRightInt64x4: + return rewriteValueAMD64_OpMaskedShiftAllRightInt64x4(v) + case OpMaskedShiftAllRightInt64x8: + return rewriteValueAMD64_OpMaskedShiftAllRightInt64x8(v) + case OpMaskedShiftAllRightSignExtendedInt64x2: + return rewriteValueAMD64_OpMaskedShiftAllRightSignExtendedInt64x2(v) + case OpMaskedShiftAllRightSignExtendedInt64x4: + return rewriteValueAMD64_OpMaskedShiftAllRightSignExtendedInt64x4(v) + case OpMaskedShiftAllRightSignExtendedInt64x8: + return rewriteValueAMD64_OpMaskedShiftAllRightSignExtendedInt64x8(v) + case OpMaskedShiftAllRightUint64x2: + return rewriteValueAMD64_OpMaskedShiftAllRightUint64x2(v) + case OpMaskedShiftAllRightUint64x4: + return rewriteValueAMD64_OpMaskedShiftAllRightUint64x4(v) + case OpMaskedShiftAllRightUint64x8: + return rewriteValueAMD64_OpMaskedShiftAllRightUint64x8(v) + case OpMaskedShiftLeftAndFillUpperFromInt16x16: + return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt16x16(v) + case OpMaskedShiftLeftAndFillUpperFromInt16x32: + return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt16x32(v) + case OpMaskedShiftLeftAndFillUpperFromInt16x8: + return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt16x8(v) + case OpMaskedShiftLeftAndFillUpperFromInt32x16: + return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt32x16(v) + case OpMaskedShiftLeftAndFillUpperFromInt32x4: + return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt32x4(v) + case OpMaskedShiftLeftAndFillUpperFromInt32x8: + return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt32x8(v) + case OpMaskedShiftLeftAndFillUpperFromInt64x2: + return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt64x2(v) + case OpMaskedShiftLeftAndFillUpperFromInt64x4: + return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt64x4(v) + case OpMaskedShiftLeftAndFillUpperFromInt64x8: + return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt64x8(v) + case OpMaskedShiftLeftAndFillUpperFromUint16x16: + return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint16x16(v) + case OpMaskedShiftLeftAndFillUpperFromUint16x32: + return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint16x32(v) + case OpMaskedShiftLeftAndFillUpperFromUint16x8: + return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint16x8(v) + case OpMaskedShiftLeftAndFillUpperFromUint32x16: + return 
rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint32x16(v) + case OpMaskedShiftLeftAndFillUpperFromUint32x4: + return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint32x4(v) + case OpMaskedShiftLeftAndFillUpperFromUint32x8: + return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint32x8(v) + case OpMaskedShiftLeftAndFillUpperFromUint64x2: + return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint64x2(v) + case OpMaskedShiftLeftAndFillUpperFromUint64x4: + return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint64x4(v) + case OpMaskedShiftLeftAndFillUpperFromUint64x8: + return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint64x8(v) + case OpMaskedShiftLeftInt16x16: + return rewriteValueAMD64_OpMaskedShiftLeftInt16x16(v) + case OpMaskedShiftLeftInt16x32: + return rewriteValueAMD64_OpMaskedShiftLeftInt16x32(v) + case OpMaskedShiftLeftInt16x8: + return rewriteValueAMD64_OpMaskedShiftLeftInt16x8(v) + case OpMaskedShiftLeftInt32x16: + return rewriteValueAMD64_OpMaskedShiftLeftInt32x16(v) + case OpMaskedShiftLeftInt32x4: + return rewriteValueAMD64_OpMaskedShiftLeftInt32x4(v) + case OpMaskedShiftLeftInt32x8: + return rewriteValueAMD64_OpMaskedShiftLeftInt32x8(v) + case OpMaskedShiftLeftInt64x2: + return rewriteValueAMD64_OpMaskedShiftLeftInt64x2(v) + case OpMaskedShiftLeftInt64x4: + return rewriteValueAMD64_OpMaskedShiftLeftInt64x4(v) + case OpMaskedShiftLeftInt64x8: + return rewriteValueAMD64_OpMaskedShiftLeftInt64x8(v) + case OpMaskedShiftLeftUint16x16: + return rewriteValueAMD64_OpMaskedShiftLeftUint16x16(v) + case OpMaskedShiftLeftUint16x32: + return rewriteValueAMD64_OpMaskedShiftLeftUint16x32(v) + case OpMaskedShiftLeftUint16x8: + return rewriteValueAMD64_OpMaskedShiftLeftUint16x8(v) + case OpMaskedShiftLeftUint32x16: + return rewriteValueAMD64_OpMaskedShiftLeftUint32x16(v) + case OpMaskedShiftLeftUint32x4: + return rewriteValueAMD64_OpMaskedShiftLeftUint32x4(v) + case OpMaskedShiftLeftUint32x8: + return rewriteValueAMD64_OpMaskedShiftLeftUint32x8(v) + case OpMaskedShiftLeftUint64x2: + return rewriteValueAMD64_OpMaskedShiftLeftUint64x2(v) + case OpMaskedShiftLeftUint64x4: + return rewriteValueAMD64_OpMaskedShiftLeftUint64x4(v) + case OpMaskedShiftLeftUint64x8: + return rewriteValueAMD64_OpMaskedShiftLeftUint64x8(v) + case OpMaskedShiftRightAndFillUpperFromInt16x16: + return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt16x16(v) + case OpMaskedShiftRightAndFillUpperFromInt16x32: + return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt16x32(v) + case OpMaskedShiftRightAndFillUpperFromInt16x8: + return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt16x8(v) + case OpMaskedShiftRightAndFillUpperFromInt32x16: + return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt32x16(v) + case OpMaskedShiftRightAndFillUpperFromInt32x4: + return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt32x4(v) + case OpMaskedShiftRightAndFillUpperFromInt32x8: + return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt32x8(v) + case OpMaskedShiftRightAndFillUpperFromInt64x2: + return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt64x2(v) + case OpMaskedShiftRightAndFillUpperFromInt64x4: + return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt64x4(v) + case OpMaskedShiftRightAndFillUpperFromInt64x8: + return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt64x8(v) + case OpMaskedShiftRightAndFillUpperFromUint16x16: + return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint16x16(v) + case 
OpMaskedShiftRightAndFillUpperFromUint16x32: + return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint16x32(v) + case OpMaskedShiftRightAndFillUpperFromUint16x8: + return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint16x8(v) + case OpMaskedShiftRightAndFillUpperFromUint32x16: + return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint32x16(v) + case OpMaskedShiftRightAndFillUpperFromUint32x4: + return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint32x4(v) + case OpMaskedShiftRightAndFillUpperFromUint32x8: + return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint32x8(v) + case OpMaskedShiftRightAndFillUpperFromUint64x2: + return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint64x2(v) + case OpMaskedShiftRightAndFillUpperFromUint64x4: + return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint64x4(v) + case OpMaskedShiftRightAndFillUpperFromUint64x8: + return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint64x8(v) + case OpMaskedShiftRightInt16x16: + return rewriteValueAMD64_OpMaskedShiftRightInt16x16(v) + case OpMaskedShiftRightInt16x32: + return rewriteValueAMD64_OpMaskedShiftRightInt16x32(v) + case OpMaskedShiftRightInt16x8: + return rewriteValueAMD64_OpMaskedShiftRightInt16x8(v) + case OpMaskedShiftRightInt32x16: + return rewriteValueAMD64_OpMaskedShiftRightInt32x16(v) + case OpMaskedShiftRightInt32x4: + return rewriteValueAMD64_OpMaskedShiftRightInt32x4(v) + case OpMaskedShiftRightInt32x8: + return rewriteValueAMD64_OpMaskedShiftRightInt32x8(v) + case OpMaskedShiftRightInt64x2: + return rewriteValueAMD64_OpMaskedShiftRightInt64x2(v) + case OpMaskedShiftRightInt64x4: + return rewriteValueAMD64_OpMaskedShiftRightInt64x4(v) + case OpMaskedShiftRightInt64x8: + return rewriteValueAMD64_OpMaskedShiftRightInt64x8(v) + case OpMaskedShiftRightSignExtendedInt16x16: + return rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt16x16(v) + case OpMaskedShiftRightSignExtendedInt16x32: + return rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt16x32(v) + case OpMaskedShiftRightSignExtendedInt16x8: + return rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt16x8(v) + case OpMaskedShiftRightSignExtendedInt32x16: + return rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt32x16(v) + case OpMaskedShiftRightSignExtendedInt32x4: + return rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt32x4(v) + case OpMaskedShiftRightSignExtendedInt32x8: + return rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt32x8(v) + case OpMaskedShiftRightSignExtendedInt64x2: + return rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt64x2(v) + case OpMaskedShiftRightSignExtendedInt64x4: + return rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt64x4(v) + case OpMaskedShiftRightSignExtendedInt64x8: + return rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt64x8(v) + case OpMaskedShiftRightSignExtendedUint16x16: + return rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint16x16(v) + case OpMaskedShiftRightSignExtendedUint16x32: + return rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint16x32(v) + case OpMaskedShiftRightSignExtendedUint16x8: + return rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint16x8(v) + case OpMaskedShiftRightSignExtendedUint32x16: + return rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint32x16(v) + case OpMaskedShiftRightSignExtendedUint32x4: + return rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint32x4(v) + case OpMaskedShiftRightSignExtendedUint32x8: + return rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint32x8(v) + case 
OpMaskedShiftRightSignExtendedUint64x2: + return rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint64x2(v) + case OpMaskedShiftRightSignExtendedUint64x4: + return rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint64x4(v) + case OpMaskedShiftRightSignExtendedUint64x8: + return rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint64x8(v) + case OpMaskedShiftRightUint16x16: + return rewriteValueAMD64_OpMaskedShiftRightUint16x16(v) + case OpMaskedShiftRightUint16x32: + return rewriteValueAMD64_OpMaskedShiftRightUint16x32(v) + case OpMaskedShiftRightUint16x8: + return rewriteValueAMD64_OpMaskedShiftRightUint16x8(v) + case OpMaskedShiftRightUint32x16: + return rewriteValueAMD64_OpMaskedShiftRightUint32x16(v) + case OpMaskedShiftRightUint32x4: + return rewriteValueAMD64_OpMaskedShiftRightUint32x4(v) + case OpMaskedShiftRightUint32x8: + return rewriteValueAMD64_OpMaskedShiftRightUint32x8(v) + case OpMaskedShiftRightUint64x2: + return rewriteValueAMD64_OpMaskedShiftRightUint64x2(v) + case OpMaskedShiftRightUint64x4: + return rewriteValueAMD64_OpMaskedShiftRightUint64x4(v) + case OpMaskedShiftRightUint64x8: + return rewriteValueAMD64_OpMaskedShiftRightUint64x8(v) case OpMaskedSqrtFloat32x16: return rewriteValueAMD64_OpMaskedSqrtFloat32x16(v) case OpMaskedSqrtFloat32x4: @@ -3812,6 +4190,54 @@ func rewriteValueAMD64(v *Value) bool { case OpPrefetchCacheStreamed: v.Op = OpAMD64PrefetchNTA return true + case OpRotateAllLeftInt32x16: + return rewriteValueAMD64_OpRotateAllLeftInt32x16(v) + case OpRotateAllLeftInt32x4: + return rewriteValueAMD64_OpRotateAllLeftInt32x4(v) + case OpRotateAllLeftInt32x8: + return rewriteValueAMD64_OpRotateAllLeftInt32x8(v) + case OpRotateAllLeftInt64x2: + return rewriteValueAMD64_OpRotateAllLeftInt64x2(v) + case OpRotateAllLeftInt64x4: + return rewriteValueAMD64_OpRotateAllLeftInt64x4(v) + case OpRotateAllLeftInt64x8: + return rewriteValueAMD64_OpRotateAllLeftInt64x8(v) + case OpRotateAllLeftUint32x16: + return rewriteValueAMD64_OpRotateAllLeftUint32x16(v) + case OpRotateAllLeftUint32x4: + return rewriteValueAMD64_OpRotateAllLeftUint32x4(v) + case OpRotateAllLeftUint32x8: + return rewriteValueAMD64_OpRotateAllLeftUint32x8(v) + case OpRotateAllLeftUint64x2: + return rewriteValueAMD64_OpRotateAllLeftUint64x2(v) + case OpRotateAllLeftUint64x4: + return rewriteValueAMD64_OpRotateAllLeftUint64x4(v) + case OpRotateAllLeftUint64x8: + return rewriteValueAMD64_OpRotateAllLeftUint64x8(v) + case OpRotateAllRightInt32x16: + return rewriteValueAMD64_OpRotateAllRightInt32x16(v) + case OpRotateAllRightInt32x4: + return rewriteValueAMD64_OpRotateAllRightInt32x4(v) + case OpRotateAllRightInt32x8: + return rewriteValueAMD64_OpRotateAllRightInt32x8(v) + case OpRotateAllRightInt64x2: + return rewriteValueAMD64_OpRotateAllRightInt64x2(v) + case OpRotateAllRightInt64x4: + return rewriteValueAMD64_OpRotateAllRightInt64x4(v) + case OpRotateAllRightInt64x8: + return rewriteValueAMD64_OpRotateAllRightInt64x8(v) + case OpRotateAllRightUint32x16: + return rewriteValueAMD64_OpRotateAllRightUint32x16(v) + case OpRotateAllRightUint32x4: + return rewriteValueAMD64_OpRotateAllRightUint32x4(v) + case OpRotateAllRightUint32x8: + return rewriteValueAMD64_OpRotateAllRightUint32x8(v) + case OpRotateAllRightUint64x2: + return rewriteValueAMD64_OpRotateAllRightUint64x2(v) + case OpRotateAllRightUint64x4: + return rewriteValueAMD64_OpRotateAllRightUint64x4(v) + case OpRotateAllRightUint64x8: + return rewriteValueAMD64_OpRotateAllRightUint64x8(v) case OpRotateLeft16: v.Op = OpAMD64ROLW return true @@ -3824,6 
+4250,78 @@ func rewriteValueAMD64(v *Value) bool { case OpRotateLeft8: v.Op = OpAMD64ROLB return true + case OpRotateLeftInt32x16: + v.Op = OpAMD64VPROLVD512 + return true + case OpRotateLeftInt32x4: + v.Op = OpAMD64VPROLVD128 + return true + case OpRotateLeftInt32x8: + v.Op = OpAMD64VPROLVD256 + return true + case OpRotateLeftInt64x2: + v.Op = OpAMD64VPROLVQ128 + return true + case OpRotateLeftInt64x4: + v.Op = OpAMD64VPROLVQ256 + return true + case OpRotateLeftInt64x8: + v.Op = OpAMD64VPROLVQ512 + return true + case OpRotateLeftUint32x16: + v.Op = OpAMD64VPROLVD512 + return true + case OpRotateLeftUint32x4: + v.Op = OpAMD64VPROLVD128 + return true + case OpRotateLeftUint32x8: + v.Op = OpAMD64VPROLVD256 + return true + case OpRotateLeftUint64x2: + v.Op = OpAMD64VPROLVQ128 + return true + case OpRotateLeftUint64x4: + v.Op = OpAMD64VPROLVQ256 + return true + case OpRotateLeftUint64x8: + v.Op = OpAMD64VPROLVQ512 + return true + case OpRotateRightInt32x16: + v.Op = OpAMD64VPRORVD512 + return true + case OpRotateRightInt32x4: + v.Op = OpAMD64VPRORVD128 + return true + case OpRotateRightInt32x8: + v.Op = OpAMD64VPRORVD256 + return true + case OpRotateRightInt64x2: + v.Op = OpAMD64VPRORVQ128 + return true + case OpRotateRightInt64x4: + v.Op = OpAMD64VPRORVQ256 + return true + case OpRotateRightInt64x8: + v.Op = OpAMD64VPRORVQ512 + return true + case OpRotateRightUint32x16: + v.Op = OpAMD64VPRORVD512 + return true + case OpRotateRightUint32x4: + v.Op = OpAMD64VPRORVD128 + return true + case OpRotateRightUint32x8: + v.Op = OpAMD64VPRORVD256 + return true + case OpRotateRightUint64x2: + v.Op = OpAMD64VPRORVQ128 + return true + case OpRotateRightUint64x4: + v.Op = OpAMD64VPRORVQ256 + return true + case OpRotateRightUint64x8: + v.Op = OpAMD64VPRORVQ512 + return true case OpRound32F: v.Op = OpAMD64LoweredRound32F return true @@ -4070,6 +4568,453 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpSetElemUint64x2(v) case OpSetElemUint8x16: return rewriteValueAMD64_OpSetElemUint8x16(v) + case OpShiftAllLeftAndFillUpperFromInt16x16: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt16x16(v) + case OpShiftAllLeftAndFillUpperFromInt16x32: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt16x32(v) + case OpShiftAllLeftAndFillUpperFromInt16x8: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt16x8(v) + case OpShiftAllLeftAndFillUpperFromInt32x16: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt32x16(v) + case OpShiftAllLeftAndFillUpperFromInt32x4: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt32x4(v) + case OpShiftAllLeftAndFillUpperFromInt32x8: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt32x8(v) + case OpShiftAllLeftAndFillUpperFromInt64x2: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt64x2(v) + case OpShiftAllLeftAndFillUpperFromInt64x4: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt64x4(v) + case OpShiftAllLeftAndFillUpperFromInt64x8: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt64x8(v) + case OpShiftAllLeftAndFillUpperFromUint16x16: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint16x16(v) + case OpShiftAllLeftAndFillUpperFromUint16x32: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint16x32(v) + case OpShiftAllLeftAndFillUpperFromUint16x8: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint16x8(v) + case OpShiftAllLeftAndFillUpperFromUint32x16: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint32x16(v) + case 
OpShiftAllLeftAndFillUpperFromUint32x4: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint32x4(v) + case OpShiftAllLeftAndFillUpperFromUint32x8: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint32x8(v) + case OpShiftAllLeftAndFillUpperFromUint64x2: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint64x2(v) + case OpShiftAllLeftAndFillUpperFromUint64x4: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint64x4(v) + case OpShiftAllLeftAndFillUpperFromUint64x8: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint64x8(v) + case OpShiftAllLeftInt16x16: + v.Op = OpAMD64VPSLLW256 + return true + case OpShiftAllLeftInt16x8: + v.Op = OpAMD64VPSLLW128 + return true + case OpShiftAllLeftInt32x4: + v.Op = OpAMD64VPSLLD128 + return true + case OpShiftAllLeftInt32x8: + v.Op = OpAMD64VPSLLD256 + return true + case OpShiftAllLeftInt64x2: + v.Op = OpAMD64VPSLLQ128 + return true + case OpShiftAllLeftInt64x4: + v.Op = OpAMD64VPSLLQ256 + return true + case OpShiftAllLeftInt64x8: + v.Op = OpAMD64VPSLLQ512 + return true + case OpShiftAllLeftUint16x16: + v.Op = OpAMD64VPSLLW256 + return true + case OpShiftAllLeftUint16x8: + v.Op = OpAMD64VPSLLW128 + return true + case OpShiftAllLeftUint32x4: + v.Op = OpAMD64VPSLLD128 + return true + case OpShiftAllLeftUint32x8: + v.Op = OpAMD64VPSLLD256 + return true + case OpShiftAllLeftUint64x2: + v.Op = OpAMD64VPSLLQ128 + return true + case OpShiftAllLeftUint64x4: + v.Op = OpAMD64VPSLLQ256 + return true + case OpShiftAllLeftUint64x8: + v.Op = OpAMD64VPSLLQ512 + return true + case OpShiftAllRightAndFillUpperFromInt16x16: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt16x16(v) + case OpShiftAllRightAndFillUpperFromInt16x32: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt16x32(v) + case OpShiftAllRightAndFillUpperFromInt16x8: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt16x8(v) + case OpShiftAllRightAndFillUpperFromInt32x16: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt32x16(v) + case OpShiftAllRightAndFillUpperFromInt32x4: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt32x4(v) + case OpShiftAllRightAndFillUpperFromInt32x8: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt32x8(v) + case OpShiftAllRightAndFillUpperFromInt64x2: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt64x2(v) + case OpShiftAllRightAndFillUpperFromInt64x4: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt64x4(v) + case OpShiftAllRightAndFillUpperFromInt64x8: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt64x8(v) + case OpShiftAllRightAndFillUpperFromUint16x16: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint16x16(v) + case OpShiftAllRightAndFillUpperFromUint16x32: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint16x32(v) + case OpShiftAllRightAndFillUpperFromUint16x8: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint16x8(v) + case OpShiftAllRightAndFillUpperFromUint32x16: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint32x16(v) + case OpShiftAllRightAndFillUpperFromUint32x4: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint32x4(v) + case OpShiftAllRightAndFillUpperFromUint32x8: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint32x8(v) + case OpShiftAllRightAndFillUpperFromUint64x2: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint64x2(v) + case OpShiftAllRightAndFillUpperFromUint64x4: + return 
rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint64x4(v) + case OpShiftAllRightAndFillUpperFromUint64x8: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint64x8(v) + case OpShiftAllRightInt16x16: + v.Op = OpAMD64VPSRLW256 + return true + case OpShiftAllRightInt16x8: + v.Op = OpAMD64VPSRLW128 + return true + case OpShiftAllRightInt32x4: + v.Op = OpAMD64VPSRLD128 + return true + case OpShiftAllRightInt32x8: + v.Op = OpAMD64VPSRLD256 + return true + case OpShiftAllRightInt64x2: + v.Op = OpAMD64VPSRLQ128 + return true + case OpShiftAllRightInt64x4: + v.Op = OpAMD64VPSRLQ256 + return true + case OpShiftAllRightInt64x8: + v.Op = OpAMD64VPSRLQ512 + return true + case OpShiftAllRightSignExtendedInt16x16: + v.Op = OpAMD64VPSRAW256 + return true + case OpShiftAllRightSignExtendedInt16x8: + v.Op = OpAMD64VPSRAW128 + return true + case OpShiftAllRightSignExtendedInt32x4: + v.Op = OpAMD64VPSRAD128 + return true + case OpShiftAllRightSignExtendedInt32x8: + v.Op = OpAMD64VPSRAD256 + return true + case OpShiftAllRightSignExtendedInt64x2: + v.Op = OpAMD64VPSRAQ128 + return true + case OpShiftAllRightSignExtendedInt64x4: + v.Op = OpAMD64VPSRAQ256 + return true + case OpShiftAllRightSignExtendedInt64x8: + v.Op = OpAMD64VPSRAQ512 + return true + case OpShiftAllRightUint16x16: + v.Op = OpAMD64VPSRLW256 + return true + case OpShiftAllRightUint16x8: + v.Op = OpAMD64VPSRLW128 + return true + case OpShiftAllRightUint32x4: + v.Op = OpAMD64VPSRLD128 + return true + case OpShiftAllRightUint32x8: + v.Op = OpAMD64VPSRLD256 + return true + case OpShiftAllRightUint64x2: + v.Op = OpAMD64VPSRLQ128 + return true + case OpShiftAllRightUint64x4: + v.Op = OpAMD64VPSRLQ256 + return true + case OpShiftAllRightUint64x8: + v.Op = OpAMD64VPSRLQ512 + return true + case OpShiftLeftAndFillUpperFromInt16x16: + v.Op = OpAMD64VPSHLDVW256 + return true + case OpShiftLeftAndFillUpperFromInt16x32: + v.Op = OpAMD64VPSHLDVW512 + return true + case OpShiftLeftAndFillUpperFromInt16x8: + v.Op = OpAMD64VPSHLDVW128 + return true + case OpShiftLeftAndFillUpperFromInt32x16: + v.Op = OpAMD64VPSHLDVD512 + return true + case OpShiftLeftAndFillUpperFromInt32x4: + v.Op = OpAMD64VPSHLDVD128 + return true + case OpShiftLeftAndFillUpperFromInt32x8: + v.Op = OpAMD64VPSHLDVD256 + return true + case OpShiftLeftAndFillUpperFromInt64x2: + v.Op = OpAMD64VPSHLDVQ128 + return true + case OpShiftLeftAndFillUpperFromInt64x4: + v.Op = OpAMD64VPSHLDVQ256 + return true + case OpShiftLeftAndFillUpperFromInt64x8: + v.Op = OpAMD64VPSHLDVQ512 + return true + case OpShiftLeftAndFillUpperFromUint16x16: + v.Op = OpAMD64VPSHLDVW256 + return true + case OpShiftLeftAndFillUpperFromUint16x32: + v.Op = OpAMD64VPSHLDVW512 + return true + case OpShiftLeftAndFillUpperFromUint16x8: + v.Op = OpAMD64VPSHLDVW128 + return true + case OpShiftLeftAndFillUpperFromUint32x16: + v.Op = OpAMD64VPSHLDVD512 + return true + case OpShiftLeftAndFillUpperFromUint32x4: + v.Op = OpAMD64VPSHLDVD128 + return true + case OpShiftLeftAndFillUpperFromUint32x8: + v.Op = OpAMD64VPSHLDVD256 + return true + case OpShiftLeftAndFillUpperFromUint64x2: + v.Op = OpAMD64VPSHLDVQ128 + return true + case OpShiftLeftAndFillUpperFromUint64x4: + v.Op = OpAMD64VPSHLDVQ256 + return true + case OpShiftLeftAndFillUpperFromUint64x8: + v.Op = OpAMD64VPSHLDVQ512 + return true + case OpShiftLeftInt16x16: + v.Op = OpAMD64VPSLLVW256 + return true + case OpShiftLeftInt16x32: + v.Op = OpAMD64VPSLLVW512 + return true + case OpShiftLeftInt16x8: + v.Op = OpAMD64VPSLLVW128 + return true + case OpShiftLeftInt32x16: + v.Op = 
OpAMD64VPSLLVD512 + return true + case OpShiftLeftInt32x4: + v.Op = OpAMD64VPSLLVD128 + return true + case OpShiftLeftInt32x8: + v.Op = OpAMD64VPSLLVD256 + return true + case OpShiftLeftInt64x2: + v.Op = OpAMD64VPSLLVQ128 + return true + case OpShiftLeftInt64x4: + v.Op = OpAMD64VPSLLVQ256 + return true + case OpShiftLeftInt64x8: + v.Op = OpAMD64VPSLLVQ512 + return true + case OpShiftLeftUint16x16: + v.Op = OpAMD64VPSLLVW256 + return true + case OpShiftLeftUint16x32: + v.Op = OpAMD64VPSLLVW512 + return true + case OpShiftLeftUint16x8: + v.Op = OpAMD64VPSLLVW128 + return true + case OpShiftLeftUint32x16: + v.Op = OpAMD64VPSLLVD512 + return true + case OpShiftLeftUint32x4: + v.Op = OpAMD64VPSLLVD128 + return true + case OpShiftLeftUint32x8: + v.Op = OpAMD64VPSLLVD256 + return true + case OpShiftLeftUint64x2: + v.Op = OpAMD64VPSLLVQ128 + return true + case OpShiftLeftUint64x4: + v.Op = OpAMD64VPSLLVQ256 + return true + case OpShiftLeftUint64x8: + v.Op = OpAMD64VPSLLVQ512 + return true + case OpShiftRightAndFillUpperFromInt16x16: + v.Op = OpAMD64VPSHRDVW256 + return true + case OpShiftRightAndFillUpperFromInt16x32: + v.Op = OpAMD64VPSHRDVW512 + return true + case OpShiftRightAndFillUpperFromInt16x8: + v.Op = OpAMD64VPSHRDVW128 + return true + case OpShiftRightAndFillUpperFromInt32x16: + v.Op = OpAMD64VPSHRDVD512 + return true + case OpShiftRightAndFillUpperFromInt32x4: + v.Op = OpAMD64VPSHRDVD128 + return true + case OpShiftRightAndFillUpperFromInt32x8: + v.Op = OpAMD64VPSHRDVD256 + return true + case OpShiftRightAndFillUpperFromInt64x2: + v.Op = OpAMD64VPSHRDVQ128 + return true + case OpShiftRightAndFillUpperFromInt64x4: + v.Op = OpAMD64VPSHRDVQ256 + return true + case OpShiftRightAndFillUpperFromInt64x8: + v.Op = OpAMD64VPSHRDVQ512 + return true + case OpShiftRightAndFillUpperFromUint16x16: + v.Op = OpAMD64VPSHRDVW256 + return true + case OpShiftRightAndFillUpperFromUint16x32: + v.Op = OpAMD64VPSHRDVW512 + return true + case OpShiftRightAndFillUpperFromUint16x8: + v.Op = OpAMD64VPSHRDVW128 + return true + case OpShiftRightAndFillUpperFromUint32x16: + v.Op = OpAMD64VPSHRDVD512 + return true + case OpShiftRightAndFillUpperFromUint32x4: + v.Op = OpAMD64VPSHRDVD128 + return true + case OpShiftRightAndFillUpperFromUint32x8: + v.Op = OpAMD64VPSHRDVD256 + return true + case OpShiftRightAndFillUpperFromUint64x2: + v.Op = OpAMD64VPSHRDVQ128 + return true + case OpShiftRightAndFillUpperFromUint64x4: + v.Op = OpAMD64VPSHRDVQ256 + return true + case OpShiftRightAndFillUpperFromUint64x8: + v.Op = OpAMD64VPSHRDVQ512 + return true + case OpShiftRightInt16x16: + v.Op = OpAMD64VPSRLVW256 + return true + case OpShiftRightInt16x32: + v.Op = OpAMD64VPSRLVW512 + return true + case OpShiftRightInt16x8: + v.Op = OpAMD64VPSRLVW128 + return true + case OpShiftRightInt32x16: + v.Op = OpAMD64VPSRLVD512 + return true + case OpShiftRightInt32x4: + v.Op = OpAMD64VPSRLVD128 + return true + case OpShiftRightInt32x8: + v.Op = OpAMD64VPSRLVD256 + return true + case OpShiftRightInt64x2: + v.Op = OpAMD64VPSRLVQ128 + return true + case OpShiftRightInt64x4: + v.Op = OpAMD64VPSRLVQ256 + return true + case OpShiftRightInt64x8: + v.Op = OpAMD64VPSRLVQ512 + return true + case OpShiftRightSignExtendedInt16x16: + v.Op = OpAMD64VPSRAVW256 + return true + case OpShiftRightSignExtendedInt16x32: + v.Op = OpAMD64VPSRAVW512 + return true + case OpShiftRightSignExtendedInt16x8: + v.Op = OpAMD64VPSRAVW128 + return true + case OpShiftRightSignExtendedInt32x16: + v.Op = OpAMD64VPSRAVD512 + return true + case OpShiftRightSignExtendedInt32x4: + 
v.Op = OpAMD64VPSRAVD128 + return true + case OpShiftRightSignExtendedInt32x8: + v.Op = OpAMD64VPSRAVD256 + return true + case OpShiftRightSignExtendedInt64x2: + v.Op = OpAMD64VPSRAVQ128 + return true + case OpShiftRightSignExtendedInt64x4: + v.Op = OpAMD64VPSRAVQ256 + return true + case OpShiftRightSignExtendedInt64x8: + v.Op = OpAMD64VPSRAVQ512 + return true + case OpShiftRightSignExtendedUint16x16: + v.Op = OpAMD64VPSRAVW256 + return true + case OpShiftRightSignExtendedUint16x32: + v.Op = OpAMD64VPSRAVW512 + return true + case OpShiftRightSignExtendedUint16x8: + v.Op = OpAMD64VPSRAVW128 + return true + case OpShiftRightSignExtendedUint32x16: + v.Op = OpAMD64VPSRAVD512 + return true + case OpShiftRightSignExtendedUint32x4: + v.Op = OpAMD64VPSRAVD128 + return true + case OpShiftRightSignExtendedUint32x8: + v.Op = OpAMD64VPSRAVD256 + return true + case OpShiftRightSignExtendedUint64x2: + v.Op = OpAMD64VPSRAVQ128 + return true + case OpShiftRightSignExtendedUint64x4: + v.Op = OpAMD64VPSRAVQ256 + return true + case OpShiftRightSignExtendedUint64x8: + v.Op = OpAMD64VPSRAVQ512 + return true + case OpShiftRightUint16x16: + v.Op = OpAMD64VPSRLVW256 + return true + case OpShiftRightUint16x32: + v.Op = OpAMD64VPSRLVW512 + return true + case OpShiftRightUint16x8: + v.Op = OpAMD64VPSRLVW128 + return true + case OpShiftRightUint32x16: + v.Op = OpAMD64VPSRLVD512 + return true + case OpShiftRightUint32x4: + v.Op = OpAMD64VPSRLVD128 + return true + case OpShiftRightUint32x8: + v.Op = OpAMD64VPSRLVD256 + return true + case OpShiftRightUint64x2: + v.Op = OpAMD64VPSRLVQ128 + return true + case OpShiftRightUint64x4: + v.Op = OpAMD64VPSRLVQ256 + return true + case OpShiftRightUint64x8: + v.Op = OpAMD64VPSRLVQ512 + return true case OpSignExt16to32: v.Op = OpAMD64MOVWQSX return true @@ -43973,6 +44918,870 @@ func rewriteValueAMD64_OpMaskedPopCountUint8x64(v *Value) bool { return true } } +func rewriteValueAMD64_OpMaskedRotateAllLeftInt32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateAllLeftInt32x16 [a] x mask) + // result: (VPROLDMasked512 [a] x (VPMOVVec32x16ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPROLDMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateAllLeftInt32x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateAllLeftInt32x4 [a] x mask) + // result: (VPROLDMasked128 [a] x (VPMOVVec32x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPROLDMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateAllLeftInt32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateAllLeftInt32x8 [a] x mask) + // result: (VPROLDMasked256 [a] x (VPMOVVec32x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPROLDMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateAllLeftInt64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateAllLeftInt64x2 [a] x mask) + // result: 
(VPROLQMasked128 [a] x (VPMOVVec64x2ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPROLQMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateAllLeftInt64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateAllLeftInt64x4 [a] x mask) + // result: (VPROLQMasked256 [a] x (VPMOVVec64x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPROLQMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateAllLeftInt64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateAllLeftInt64x8 [a] x mask) + // result: (VPROLQMasked512 [a] x (VPMOVVec64x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPROLQMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateAllLeftUint32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateAllLeftUint32x16 [a] x mask) + // result: (VPROLDMasked512 [a] x (VPMOVVec32x16ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPROLDMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateAllLeftUint32x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateAllLeftUint32x4 [a] x mask) + // result: (VPROLDMasked128 [a] x (VPMOVVec32x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPROLDMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateAllLeftUint32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateAllLeftUint32x8 [a] x mask) + // result: (VPROLDMasked256 [a] x (VPMOVVec32x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPROLDMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateAllLeftUint64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateAllLeftUint64x2 [a] x mask) + // result: (VPROLQMasked128 [a] x (VPMOVVec64x2ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPROLQMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateAllLeftUint64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateAllLeftUint64x4 [a] x mask) + // result: (VPROLQMasked256 [a] x (VPMOVVec64x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + 
v.reset(OpAMD64VPROLQMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateAllLeftUint64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateAllLeftUint64x8 [a] x mask) + // result: (VPROLQMasked512 [a] x (VPMOVVec64x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPROLQMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateAllRightInt32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateAllRightInt32x16 [a] x mask) + // result: (VPRORDMasked512 [a] x (VPMOVVec32x16ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPRORDMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateAllRightInt32x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateAllRightInt32x4 [a] x mask) + // result: (VPRORDMasked128 [a] x (VPMOVVec32x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPRORDMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateAllRightInt32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateAllRightInt32x8 [a] x mask) + // result: (VPRORDMasked256 [a] x (VPMOVVec32x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPRORDMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateAllRightInt64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateAllRightInt64x2 [a] x mask) + // result: (VPRORQMasked128 [a] x (VPMOVVec64x2ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPRORQMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateAllRightInt64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateAllRightInt64x4 [a] x mask) + // result: (VPRORQMasked256 [a] x (VPMOVVec64x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPRORQMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateAllRightInt64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateAllRightInt64x8 [a] x mask) + // result: (VPRORQMasked512 [a] x (VPMOVVec64x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPRORQMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + 
v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateAllRightUint32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateAllRightUint32x16 [a] x mask) + // result: (VPRORDMasked512 [a] x (VPMOVVec32x16ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPRORDMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateAllRightUint32x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateAllRightUint32x4 [a] x mask) + // result: (VPRORDMasked128 [a] x (VPMOVVec32x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPRORDMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateAllRightUint32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateAllRightUint32x8 [a] x mask) + // result: (VPRORDMasked256 [a] x (VPMOVVec32x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPRORDMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateAllRightUint64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateAllRightUint64x2 [a] x mask) + // result: (VPRORQMasked128 [a] x (VPMOVVec64x2ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPRORQMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateAllRightUint64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateAllRightUint64x4 [a] x mask) + // result: (VPRORQMasked256 [a] x (VPMOVVec64x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPRORQMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateAllRightUint64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateAllRightUint64x8 [a] x mask) + // result: (VPRORQMasked512 [a] x (VPMOVVec64x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPRORQMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateLeftInt32x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateLeftInt32x16 x y mask) + // result: (VPROLVDMasked512 x y (VPMOVVec32x16ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPROLVDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateLeftInt32x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := 
v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateLeftInt32x4 x y mask) + // result: (VPROLVDMasked128 x y (VPMOVVec32x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPROLVDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateLeftInt32x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateLeftInt32x8 x y mask) + // result: (VPROLVDMasked256 x y (VPMOVVec32x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPROLVDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateLeftInt64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateLeftInt64x2 x y mask) + // result: (VPROLVQMasked128 x y (VPMOVVec64x2ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPROLVQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateLeftInt64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateLeftInt64x4 x y mask) + // result: (VPROLVQMasked256 x y (VPMOVVec64x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPROLVQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateLeftInt64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateLeftInt64x8 x y mask) + // result: (VPROLVQMasked512 x y (VPMOVVec64x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPROLVQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateLeftUint32x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateLeftUint32x16 x y mask) + // result: (VPROLVDMasked512 x y (VPMOVVec32x16ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPROLVDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateLeftUint32x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateLeftUint32x4 x y mask) + // result: (VPROLVDMasked128 x y (VPMOVVec32x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPROLVDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateLeftUint32x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateLeftUint32x8 x y mask) + // result: (VPROLVDMasked256 x y (VPMOVVec32x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPROLVDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func 
rewriteValueAMD64_OpMaskedRotateLeftUint64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateLeftUint64x2 x y mask) + // result: (VPROLVQMasked128 x y (VPMOVVec64x2ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPROLVQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateLeftUint64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateLeftUint64x4 x y mask) + // result: (VPROLVQMasked256 x y (VPMOVVec64x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPROLVQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateLeftUint64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateLeftUint64x8 x y mask) + // result: (VPROLVQMasked512 x y (VPMOVVec64x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPROLVQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateRightInt32x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateRightInt32x16 x y mask) + // result: (VPRORVDMasked512 x y (VPMOVVec32x16ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPRORVDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateRightInt32x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateRightInt32x4 x y mask) + // result: (VPRORVDMasked128 x y (VPMOVVec32x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPRORVDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateRightInt32x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateRightInt32x8 x y mask) + // result: (VPRORVDMasked256 x y (VPMOVVec32x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPRORVDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateRightInt64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateRightInt64x2 x y mask) + // result: (VPRORVQMasked128 x y (VPMOVVec64x2ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPRORVQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateRightInt64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateRightInt64x4 x y mask) + // result: (VPRORVQMasked256 x y (VPMOVVec64x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPRORVQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, 
types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateRightInt64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateRightInt64x8 x y mask) + // result: (VPRORVQMasked512 x y (VPMOVVec64x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPRORVQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateRightUint32x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateRightUint32x16 x y mask) + // result: (VPRORVDMasked512 x y (VPMOVVec32x16ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPRORVDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateRightUint32x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateRightUint32x4 x y mask) + // result: (VPRORVDMasked128 x y (VPMOVVec32x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPRORVDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateRightUint32x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateRightUint32x8 x y mask) + // result: (VPRORVDMasked256 x y (VPMOVVec32x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPRORVDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateRightUint64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateRightUint64x2 x y mask) + // result: (VPRORVQMasked128 x y (VPMOVVec64x2ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPRORVQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateRightUint64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateRightUint64x4 x y mask) + // result: (VPRORVQMasked256 x y (VPMOVVec64x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPRORVQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRotateRightUint64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRotateRightUint64x8 x y mask) + // result: (VPRORVQMasked512 x y (VPMOVVec64x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPRORVQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} func rewriteValueAMD64_OpMaskedRoundSuppressExceptionWithPrecisionFloat32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -44855,6 +46664,2688 @@ func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint3 return true } } +func 
rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt16x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllLeftAndFillUpperFromInt16x16 [a] x y mask) + // result: (VPSHLDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDWMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt16x32(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllLeftAndFillUpperFromInt16x32 [a] x y mask) + // result: (VPSHLDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDWMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt16x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllLeftAndFillUpperFromInt16x8 [a] x y mask) + // result: (VPSHLDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDWMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt32x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllLeftAndFillUpperFromInt32x16 [a] x y mask) + // result: (VPSHLDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDDMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt32x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllLeftAndFillUpperFromInt32x4 [a] x y mask) + // result: (VPSHLDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDDMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt32x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllLeftAndFillUpperFromInt32x8 [a] x y mask) + // result: (VPSHLDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDDMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllLeftAndFillUpperFromInt64x2 [a] x 
y mask) + // result: (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDQMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllLeftAndFillUpperFromInt64x4 [a] x y mask) + // result: (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDQMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllLeftAndFillUpperFromInt64x8 [a] x y mask) + // result: (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDQMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint16x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllLeftAndFillUpperFromUint16x16 [a] x y mask) + // result: (VPSHLDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDWMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint16x32(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllLeftAndFillUpperFromUint16x32 [a] x y mask) + // result: (VPSHLDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDWMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint16x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllLeftAndFillUpperFromUint16x8 [a] x y mask) + // result: (VPSHLDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDWMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint32x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllLeftAndFillUpperFromUint32x16 [a] x y mask) + // result: (VPSHLDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDDMasked512) + v.AuxInt = 
int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint32x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllLeftAndFillUpperFromUint32x4 [a] x y mask) + // result: (VPSHLDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDDMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint32x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllLeftAndFillUpperFromUint32x8 [a] x y mask) + // result: (VPSHLDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDDMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllLeftAndFillUpperFromUint64x2 [a] x y mask) + // result: (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDQMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllLeftAndFillUpperFromUint64x4 [a] x y mask) + // result: (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDQMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllLeftAndFillUpperFromUint64x8 [a] x y mask) + // result: (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDQMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllLeftInt64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllLeftInt64x2 x y mask) + // result: (VPSLLQMasked128 x y (VPMOVVec64x2ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllLeftInt64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: 
(MaskedShiftAllLeftInt64x4 x y mask) + // result: (VPSLLQMasked256 x y (VPMOVVec64x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllLeftInt64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllLeftInt64x8 x y mask) + // result: (VPSLLQMasked512 x y (VPMOVVec64x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllLeftUint64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllLeftUint64x2 x y mask) + // result: (VPSLLQMasked128 x y (VPMOVVec64x2ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllLeftUint64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllLeftUint64x4 x y mask) + // result: (VPSLLQMasked256 x y (VPMOVVec64x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllLeftUint64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllLeftUint64x8 x y mask) + // result: (VPSLLQMasked512 x y (VPMOVVec64x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt16x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightAndFillUpperFromInt16x16 [a] x y mask) + // result: (VPSHRDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHRDWMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt16x32(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightAndFillUpperFromInt16x32 [a] x y mask) + // result: (VPSHRDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHRDWMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt16x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightAndFillUpperFromInt16x8 [a] x y mask) + // result: (VPSHRDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) + for { + a := 
auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHRDWMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt32x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightAndFillUpperFromInt32x16 [a] x y mask) + // result: (VPSHRDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHRDDMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt32x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightAndFillUpperFromInt32x4 [a] x y mask) + // result: (VPSHRDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHRDDMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt32x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightAndFillUpperFromInt32x8 [a] x y mask) + // result: (VPSHRDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHRDDMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightAndFillUpperFromInt64x2 [a] x y mask) + // result: (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHRDQMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightAndFillUpperFromInt64x4 [a] x y mask) + // result: (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHRDQMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightAndFillUpperFromInt64x8 [a] x y mask) + // result: (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHRDQMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) 
+ v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint16x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightAndFillUpperFromUint16x16 [a] x y mask) + // result: (VPSHRDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHRDWMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint16x32(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightAndFillUpperFromUint16x32 [a] x y mask) + // result: (VPSHRDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHRDWMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint16x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightAndFillUpperFromUint16x8 [a] x y mask) + // result: (VPSHRDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHRDWMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint32x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightAndFillUpperFromUint32x16 [a] x y mask) + // result: (VPSHRDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHRDDMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint32x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightAndFillUpperFromUint32x4 [a] x y mask) + // result: (VPSHRDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHRDDMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint32x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightAndFillUpperFromUint32x8 [a] x y mask) + // result: (VPSHRDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHRDDMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + 
b := v.Block + // match: (MaskedShiftAllRightAndFillUpperFromUint64x2 [a] x y mask) + // result: (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHRDQMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightAndFillUpperFromUint64x4 [a] x y mask) + // result: (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHRDQMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightAndFillUpperFromUint64x8 [a] x y mask) + // result: (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHRDQMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightInt64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightInt64x2 x y mask) + // result: (VPSRLQMasked128 x y (VPMOVVec64x2ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightInt64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightInt64x4 x y mask) + // result: (VPSRLQMasked256 x y (VPMOVVec64x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightInt64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightInt64x8 x y mask) + // result: (VPSRLQMasked512 x y (VPMOVVec64x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightSignExtendedInt64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightSignExtendedInt64x2 x y mask) + // result: (VPSRAQMasked128 x y (VPMOVVec64x2ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightSignExtendedInt64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: 
(MaskedShiftAllRightSignExtendedInt64x4 x y mask) + // result: (VPSRAQMasked256 x y (VPMOVVec64x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightSignExtendedInt64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightSignExtendedInt64x8 x y mask) + // result: (VPSRAQMasked512 x y (VPMOVVec64x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightUint64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightUint64x2 x y mask) + // result: (VPSRLQMasked128 x y (VPMOVVec64x2ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightUint64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightUint64x4 x y mask) + // result: (VPSRLQMasked256 x y (VPMOVVec64x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftAllRightUint64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftAllRightUint64x8 x y mask) + // result: (VPSRLQMasked512 x y (VPMOVVec64x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt16x16(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftAndFillUpperFromInt16x16 x y z mask) + // result: (VPSHLDVWMasked256 x y z (VPMOVVec16x16ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt16x32(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftAndFillUpperFromInt16x32 x y z mask) + // result: (VPSHLDVWMasked512 x y z (VPMOVVec16x32ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt16x8(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftAndFillUpperFromInt16x8 x y z mask) + // result: (VPSHLDVWMasked128 x y z (VPMOVVec16x8ToM mask)) + for { + x := v_0 + y := v_1 + z := 
v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt32x16(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftAndFillUpperFromInt32x16 x y z mask) + // result: (VPSHLDVDMasked512 x y z (VPMOVVec32x16ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt32x4(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftAndFillUpperFromInt32x4 x y z mask) + // result: (VPSHLDVDMasked128 x y z (VPMOVVec32x4ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt32x8(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftAndFillUpperFromInt32x8 x y z mask) + // result: (VPSHLDVDMasked256 x y z (VPMOVVec32x8ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt64x2(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftAndFillUpperFromInt64x2 x y z mask) + // result: (VPSHLDVQMasked128 x y z (VPMOVVec64x2ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt64x4(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftAndFillUpperFromInt64x4 x y z mask) + // result: (VPSHLDVQMasked256 x y z (VPMOVVec64x4ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt64x8(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftAndFillUpperFromInt64x8 x y z mask) + // result: (VPSHLDVQMasked512 x y z (VPMOVVec64x8ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint16x16(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftAndFillUpperFromUint16x16 x y z 
mask) + // result: (VPSHLDVWMasked256 x y z (VPMOVVec16x16ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint16x32(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftAndFillUpperFromUint16x32 x y z mask) + // result: (VPSHLDVWMasked512 x y z (VPMOVVec16x32ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint16x8(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftAndFillUpperFromUint16x8 x y z mask) + // result: (VPSHLDVWMasked128 x y z (VPMOVVec16x8ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint32x16(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftAndFillUpperFromUint32x16 x y z mask) + // result: (VPSHLDVDMasked512 x y z (VPMOVVec32x16ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint32x4(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftAndFillUpperFromUint32x4 x y z mask) + // result: (VPSHLDVDMasked128 x y z (VPMOVVec32x4ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint32x8(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftAndFillUpperFromUint32x8 x y z mask) + // result: (VPSHLDVDMasked256 x y z (VPMOVVec32x8ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint64x2(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftAndFillUpperFromUint64x2 x y z mask) + // result: (VPSHLDVQMasked128 x y z (VPMOVVec64x2ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint64x4(v *Value) bool { + v_3 := v.Args[3] + v_2 := 
v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftAndFillUpperFromUint64x4 x y z mask) + // result: (VPSHLDVQMasked256 x y z (VPMOVVec64x4ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint64x8(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftAndFillUpperFromUint64x8 x y z mask) + // result: (VPSHLDVQMasked512 x y z (VPMOVVec64x8ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftInt16x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftInt16x16 x y mask) + // result: (VPSLLVWMasked256 x y (VPMOVVec16x16ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLVWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftInt16x32(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftInt16x32 x y mask) + // result: (VPSLLVWMasked512 x y (VPMOVVec16x32ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLVWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftInt16x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftInt16x8 x y mask) + // result: (VPSLLVWMasked128 x y (VPMOVVec16x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLVWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftInt32x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftInt32x16 x y mask) + // result: (VPSLLVDMasked512 x y (VPMOVVec32x16ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLVDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftInt32x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftInt32x4 x y mask) + // result: (VPSLLVDMasked128 x y (VPMOVVec32x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLVDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftInt32x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftInt32x8 x y mask) + // result: (VPSLLVDMasked256 x y (VPMOVVec32x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLVDMasked256) + v0 := b.NewValue0(v.Pos, 
OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftInt64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftInt64x2 x y mask) + // result: (VPSLLVQMasked128 x y (VPMOVVec64x2ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLVQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftInt64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftInt64x4 x y mask) + // result: (VPSLLVQMasked256 x y (VPMOVVec64x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLVQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftInt64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftInt64x8 x y mask) + // result: (VPSLLVQMasked512 x y (VPMOVVec64x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLVQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftUint16x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftUint16x16 x y mask) + // result: (VPSLLVWMasked256 x y (VPMOVVec16x16ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLVWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftUint16x32(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftUint16x32 x y mask) + // result: (VPSLLVWMasked512 x y (VPMOVVec16x32ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLVWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftUint16x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftUint16x8 x y mask) + // result: (VPSLLVWMasked128 x y (VPMOVVec16x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLVWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftUint32x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftUint32x16 x y mask) + // result: (VPSLLVDMasked512 x y (VPMOVVec32x16ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLVDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftUint32x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftUint32x4 x y mask) + // result: (VPSLLVDMasked128 x y (VPMOVVec32x4ToM mask)) + for { + x := v_0 + y := v_1 + mask 
:= v_2 + v.reset(OpAMD64VPSLLVDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftUint32x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftUint32x8 x y mask) + // result: (VPSLLVDMasked256 x y (VPMOVVec32x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLVDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftUint64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftUint64x2 x y mask) + // result: (VPSLLVQMasked128 x y (VPMOVVec64x2ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLVQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftUint64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftUint64x4 x y mask) + // result: (VPSLLVQMasked256 x y (VPMOVVec64x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLVQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftLeftUint64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftLeftUint64x8 x y mask) + // result: (VPSLLVQMasked512 x y (VPMOVVec64x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLVQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt16x16(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightAndFillUpperFromInt16x16 x y z mask) + // result: (VPSHRDVWMasked256 x y z (VPMOVVec16x16ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt16x32(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightAndFillUpperFromInt16x32 x y z mask) + // result: (VPSHRDVWMasked512 x y z (VPMOVVec16x32ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt16x8(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightAndFillUpperFromInt16x8 x y z mask) + // result: (VPSHRDVWMasked128 x y z (VPMOVVec16x8ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} 
+func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt32x16(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightAndFillUpperFromInt32x16 x y z mask) + // result: (VPSHRDVDMasked512 x y z (VPMOVVec32x16ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt32x4(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightAndFillUpperFromInt32x4 x y z mask) + // result: (VPSHRDVDMasked128 x y z (VPMOVVec32x4ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt32x8(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightAndFillUpperFromInt32x8 x y z mask) + // result: (VPSHRDVDMasked256 x y z (VPMOVVec32x8ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt64x2(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightAndFillUpperFromInt64x2 x y z mask) + // result: (VPSHRDVQMasked128 x y z (VPMOVVec64x2ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt64x4(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightAndFillUpperFromInt64x4 x y z mask) + // result: (VPSHRDVQMasked256 x y z (VPMOVVec64x4ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt64x8(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightAndFillUpperFromInt64x8 x y z mask) + // result: (VPSHRDVQMasked512 x y z (VPMOVVec64x8ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint16x16(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightAndFillUpperFromUint16x16 x y z mask) + // result: (VPSHRDVWMasked256 x y z (VPMOVVec16x16ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVWMasked256) + v0 := 
b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint16x32(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightAndFillUpperFromUint16x32 x y z mask) + // result: (VPSHRDVWMasked512 x y z (VPMOVVec16x32ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint16x8(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightAndFillUpperFromUint16x8 x y z mask) + // result: (VPSHRDVWMasked128 x y z (VPMOVVec16x8ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint32x16(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightAndFillUpperFromUint32x16 x y z mask) + // result: (VPSHRDVDMasked512 x y z (VPMOVVec32x16ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint32x4(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightAndFillUpperFromUint32x4 x y z mask) + // result: (VPSHRDVDMasked128 x y z (VPMOVVec32x4ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint32x8(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightAndFillUpperFromUint32x8 x y z mask) + // result: (VPSHRDVDMasked256 x y z (VPMOVVec32x8ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint64x2(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightAndFillUpperFromUint64x2 x y z mask) + // result: (VPSHRDVQMasked128 x y z (VPMOVVec64x2ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint64x4(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightAndFillUpperFromUint64x4 x y z mask) + // result: (VPSHRDVQMasked256 
x y z (VPMOVVec64x4ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint64x8(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightAndFillUpperFromUint64x8 x y z mask) + // result: (VPSHRDVQMasked512 x y z (VPMOVVec64x8ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightInt16x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightInt16x16 x y mask) + // result: (VPSRLVWMasked256 x y (VPMOVVec16x16ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLVWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightInt16x32(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightInt16x32 x y mask) + // result: (VPSRLVWMasked512 x y (VPMOVVec16x32ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLVWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightInt16x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightInt16x8 x y mask) + // result: (VPSRLVWMasked128 x y (VPMOVVec16x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLVWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightInt32x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightInt32x16 x y mask) + // result: (VPSRLVDMasked512 x y (VPMOVVec32x16ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLVDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightInt32x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightInt32x4 x y mask) + // result: (VPSRLVDMasked128 x y (VPMOVVec32x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLVDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightInt32x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightInt32x8 x y mask) + // result: (VPSRLVDMasked256 x y (VPMOVVec32x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLVDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightInt64x2(v 
*Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightInt64x2 x y mask) + // result: (VPSRLVQMasked128 x y (VPMOVVec64x2ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLVQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightInt64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightInt64x4 x y mask) + // result: (VPSRLVQMasked256 x y (VPMOVVec64x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLVQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightInt64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightInt64x8 x y mask) + // result: (VPSRLVQMasked512 x y (VPMOVVec64x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLVQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt16x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightSignExtendedInt16x16 x y mask) + // result: (VPSRAVWMasked256 x y (VPMOVVec16x16ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAVWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt16x32(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightSignExtendedInt16x32 x y mask) + // result: (VPSRAVWMasked512 x y (VPMOVVec16x32ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAVWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt16x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightSignExtendedInt16x8 x y mask) + // result: (VPSRAVWMasked128 x y (VPMOVVec16x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAVWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt32x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightSignExtendedInt32x16 x y mask) + // result: (VPSRAVDMasked512 x y (VPMOVVec32x16ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAVDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt32x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightSignExtendedInt32x4 x y mask) + // result: (VPSRAVDMasked128 x y (VPMOVVec32x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + 
v.reset(OpAMD64VPSRAVDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt32x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightSignExtendedInt32x8 x y mask) + // result: (VPSRAVDMasked256 x y (VPMOVVec32x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAVDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightSignExtendedInt64x2 x y mask) + // result: (VPSRAVQMasked128 x y (VPMOVVec64x2ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAVQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightSignExtendedInt64x4 x y mask) + // result: (VPSRAVQMasked256 x y (VPMOVVec64x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAVQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightSignExtendedInt64x8 x y mask) + // result: (VPSRAVQMasked512 x y (VPMOVVec64x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAVQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint16x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightSignExtendedUint16x16 x y mask) + // result: (VPSRAVWMasked256 x y (VPMOVVec16x16ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAVWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint16x32(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightSignExtendedUint16x32 x y mask) + // result: (VPSRAVWMasked512 x y (VPMOVVec16x32ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAVWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint16x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightSignExtendedUint16x8 x y mask) + // result: (VPSRAVWMasked128 x y (VPMOVVec16x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAVWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func 
rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint32x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightSignExtendedUint32x16 x y mask) + // result: (VPSRAVDMasked512 x y (VPMOVVec32x16ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAVDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint32x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightSignExtendedUint32x4 x y mask) + // result: (VPSRAVDMasked128 x y (VPMOVVec32x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAVDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint32x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightSignExtendedUint32x8 x y mask) + // result: (VPSRAVDMasked256 x y (VPMOVVec32x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAVDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightSignExtendedUint64x2 x y mask) + // result: (VPSRAVQMasked128 x y (VPMOVVec64x2ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAVQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightSignExtendedUint64x4 x y mask) + // result: (VPSRAVQMasked256 x y (VPMOVVec64x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAVQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightSignExtendedUint64x8 x y mask) + // result: (VPSRAVQMasked512 x y (VPMOVVec64x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAVQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightUint16x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightUint16x16 x y mask) + // result: (VPSRLVWMasked256 x y (VPMOVVec16x16ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLVWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightUint16x32(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightUint16x32 x y mask) + // result: (VPSRLVWMasked512 x y 
(VPMOVVec16x32ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLVWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightUint16x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightUint16x8 x y mask) + // result: (VPSRLVWMasked128 x y (VPMOVVec16x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLVWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightUint32x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightUint32x16 x y mask) + // result: (VPSRLVDMasked512 x y (VPMOVVec32x16ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLVDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightUint32x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightUint32x4 x y mask) + // result: (VPSRLVDMasked128 x y (VPMOVVec32x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLVDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightUint32x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightUint32x8 x y mask) + // result: (VPSRLVDMasked256 x y (VPMOVVec32x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLVDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightUint64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightUint64x2 x y mask) + // result: (VPSRLVQMasked128 x y (VPMOVVec64x2ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLVQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightUint64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightUint64x4 x y mask) + // result: (VPSRLVQMasked256 x y (VPMOVVec64x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLVQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedShiftRightUint64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedShiftRightUint64x8 x y mask) + // result: (VPSRLVQMasked512 x y (VPMOVVec64x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLVQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} func rewriteValueAMD64_OpMaskedSqrtFloat32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -47629,6 +52120,318 @@ func 
rewriteValueAMD64_OpPopCount8(v *Value) bool { return true } } +func rewriteValueAMD64_OpRotateAllLeftInt32x16(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllLeftInt32x16 [a] x) + // result: (VPROLD512 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPROLD512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRotateAllLeftInt32x4(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllLeftInt32x4 [a] x) + // result: (VPROLD128 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPROLD128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRotateAllLeftInt32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllLeftInt32x8 [a] x) + // result: (VPROLD256 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPROLD256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRotateAllLeftInt64x2(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllLeftInt64x2 [a] x) + // result: (VPROLQ128 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPROLQ128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRotateAllLeftInt64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllLeftInt64x4 [a] x) + // result: (VPROLQ256 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPROLQ256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRotateAllLeftInt64x8(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllLeftInt64x8 [a] x) + // result: (VPROLQ512 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPROLQ512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRotateAllLeftUint32x16(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllLeftUint32x16 [a] x) + // result: (VPROLD512 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPROLD512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRotateAllLeftUint32x4(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllLeftUint32x4 [a] x) + // result: (VPROLD128 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPROLD128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRotateAllLeftUint32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllLeftUint32x8 [a] x) + // result: (VPROLD256 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPROLD256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRotateAllLeftUint64x2(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllLeftUint64x2 [a] x) + // result: (VPROLQ128 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPROLQ128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRotateAllLeftUint64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllLeftUint64x4 [a] x) + // result: (VPROLQ256 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPROLQ256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRotateAllLeftUint64x8(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllLeftUint64x8 [a] x) + // result: (VPROLQ512 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + 
v.reset(OpAMD64VPROLQ512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRotateAllRightInt32x16(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllRightInt32x16 [a] x) + // result: (VPRORD512 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPRORD512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRotateAllRightInt32x4(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllRightInt32x4 [a] x) + // result: (VPRORD128 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPRORD128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRotateAllRightInt32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllRightInt32x8 [a] x) + // result: (VPRORD256 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPRORD256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRotateAllRightInt64x2(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllRightInt64x2 [a] x) + // result: (VPRORQ128 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPRORQ128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRotateAllRightInt64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllRightInt64x4 [a] x) + // result: (VPRORQ256 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPRORQ256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRotateAllRightInt64x8(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllRightInt64x8 [a] x) + // result: (VPRORQ512 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPRORQ512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRotateAllRightUint32x16(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllRightUint32x16 [a] x) + // result: (VPRORD512 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPRORD512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRotateAllRightUint32x4(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllRightUint32x4 [a] x) + // result: (VPRORD128 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPRORD128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRotateAllRightUint32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllRightUint32x8 [a] x) + // result: (VPRORD256 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPRORD256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRotateAllRightUint64x2(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllRightUint64x2 [a] x) + // result: (VPRORQ128 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPRORQ128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRotateAllRightUint64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllRightUint64x4 [a] x) + // result: (VPRORQ256 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPRORQ256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRotateAllRightUint64x8(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllRightUint64x8 [a] x) + // result: (VPRORQ512 [a] x) + for { + a 
:= auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPRORQ512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} func rewriteValueAMD64_OpRoundFloat32x4(v *Value) bool { v_0 := v.Args[0] // match: (RoundFloat32x4 x) @@ -49718,6 +54521,546 @@ func rewriteValueAMD64_OpSetElemUint8x16(v *Value) bool { return true } } +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt16x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftAndFillUpperFromInt16x16 [a] x y) + // result: (VPSHLDW256 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHLDW256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt16x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftAndFillUpperFromInt16x32 [a] x y) + // result: (VPSHLDW512 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHLDW512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt16x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftAndFillUpperFromInt16x8 [a] x y) + // result: (VPSHLDW128 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHLDW128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftAndFillUpperFromInt32x16 [a] x y) + // result: (VPSHLDD512 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHLDD512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt32x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftAndFillUpperFromInt32x4 [a] x y) + // result: (VPSHLDD128 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHLDD128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftAndFillUpperFromInt32x8 [a] x y) + // result: (VPSHLDD256 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHLDD256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftAndFillUpperFromInt64x2 [a] x y) + // result: (VPSHLDQ128 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHLDQ128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftAndFillUpperFromInt64x4 [a] x y) + // result: (VPSHLDQ256 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHLDQ256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftAndFillUpperFromInt64x8 [a] x y) + // result: (VPSHLDQ512 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + 
v.reset(OpAMD64VPSHLDQ512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint16x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftAndFillUpperFromUint16x16 [a] x y) + // result: (VPSHLDW256 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHLDW256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint16x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftAndFillUpperFromUint16x32 [a] x y) + // result: (VPSHLDW512 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHLDW512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint16x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftAndFillUpperFromUint16x8 [a] x y) + // result: (VPSHLDW128 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHLDW128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftAndFillUpperFromUint32x16 [a] x y) + // result: (VPSHLDD512 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHLDD512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint32x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftAndFillUpperFromUint32x4 [a] x y) + // result: (VPSHLDD128 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHLDD128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftAndFillUpperFromUint32x8 [a] x y) + // result: (VPSHLDD256 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHLDD256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftAndFillUpperFromUint64x2 [a] x y) + // result: (VPSHLDQ128 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHLDQ128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftAndFillUpperFromUint64x4 [a] x y) + // result: (VPSHLDQ256 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHLDQ256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftAndFillUpperFromUint64x8 [a] x y) + // result: (VPSHLDQ512 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHLDQ512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt16x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: 
(ShiftAllRightAndFillUpperFromInt16x16 [a] x y) + // result: (VPSHRDW256 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHRDW256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt16x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightAndFillUpperFromInt16x32 [a] x y) + // result: (VPSHRDW512 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHRDW512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt16x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightAndFillUpperFromInt16x8 [a] x y) + // result: (VPSHRDW128 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHRDW128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightAndFillUpperFromInt32x16 [a] x y) + // result: (VPSHRDD512 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHRDD512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt32x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightAndFillUpperFromInt32x4 [a] x y) + // result: (VPSHRDD128 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHRDD128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightAndFillUpperFromInt32x8 [a] x y) + // result: (VPSHRDD256 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHRDD256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightAndFillUpperFromInt64x2 [a] x y) + // result: (VPSHRDQ128 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHRDQ128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightAndFillUpperFromInt64x4 [a] x y) + // result: (VPSHRDQ256 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHRDQ256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightAndFillUpperFromInt64x8 [a] x y) + // result: (VPSHRDQ512 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHRDQ512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint16x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightAndFillUpperFromUint16x16 [a] x y) + // result: (VPSHRDW256 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHRDW256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + 
} +} +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint16x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightAndFillUpperFromUint16x32 [a] x y) + // result: (VPSHRDW512 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHRDW512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint16x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightAndFillUpperFromUint16x8 [a] x y) + // result: (VPSHRDW128 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHRDW128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightAndFillUpperFromUint32x16 [a] x y) + // result: (VPSHRDD512 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHRDD512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint32x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightAndFillUpperFromUint32x4 [a] x y) + // result: (VPSHRDD128 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHRDD128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightAndFillUpperFromUint32x8 [a] x y) + // result: (VPSHRDD256 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHRDD256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightAndFillUpperFromUint64x2 [a] x y) + // result: (VPSHRDQ128 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHRDQ128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightAndFillUpperFromUint64x4 [a] x y) + // result: (VPSHRDQ256 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHRDQ256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightAndFillUpperFromUint64x8 [a] x y) + // result: (VPSHRDQ512 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VPSHRDQ512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} func rewriteValueAMD64_OpSlicemask(v *Value) bool { v_0 := v.Args[0] b := v.Block diff --git a/src/cmd/compile/internal/ssagen/simdintrinsics.go b/src/cmd/compile/internal/ssagen/simdintrinsics.go index 5d6ae7e3c06..d20c9392936 100644 --- a/src/cmd/compile/internal/ssagen/simdintrinsics.go +++ b/src/cmd/compile/internal/ssagen/simdintrinsics.go @@ -915,6 +915,54 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint64x2.MaskedPopCount", opLen2(ssa.OpMaskedPopCountUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.MaskedPopCount", opLen2(ssa.OpMaskedPopCountUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.MaskedPopCount", opLen2(ssa.OpMaskedPopCountUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.MaskedRotateAllLeft", opLen2Imm8(ssa.OpMaskedRotateAllLeftInt32x4, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int32x8.MaskedRotateAllLeft", opLen2Imm8(ssa.OpMaskedRotateAllLeftInt32x8, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int32x16.MaskedRotateAllLeft", opLen2Imm8(ssa.OpMaskedRotateAllLeftInt32x16, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int64x2.MaskedRotateAllLeft", opLen2Imm8(ssa.OpMaskedRotateAllLeftInt64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int64x4.MaskedRotateAllLeft", opLen2Imm8(ssa.OpMaskedRotateAllLeftInt64x4, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int64x8.MaskedRotateAllLeft", opLen2Imm8(ssa.OpMaskedRotateAllLeftInt64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint32x4.MaskedRotateAllLeft", opLen2Imm8(ssa.OpMaskedRotateAllLeftUint32x4, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint32x8.MaskedRotateAllLeft", opLen2Imm8(ssa.OpMaskedRotateAllLeftUint32x8, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint32x16.MaskedRotateAllLeft", opLen2Imm8(ssa.OpMaskedRotateAllLeftUint32x16, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint64x2.MaskedRotateAllLeft", opLen2Imm8(ssa.OpMaskedRotateAllLeftUint64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint64x4.MaskedRotateAllLeft", opLen2Imm8(ssa.OpMaskedRotateAllLeftUint64x4, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint64x8.MaskedRotateAllLeft", opLen2Imm8(ssa.OpMaskedRotateAllLeftUint64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int32x4.MaskedRotateAllRight", opLen2Imm8(ssa.OpMaskedRotateAllRightInt32x4, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int32x8.MaskedRotateAllRight", opLen2Imm8(ssa.OpMaskedRotateAllRightInt32x8, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int32x16.MaskedRotateAllRight", opLen2Imm8(ssa.OpMaskedRotateAllRightInt32x16, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int64x2.MaskedRotateAllRight", opLen2Imm8(ssa.OpMaskedRotateAllRightInt64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int64x4.MaskedRotateAllRight", opLen2Imm8(ssa.OpMaskedRotateAllRightInt64x4, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int64x8.MaskedRotateAllRight", opLen2Imm8(ssa.OpMaskedRotateAllRightInt64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint32x4.MaskedRotateAllRight", opLen2Imm8(ssa.OpMaskedRotateAllRightUint32x4, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint32x8.MaskedRotateAllRight", opLen2Imm8(ssa.OpMaskedRotateAllRightUint32x8, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint32x16.MaskedRotateAllRight", opLen2Imm8(ssa.OpMaskedRotateAllRightUint32x16, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint64x2.MaskedRotateAllRight", opLen2Imm8(ssa.OpMaskedRotateAllRightUint64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint64x4.MaskedRotateAllRight", opLen2Imm8(ssa.OpMaskedRotateAllRightUint64x4, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint64x8.MaskedRotateAllRight", opLen2Imm8(ssa.OpMaskedRotateAllRightUint64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int32x4.MaskedRotateLeft", 
opLen3(ssa.OpMaskedRotateLeftInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.MaskedRotateLeft", opLen3(ssa.OpMaskedRotateLeftInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.MaskedRotateLeft", opLen3(ssa.OpMaskedRotateLeftInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.MaskedRotateLeft", opLen3(ssa.OpMaskedRotateLeftInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.MaskedRotateLeft", opLen3(ssa.OpMaskedRotateLeftInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.MaskedRotateLeft", opLen3(ssa.OpMaskedRotateLeftInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.MaskedRotateLeft", opLen3(ssa.OpMaskedRotateLeftUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.MaskedRotateLeft", opLen3(ssa.OpMaskedRotateLeftUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.MaskedRotateLeft", opLen3(ssa.OpMaskedRotateLeftUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.MaskedRotateLeft", opLen3(ssa.OpMaskedRotateLeftUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.MaskedRotateLeft", opLen3(ssa.OpMaskedRotateLeftUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.MaskedRotateLeft", opLen3(ssa.OpMaskedRotateLeftUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.MaskedRotateRight", opLen3(ssa.OpMaskedRotateRightInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.MaskedRotateRight", opLen3(ssa.OpMaskedRotateRightInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.MaskedRotateRight", opLen3(ssa.OpMaskedRotateRightInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.MaskedRotateRight", opLen3(ssa.OpMaskedRotateRightInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.MaskedRotateRight", opLen3(ssa.OpMaskedRotateRightInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.MaskedRotateRight", opLen3(ssa.OpMaskedRotateRightInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.MaskedRotateRight", opLen3(ssa.OpMaskedRotateRightUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.MaskedRotateRight", opLen3(ssa.OpMaskedRotateRightUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.MaskedRotateRight", opLen3(ssa.OpMaskedRotateRightUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.MaskedRotateRight", opLen3(ssa.OpMaskedRotateRightUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.MaskedRotateRight", opLen3(ssa.OpMaskedRotateRightUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.MaskedRotateRight", opLen3(ssa.OpMaskedRotateRightUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.MaskedRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedRoundSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x8.MaskedRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedRoundSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float32x16.MaskedRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedRoundSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) @@ -963,6 +1011,147 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint32x4.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint32x8.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint32x16.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.MaskedShiftAllLeft", opLen3(ssa.OpMaskedShiftAllLeftInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.MaskedShiftAllLeft", opLen3(ssa.OpMaskedShiftAllLeftInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.MaskedShiftAllLeft", opLen3(ssa.OpMaskedShiftAllLeftInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.MaskedShiftAllLeft", opLen3(ssa.OpMaskedShiftAllLeftUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.MaskedShiftAllLeft", opLen3(ssa.OpMaskedShiftAllLeftUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.MaskedShiftAllLeft", opLen3(ssa.OpMaskedShiftAllLeftUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromInt16x8, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int16x16.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromInt16x16, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int16x32.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromInt16x32, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int32x4.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromInt32x4, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int32x8.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromInt32x8, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int32x16.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromInt32x16, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int64x2.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromInt64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int64x4.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromInt64x4, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int64x8.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromInt64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint16x8.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromUint16x8, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint16x16.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromUint16x16, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint16x32.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromUint16x32, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint32x4.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromUint32x4, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint32x8.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromUint32x8, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, 
"Uint32x16.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromUint32x16, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint64x2.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromUint64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint64x4.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromUint64x4, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint64x8.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromUint64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int64x2.MaskedShiftAllRight", opLen3(ssa.OpMaskedShiftAllRightInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.MaskedShiftAllRight", opLen3(ssa.OpMaskedShiftAllRightInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.MaskedShiftAllRight", opLen3(ssa.OpMaskedShiftAllRightInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.MaskedShiftAllRight", opLen3(ssa.OpMaskedShiftAllRightUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.MaskedShiftAllRight", opLen3(ssa.OpMaskedShiftAllRightUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.MaskedShiftAllRight", opLen3(ssa.OpMaskedShiftAllRightUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromInt16x8, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int16x16.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromInt16x16, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int16x32.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromInt16x32, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int32x4.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromInt32x4, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int32x8.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromInt32x8, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int32x16.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromInt32x16, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int64x2.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromInt64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int64x4.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromInt64x4, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int64x8.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromInt64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint16x8.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromUint16x8, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint16x16.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromUint16x16, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint16x32.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromUint16x32, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint32x4.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromUint32x4, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint32x8.MaskedShiftAllRightAndFillUpperFrom", 
opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromUint32x8, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint32x16.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromUint32x16, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint64x2.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromUint64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint64x4.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromUint64x4, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint64x8.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromUint64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int64x2.MaskedShiftAllRightSignExtended", opLen3(ssa.OpMaskedShiftAllRightSignExtendedInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.MaskedShiftAllRightSignExtended", opLen3(ssa.OpMaskedShiftAllRightSignExtendedInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.MaskedShiftAllRightSignExtended", opLen3(ssa.OpMaskedShiftAllRightSignExtendedInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftInt16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftUint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftUint16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromInt16x16, types.TypeVec256), 
sys.AMD64) + addF(simdPackage, "Int16x32.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromInt16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromUint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromUint16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightInt16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightUint16x8, types.TypeVec128), sys.AMD64) + 
addF(simdPackage, "Uint16x16.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightUint16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromInt16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromUint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromUint16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, 
"Uint64x8.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedInt16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedUint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedUint16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.MaskedSqrt", opLen2(ssa.OpMaskedSqrtFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.MaskedSqrt", opLen2(ssa.OpMaskedSqrtFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.MaskedSqrt", opLen2(ssa.OpMaskedSqrtFloat32x16, types.TypeVec512), sys.AMD64) @@ -1242,6 +1431,54 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint64x2.PopCount", opLen1(ssa.OpPopCountUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.PopCount", opLen1(ssa.OpPopCountUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.PopCount", opLen1(ssa.OpPopCountUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftInt32x4, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int32x8.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftInt32x8, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int32x16.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftInt32x16, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int64x2.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftInt64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int64x4.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftInt64x4, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int64x8.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftInt64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint32x4.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftUint32x4, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint32x8.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftUint32x8, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint32x16.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftUint32x16, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint64x2.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftUint64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint64x4.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftUint64x4, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint64x8.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftUint64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int32x4.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightInt32x4, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int32x8.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightInt32x8, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int32x16.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightInt32x16, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int64x2.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightInt64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int64x4.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightInt64x4, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int64x8.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightInt64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint32x4.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightUint32x4, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint32x8.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightUint32x8, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint32x16.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightUint32x16, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint64x2.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightUint64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint64x4.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightUint64x4, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint64x8.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightUint64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int32x4.RotateLeft", opLen2(ssa.OpRotateLeftInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.RotateLeft", opLen2(ssa.OpRotateLeftInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.RotateLeft", opLen2(ssa.OpRotateLeftInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.RotateLeft", opLen2(ssa.OpRotateLeftInt64x2, 
types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.RotateLeft", opLen2(ssa.OpRotateLeftInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.RotateLeft", opLen2(ssa.OpRotateLeftInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.RotateLeft", opLen2(ssa.OpRotateLeftUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.RotateLeft", opLen2(ssa.OpRotateLeftUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.RotateLeft", opLen2(ssa.OpRotateLeftUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.RotateLeft", opLen2(ssa.OpRotateLeftUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.RotateLeft", opLen2(ssa.OpRotateLeftUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.RotateLeft", opLen2(ssa.OpRotateLeftUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.RotateRight", opLen2(ssa.OpRotateRightInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.RotateRight", opLen2(ssa.OpRotateRightInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.RotateRight", opLen2(ssa.OpRotateRightInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.RotateRight", opLen2(ssa.OpRotateRightInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.RotateRight", opLen2(ssa.OpRotateRightInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.RotateRight", opLen2(ssa.OpRotateRightInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.RotateRight", opLen2(ssa.OpRotateRightUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.RotateRight", opLen2(ssa.OpRotateRightUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.RotateRight", opLen2(ssa.OpRotateRightUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.RotateRight", opLen2(ssa.OpRotateRightUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.RotateRight", opLen2(ssa.OpRotateRightUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.RotateRight", opLen2(ssa.OpRotateRightUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.Round", opLen1(ssa.OpRoundFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.Round", opLen1(ssa.OpRoundFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x2.Round", opLen1(ssa.OpRoundFloat64x2, types.TypeVec128), sys.AMD64) @@ -1306,6 +1543,167 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
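// Illustrative sketch only, not part of the generated patch: the RotateLeft and
// RotateRight methods registered above rotate each lane by the count in the
// matching lane of the argument (the variable VPROLV/VPRORV forms), while the
// RotateAll* variants take a constant count (opLen1Imm8). Assumes
// GOEXPERIMENT=simd and `import "simd"`; the helper name is made up here.
func rotatePerLaneSketch(xs, counts []uint32) []uint32 {
	x := simd.LoadUint32x4Slice(xs)
	c := simd.LoadUint32x4Slice(counts) // per-lane rotate amounts
	out := make([]uint32, 4)
	x.RotateLeft(c).StoreSlice(out)
	return out
}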
addF(simdPackage, "Uint16x8.SetElem", opLen2Imm8(ssa.OpSetElemUint16x8, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Uint32x4.SetElem", opLen2Imm8(ssa.OpSetElemUint32x4, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Uint64x2.SetElem", opLen2Imm8(ssa.OpSetElemUint64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int16x8.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x4.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x2.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftUint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x4.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x2.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromInt16x8, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int16x16.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromInt16x16, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int16x32.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromInt16x32, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int32x4.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromInt32x4, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int32x8.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromInt32x8, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int32x16.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromInt32x16, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int64x2.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromInt64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int64x4.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromInt64x4, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int64x8.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromInt64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint16x8.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromUint16x8, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint16x16.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromUint16x16, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint16x32.ShiftAllLeftAndFillUpperFrom", 
opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromUint16x32, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint32x4.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromUint32x4, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint32x8.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromUint32x8, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint32x16.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromUint32x16, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint64x2.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromUint64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint64x4.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromUint64x4, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint64x8.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromUint64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int16x8.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x4.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x2.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.ShiftAllRight", opLen2(ssa.OpShiftAllRightUint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.ShiftAllRight", opLen2(ssa.OpShiftAllRightUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x4.ShiftAllRight", opLen2(ssa.OpShiftAllRightUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.ShiftAllRight", opLen2(ssa.OpShiftAllRightUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x2.ShiftAllRight", opLen2(ssa.OpShiftAllRightUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.ShiftAllRight", opLen2(ssa.OpShiftAllRightUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.ShiftAllRight", opLen2(ssa.OpShiftAllRightUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromInt16x8, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int16x16.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromInt16x16, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int16x32.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromInt16x32, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int32x4.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromInt32x4, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int32x8.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromInt32x8, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int32x16.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromInt32x16, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int64x2.ShiftAllRightAndFillUpperFrom", 
opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromInt64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int64x4.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromInt64x4, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int64x8.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromInt64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint16x8.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromUint16x8, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint16x16.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromUint16x16, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint16x32.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromUint16x32, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint32x4.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromUint32x4, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint32x8.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromUint32x8, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint32x16.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromUint32x16, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint64x2.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromUint64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint64x4.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromUint64x4, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint64x8.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromUint64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int16x8.ShiftAllRightSignExtended", opLen2(ssa.OpShiftAllRightSignExtendedInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.ShiftAllRightSignExtended", opLen2(ssa.OpShiftAllRightSignExtendedInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x4.ShiftAllRightSignExtended", opLen2(ssa.OpShiftAllRightSignExtendedInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.ShiftAllRightSignExtended", opLen2(ssa.OpShiftAllRightSignExtendedInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x2.ShiftAllRightSignExtended", opLen2(ssa.OpShiftAllRightSignExtendedInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.ShiftAllRightSignExtended", opLen2(ssa.OpShiftAllRightSignExtendedInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.ShiftAllRightSignExtended", opLen2(ssa.OpShiftAllRightSignExtendedInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.ShiftLeft", opLen2(ssa.OpShiftLeftInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.ShiftLeft", opLen2(ssa.OpShiftLeftInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.ShiftLeft", opLen2(ssa.OpShiftLeftInt16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.ShiftLeft", opLen2(ssa.OpShiftLeftInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.ShiftLeft", opLen2(ssa.OpShiftLeftInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.ShiftLeft", opLen2(ssa.OpShiftLeftInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.ShiftLeft", opLen2(ssa.OpShiftLeftInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.ShiftLeft", opLen2(ssa.OpShiftLeftInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.ShiftLeft", 
opLen2(ssa.OpShiftLeftInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.ShiftLeft", opLen2(ssa.OpShiftLeftUint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.ShiftLeft", opLen2(ssa.OpShiftLeftUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.ShiftLeft", opLen2(ssa.OpShiftLeftUint16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.ShiftLeft", opLen2(ssa.OpShiftLeftUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.ShiftLeft", opLen2(ssa.OpShiftLeftUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.ShiftLeft", opLen2(ssa.OpShiftLeftUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.ShiftLeft", opLen2(ssa.OpShiftLeftUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.ShiftLeft", opLen2(ssa.OpShiftLeftUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.ShiftLeft", opLen2(ssa.OpShiftLeftUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromInt16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromUint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromUint16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.ShiftRight", opLen2(ssa.OpShiftRightInt16x8, types.TypeVec128), sys.AMD64) + 
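// Illustrative sketch only, not part of the generated patch: as the op names in
// this CL suggest, ShiftRight lowers to the logical VPSRLV forms (vacated high
// bits become zero) while ShiftRightSignExtended lowers to the arithmetic
// VPSRAV forms (vacated high bits copy the sign bit); both take per-lane counts.
// Assumes GOEXPERIMENT=simd and `import "simd"`; the helper name is made up here.
func rightShiftFlavorsSketch(xs, counts []int32) (logical, arithmetic []int32) {
	x := simd.LoadInt32x4Slice(xs)
	c := simd.LoadInt32x4Slice(counts)
	logical = make([]int32, 4)
	arithmetic = make([]int32, 4)
	x.ShiftRight(c).StoreSlice(logical)                // zero-filling shift
	x.ShiftRightSignExtended(c).StoreSlice(arithmetic) // sign-filling shift
	return logical, arithmetic
}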
addF(simdPackage, "Int16x16.ShiftRight", opLen2(ssa.OpShiftRightInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.ShiftRight", opLen2(ssa.OpShiftRightInt16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.ShiftRight", opLen2(ssa.OpShiftRightInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.ShiftRight", opLen2(ssa.OpShiftRightInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.ShiftRight", opLen2(ssa.OpShiftRightInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.ShiftRight", opLen2(ssa.OpShiftRightInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.ShiftRight", opLen2(ssa.OpShiftRightInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.ShiftRight", opLen2(ssa.OpShiftRightInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.ShiftRight", opLen2(ssa.OpShiftRightUint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.ShiftRight", opLen2(ssa.OpShiftRightUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.ShiftRight", opLen2(ssa.OpShiftRightUint16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.ShiftRight", opLen2(ssa.OpShiftRightUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.ShiftRight", opLen2(ssa.OpShiftRightUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.ShiftRight", opLen2(ssa.OpShiftRightUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.ShiftRight", opLen2(ssa.OpShiftRightUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.ShiftRight", opLen2(ssa.OpShiftRightUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.ShiftRight", opLen2(ssa.OpShiftRightUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromInt16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromUint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromUint16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromUint32x4, 
types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedInt16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedUint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedUint16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x16.Sign", opLen2(ssa.OpSignInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x32.Sign", opLen2(ssa.OpSignInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x8.Sign", opLen2(ssa.OpSignInt16x8, types.TypeVec128), sys.AMD64) diff --git a/src/simd/simd_wrapped_test.go b/src/simd/simd_wrapped_test.go index b5f6bb517a3..ad828e9d3f8 100644 --- a/src/simd/simd_wrapped_test.go +++ b/src/simd/simd_wrapped_test.go @@ -2147,6 +2147,12 @@ func 
testInt16x8Binary(t *testing.T, v0 []int16, v1 []int16, want []int16, which gotv = vec0.SaturatedPairwiseSub(vec1) case "SaturatedSub": gotv = vec0.SaturatedSub(vec1) + case "ShiftLeft": + gotv = vec0.ShiftLeft(vec1) + case "ShiftRight": + gotv = vec0.ShiftRight(vec1) + case "ShiftRightSignExtended": + gotv = vec0.ShiftRightSignExtended(vec1) case "Sign": gotv = vec0.Sign(vec1) case "Sub": @@ -2187,6 +2193,12 @@ func testInt16x8BinaryMasked(t *testing.T, v0 []int16, v1 []int16, v2 []int16, w gotv = vec0.MaskedSaturatedAdd(vec1, vec2.AsMask16x8()) case "MaskedSaturatedSub": gotv = vec0.MaskedSaturatedSub(vec1, vec2.AsMask16x8()) + case "MaskedShiftLeft": + gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask16x8()) + case "MaskedShiftRight": + gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask16x8()) + case "MaskedShiftRightSignExtended": + gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask16x8()) case "MaskedSub": gotv = vec0.MaskedSub(vec1, vec2.AsMask16x8()) @@ -2307,6 +2319,55 @@ func testInt16x8MaskedCompare(t *testing.T, v0 []int16, v1 []int16, v2 []int16, } } +func testInt16x8Ternary(t *testing.T, v0 []int16, v1 []int16, v2 []int16, want []int16, which string) { + t.Helper() + var gotv simd.Int16x8 + got := make([]int16, len(want)) + vec0 := simd.LoadInt16x8Slice(v0) + vec1 := simd.LoadInt16x8Slice(v1) + vec2 := simd.LoadInt16x8Slice(v2) + switch which { + case "ShiftLeftAndFillUpperFrom": + gotv = vec0.ShiftLeftAndFillUpperFrom(vec1, vec2) + case "ShiftRightAndFillUpperFrom": + gotv = vec0.ShiftRightAndFillUpperFrom(vec1, vec2) + + default: + t.Errorf("Unknown method: Int16x8.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testInt16x8TernaryMasked(t *testing.T, v0 []int16, v1 []int16, v2 []int16, v3 []int16, want []int16, which string) { + t.Helper() + var gotv simd.Int16x8 + got := make([]int16, len(want)) + vec0 := simd.LoadInt16x8Slice(v0) + vec1 := simd.LoadInt16x8Slice(v1) + vec2 := simd.LoadInt16x8Slice(v2) + vec3 := simd.LoadInt16x8Slice(v3) + switch which { + case "MaskedShiftLeftAndFillUpperFrom": + gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask16x8()) + case "MaskedShiftRightAndFillUpperFrom": + gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask16x8()) + + default: + t.Errorf("Unknown method: Int16x8.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testInt16x8Unary(t *testing.T, v0 []int16, want []int16, which string) { t.Helper() var gotv simd.Int16x8 @@ -2387,6 +2448,12 @@ func testInt16x16Binary(t *testing.T, v0 []int16, v1 []int16, want []int16, whic gotv = vec0.SaturatedPairwiseSub(vec1) case "SaturatedSub": gotv = vec0.SaturatedSub(vec1) + case "ShiftLeft": + gotv = vec0.ShiftLeft(vec1) + case "ShiftRight": + gotv = vec0.ShiftRight(vec1) + case "ShiftRightSignExtended": + gotv = vec0.ShiftRightSignExtended(vec1) case "Sign": gotv = vec0.Sign(vec1) case "Sub": @@ -2427,6 +2494,12 @@ func testInt16x16BinaryMasked(t *testing.T, v0 []int16, v1 []int16, v2 []int16, gotv = vec0.MaskedSaturatedAdd(vec1, vec2.AsMask16x16()) case "MaskedSaturatedSub": gotv = vec0.MaskedSaturatedSub(vec1, vec2.AsMask16x16()) + case "MaskedShiftLeft": + gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask16x16()) + case "MaskedShiftRight": + gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask16x16()) + 
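// MaskedShiftRightSignExtended below is the sign-filling (arithmetic) counterpart
// of the logical MaskedShiftRight above; both shift by per-lane counts under the mask.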
case "MaskedShiftRightSignExtended": + gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask16x16()) case "MaskedSub": gotv = vec0.MaskedSub(vec1, vec2.AsMask16x16()) @@ -2547,6 +2620,55 @@ func testInt16x16MaskedCompare(t *testing.T, v0 []int16, v1 []int16, v2 []int16, } } +func testInt16x16Ternary(t *testing.T, v0 []int16, v1 []int16, v2 []int16, want []int16, which string) { + t.Helper() + var gotv simd.Int16x16 + got := make([]int16, len(want)) + vec0 := simd.LoadInt16x16Slice(v0) + vec1 := simd.LoadInt16x16Slice(v1) + vec2 := simd.LoadInt16x16Slice(v2) + switch which { + case "ShiftLeftAndFillUpperFrom": + gotv = vec0.ShiftLeftAndFillUpperFrom(vec1, vec2) + case "ShiftRightAndFillUpperFrom": + gotv = vec0.ShiftRightAndFillUpperFrom(vec1, vec2) + + default: + t.Errorf("Unknown method: Int16x16.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testInt16x16TernaryMasked(t *testing.T, v0 []int16, v1 []int16, v2 []int16, v3 []int16, want []int16, which string) { + t.Helper() + var gotv simd.Int16x16 + got := make([]int16, len(want)) + vec0 := simd.LoadInt16x16Slice(v0) + vec1 := simd.LoadInt16x16Slice(v1) + vec2 := simd.LoadInt16x16Slice(v2) + vec3 := simd.LoadInt16x16Slice(v3) + switch which { + case "MaskedShiftLeftAndFillUpperFrom": + gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask16x16()) + case "MaskedShiftRightAndFillUpperFrom": + gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask16x16()) + + default: + t.Errorf("Unknown method: Int16x16.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testInt16x16Unary(t *testing.T, v0 []int16, want []int16, which string) { t.Helper() var gotv simd.Int16x16 @@ -2613,6 +2735,12 @@ func testInt16x32Binary(t *testing.T, v0 []int16, v1 []int16, want []int16, whic gotv = vec0.SaturatedAdd(vec1) case "SaturatedSub": gotv = vec0.SaturatedSub(vec1) + case "ShiftLeft": + gotv = vec0.ShiftLeft(vec1) + case "ShiftRight": + gotv = vec0.ShiftRight(vec1) + case "ShiftRightSignExtended": + gotv = vec0.ShiftRightSignExtended(vec1) case "Sub": gotv = vec0.Sub(vec1) @@ -2649,6 +2777,12 @@ func testInt16x32BinaryMasked(t *testing.T, v0 []int16, v1 []int16, v2 []int16, gotv = vec0.MaskedSaturatedAdd(vec1, vec2.AsMask16x32()) case "MaskedSaturatedSub": gotv = vec0.MaskedSaturatedSub(vec1, vec2.AsMask16x32()) + case "MaskedShiftLeft": + gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask16x32()) + case "MaskedShiftRight": + gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask16x32()) + case "MaskedShiftRightSignExtended": + gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask16x32()) case "MaskedSub": gotv = vec0.MaskedSub(vec1, vec2.AsMask16x32()) @@ -2769,6 +2903,55 @@ func testInt16x32MaskedCompare(t *testing.T, v0 []int16, v1 []int16, v2 []int16, } } +func testInt16x32Ternary(t *testing.T, v0 []int16, v1 []int16, v2 []int16, want []int16, which string) { + t.Helper() + var gotv simd.Int16x32 + got := make([]int16, len(want)) + vec0 := simd.LoadInt16x32Slice(v0) + vec1 := simd.LoadInt16x32Slice(v1) + vec2 := simd.LoadInt16x32Slice(v2) + switch which { + case "ShiftLeftAndFillUpperFrom": + gotv = vec0.ShiftLeftAndFillUpperFrom(vec1, vec2) + case "ShiftRightAndFillUpperFrom": + gotv = vec0.ShiftRightAndFillUpperFrom(vec1, vec2) + + default: + t.Errorf("Unknown 
method: Int16x32.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testInt16x32TernaryMasked(t *testing.T, v0 []int16, v1 []int16, v2 []int16, v3 []int16, want []int16, which string) { + t.Helper() + var gotv simd.Int16x32 + got := make([]int16, len(want)) + vec0 := simd.LoadInt16x32Slice(v0) + vec1 := simd.LoadInt16x32Slice(v1) + vec2 := simd.LoadInt16x32Slice(v2) + vec3 := simd.LoadInt16x32Slice(v3) + switch which { + case "MaskedShiftLeftAndFillUpperFrom": + gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask16x32()) + case "MaskedShiftRightAndFillUpperFrom": + gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask16x32()) + + default: + t.Errorf("Unknown method: Int16x32.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testInt16x32Unary(t *testing.T, v0 []int16, want []int16, which string) { t.Helper() var gotv simd.Int16x32 @@ -2839,6 +3022,16 @@ func testInt32x4Binary(t *testing.T, v0 []int32, v1 []int32, want []int32, which gotv = vec0.PairwiseAdd(vec1) case "PairwiseSub": gotv = vec0.PairwiseSub(vec1) + case "RotateLeft": + gotv = vec0.RotateLeft(vec1) + case "RotateRight": + gotv = vec0.RotateRight(vec1) + case "ShiftLeft": + gotv = vec0.ShiftLeft(vec1) + case "ShiftRight": + gotv = vec0.ShiftRight(vec1) + case "ShiftRightSignExtended": + gotv = vec0.ShiftRightSignExtended(vec1) case "Sign": gotv = vec0.Sign(vec1) case "Sub": @@ -2879,6 +3072,16 @@ func testInt32x4BinaryMasked(t *testing.T, v0 []int32, v1 []int32, v2 []int32, w gotv = vec0.MaskedMulLow(vec1, vec2.AsMask32x4()) case "MaskedOr": gotv = vec0.MaskedOr(vec1, vec2.AsMask32x4()) + case "MaskedRotateLeft": + gotv = vec0.MaskedRotateLeft(vec1, vec2.AsMask32x4()) + case "MaskedRotateRight": + gotv = vec0.MaskedRotateRight(vec1, vec2.AsMask32x4()) + case "MaskedShiftLeft": + gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask32x4()) + case "MaskedShiftRight": + gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask32x4()) + case "MaskedShiftRightSignExtended": + gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask32x4()) case "MaskedSub": gotv = vec0.MaskedSub(vec1, vec2.AsMask32x4()) case "MaskedXor": @@ -3028,6 +3231,55 @@ func testInt32x4MaskedCompare(t *testing.T, v0 []int32, v1 []int32, v2 []int32, } } +func testInt32x4Ternary(t *testing.T, v0 []int32, v1 []int32, v2 []int32, want []int32, which string) { + t.Helper() + var gotv simd.Int32x4 + got := make([]int32, len(want)) + vec0 := simd.LoadInt32x4Slice(v0) + vec1 := simd.LoadInt32x4Slice(v1) + vec2 := simd.LoadInt32x4Slice(v2) + switch which { + case "ShiftLeftAndFillUpperFrom": + gotv = vec0.ShiftLeftAndFillUpperFrom(vec1, vec2) + case "ShiftRightAndFillUpperFrom": + gotv = vec0.ShiftRightAndFillUpperFrom(vec1, vec2) + + default: + t.Errorf("Unknown method: Int32x4.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testInt32x4TernaryMasked(t *testing.T, v0 []int32, v1 []int32, v2 []int32, v3 []int32, want []int32, which string) { + t.Helper() + var gotv simd.Int32x4 + got := make([]int32, len(want)) + vec0 := simd.LoadInt32x4Slice(v0) + vec1 := simd.LoadInt32x4Slice(v1) + vec2 := simd.LoadInt32x4Slice(v2) + vec3 := simd.LoadInt32x4Slice(v3) + 
switch which { + case "MaskedShiftLeftAndFillUpperFrom": + gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask32x4()) + case "MaskedShiftRightAndFillUpperFrom": + gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask32x4()) + + default: + t.Errorf("Unknown method: Int32x4.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testInt32x4Uint8x16Int8x16Int32x4(t *testing.T, v0 []int32, v1 []uint8, v2 []int8, want []int32, which string) { t.Helper() var gotv simd.Int32x4 @@ -3147,6 +3399,16 @@ func testInt32x8Binary(t *testing.T, v0 []int32, v1 []int32, want []int32, which gotv = vec0.PairwiseAdd(vec1) case "PairwiseSub": gotv = vec0.PairwiseSub(vec1) + case "RotateLeft": + gotv = vec0.RotateLeft(vec1) + case "RotateRight": + gotv = vec0.RotateRight(vec1) + case "ShiftLeft": + gotv = vec0.ShiftLeft(vec1) + case "ShiftRight": + gotv = vec0.ShiftRight(vec1) + case "ShiftRightSignExtended": + gotv = vec0.ShiftRightSignExtended(vec1) case "Sign": gotv = vec0.Sign(vec1) case "Sub": @@ -3187,6 +3449,16 @@ func testInt32x8BinaryMasked(t *testing.T, v0 []int32, v1 []int32, v2 []int32, w gotv = vec0.MaskedMulLow(vec1, vec2.AsMask32x8()) case "MaskedOr": gotv = vec0.MaskedOr(vec1, vec2.AsMask32x8()) + case "MaskedRotateLeft": + gotv = vec0.MaskedRotateLeft(vec1, vec2.AsMask32x8()) + case "MaskedRotateRight": + gotv = vec0.MaskedRotateRight(vec1, vec2.AsMask32x8()) + case "MaskedShiftLeft": + gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask32x8()) + case "MaskedShiftRight": + gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask32x8()) + case "MaskedShiftRightSignExtended": + gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask32x8()) case "MaskedSub": gotv = vec0.MaskedSub(vec1, vec2.AsMask32x8()) case "MaskedXor": @@ -3336,6 +3608,55 @@ func testInt32x8MaskedCompare(t *testing.T, v0 []int32, v1 []int32, v2 []int32, } } +func testInt32x8Ternary(t *testing.T, v0 []int32, v1 []int32, v2 []int32, want []int32, which string) { + t.Helper() + var gotv simd.Int32x8 + got := make([]int32, len(want)) + vec0 := simd.LoadInt32x8Slice(v0) + vec1 := simd.LoadInt32x8Slice(v1) + vec2 := simd.LoadInt32x8Slice(v2) + switch which { + case "ShiftLeftAndFillUpperFrom": + gotv = vec0.ShiftLeftAndFillUpperFrom(vec1, vec2) + case "ShiftRightAndFillUpperFrom": + gotv = vec0.ShiftRightAndFillUpperFrom(vec1, vec2) + + default: + t.Errorf("Unknown method: Int32x8.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testInt32x8TernaryMasked(t *testing.T, v0 []int32, v1 []int32, v2 []int32, v3 []int32, want []int32, which string) { + t.Helper() + var gotv simd.Int32x8 + got := make([]int32, len(want)) + vec0 := simd.LoadInt32x8Slice(v0) + vec1 := simd.LoadInt32x8Slice(v1) + vec2 := simd.LoadInt32x8Slice(v2) + vec3 := simd.LoadInt32x8Slice(v3) + switch which { + case "MaskedShiftLeftAndFillUpperFrom": + gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask32x8()) + case "MaskedShiftRightAndFillUpperFrom": + gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask32x8()) + + default: + t.Errorf("Unknown method: Int32x8.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func 
testInt32x8Uint8x32Int8x32Int32x8(t *testing.T, v0 []int32, v1 []uint8, v2 []int8, want []int32, which string) { t.Helper() var gotv simd.Int32x8 @@ -3451,6 +3772,16 @@ func testInt32x16Binary(t *testing.T, v0 []int32, v1 []int32, want []int32, whic gotv = vec0.MulLow(vec1) case "Or": gotv = vec0.Or(vec1) + case "RotateLeft": + gotv = vec0.RotateLeft(vec1) + case "RotateRight": + gotv = vec0.RotateRight(vec1) + case "ShiftLeft": + gotv = vec0.ShiftLeft(vec1) + case "ShiftRight": + gotv = vec0.ShiftRight(vec1) + case "ShiftRightSignExtended": + gotv = vec0.ShiftRightSignExtended(vec1) case "Sub": gotv = vec0.Sub(vec1) case "Xor": @@ -3489,6 +3820,16 @@ func testInt32x16BinaryMasked(t *testing.T, v0 []int32, v1 []int32, v2 []int32, gotv = vec0.MaskedMulLow(vec1, vec2.AsMask32x16()) case "MaskedOr": gotv = vec0.MaskedOr(vec1, vec2.AsMask32x16()) + case "MaskedRotateLeft": + gotv = vec0.MaskedRotateLeft(vec1, vec2.AsMask32x16()) + case "MaskedRotateRight": + gotv = vec0.MaskedRotateRight(vec1, vec2.AsMask32x16()) + case "MaskedShiftLeft": + gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask32x16()) + case "MaskedShiftRight": + gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask32x16()) + case "MaskedShiftRightSignExtended": + gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask32x16()) case "MaskedSub": gotv = vec0.MaskedSub(vec1, vec2.AsMask32x16()) case "MaskedXor": @@ -3617,6 +3958,55 @@ func testInt32x16MaskedCompare(t *testing.T, v0 []int32, v1 []int32, v2 []int32, } } +func testInt32x16Ternary(t *testing.T, v0 []int32, v1 []int32, v2 []int32, want []int32, which string) { + t.Helper() + var gotv simd.Int32x16 + got := make([]int32, len(want)) + vec0 := simd.LoadInt32x16Slice(v0) + vec1 := simd.LoadInt32x16Slice(v1) + vec2 := simd.LoadInt32x16Slice(v2) + switch which { + case "ShiftLeftAndFillUpperFrom": + gotv = vec0.ShiftLeftAndFillUpperFrom(vec1, vec2) + case "ShiftRightAndFillUpperFrom": + gotv = vec0.ShiftRightAndFillUpperFrom(vec1, vec2) + + default: + t.Errorf("Unknown method: Int32x16.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testInt32x16TernaryMasked(t *testing.T, v0 []int32, v1 []int32, v2 []int32, v3 []int32, want []int32, which string) { + t.Helper() + var gotv simd.Int32x16 + got := make([]int32, len(want)) + vec0 := simd.LoadInt32x16Slice(v0) + vec1 := simd.LoadInt32x16Slice(v1) + vec2 := simd.LoadInt32x16Slice(v2) + vec3 := simd.LoadInt32x16Slice(v3) + switch which { + case "MaskedShiftLeftAndFillUpperFrom": + gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask32x16()) + case "MaskedShiftRightAndFillUpperFrom": + gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask32x16()) + + default: + t.Errorf("Unknown method: Int32x16.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testInt32x16Uint8x64Int8x64Int32x16(t *testing.T, v0 []int32, v1 []uint8, v2 []int8, want []int32, which string) { t.Helper() var gotv simd.Int32x16 @@ -3734,6 +4124,16 @@ func testInt64x2Binary(t *testing.T, v0 []int64, v1 []int64, want []int64, which gotv = vec0.MulLow(vec1) case "Or": gotv = vec0.Or(vec1) + case "RotateLeft": + gotv = vec0.RotateLeft(vec1) + case "RotateRight": + gotv = vec0.RotateRight(vec1) + case "ShiftLeft": + gotv = vec0.ShiftLeft(vec1) + case "ShiftRight": + gotv = 
vec0.ShiftRight(vec1) + case "ShiftRightSignExtended": + gotv = vec0.ShiftRightSignExtended(vec1) case "Sub": gotv = vec0.Sub(vec1) case "Xor": @@ -3774,6 +4174,16 @@ func testInt64x2BinaryMasked(t *testing.T, v0 []int64, v1 []int64, v2 []int64, w gotv = vec0.MaskedMulLow(vec1, vec2.AsMask64x2()) case "MaskedOr": gotv = vec0.MaskedOr(vec1, vec2.AsMask64x2()) + case "MaskedRotateLeft": + gotv = vec0.MaskedRotateLeft(vec1, vec2.AsMask64x2()) + case "MaskedRotateRight": + gotv = vec0.MaskedRotateRight(vec1, vec2.AsMask64x2()) + case "MaskedShiftLeft": + gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask64x2()) + case "MaskedShiftRight": + gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask64x2()) + case "MaskedShiftRightSignExtended": + gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask64x2()) case "MaskedSub": gotv = vec0.MaskedSub(vec1, vec2.AsMask64x2()) case "MaskedXor": @@ -3853,6 +4263,55 @@ func testInt64x2MaskedCompare(t *testing.T, v0 []int64, v1 []int64, v2 []int64, } } +func testInt64x2Ternary(t *testing.T, v0 []int64, v1 []int64, v2 []int64, want []int64, which string) { + t.Helper() + var gotv simd.Int64x2 + got := make([]int64, len(want)) + vec0 := simd.LoadInt64x2Slice(v0) + vec1 := simd.LoadInt64x2Slice(v1) + vec2 := simd.LoadInt64x2Slice(v2) + switch which { + case "ShiftLeftAndFillUpperFrom": + gotv = vec0.ShiftLeftAndFillUpperFrom(vec1, vec2) + case "ShiftRightAndFillUpperFrom": + gotv = vec0.ShiftRightAndFillUpperFrom(vec1, vec2) + + default: + t.Errorf("Unknown method: Int64x2.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testInt64x2TernaryMasked(t *testing.T, v0 []int64, v1 []int64, v2 []int64, v3 []int64, want []int64, which string) { + t.Helper() + var gotv simd.Int64x2 + got := make([]int64, len(want)) + vec0 := simd.LoadInt64x2Slice(v0) + vec1 := simd.LoadInt64x2Slice(v1) + vec2 := simd.LoadInt64x2Slice(v2) + vec3 := simd.LoadInt64x2Slice(v3) + switch which { + case "MaskedShiftLeftAndFillUpperFrom": + gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask64x2()) + case "MaskedShiftRightAndFillUpperFrom": + gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask64x2()) + + default: + t.Errorf("Unknown method: Int64x2.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testInt64x2Unary(t *testing.T, v0 []int64, want []int64, which string) { t.Helper() var gotv simd.Int64x2 @@ -3921,6 +4380,16 @@ func testInt64x4Binary(t *testing.T, v0 []int64, v1 []int64, want []int64, which gotv = vec0.MulLow(vec1) case "Or": gotv = vec0.Or(vec1) + case "RotateLeft": + gotv = vec0.RotateLeft(vec1) + case "RotateRight": + gotv = vec0.RotateRight(vec1) + case "ShiftLeft": + gotv = vec0.ShiftLeft(vec1) + case "ShiftRight": + gotv = vec0.ShiftRight(vec1) + case "ShiftRightSignExtended": + gotv = vec0.ShiftRightSignExtended(vec1) case "Sub": gotv = vec0.Sub(vec1) case "Xor": @@ -3961,6 +4430,16 @@ func testInt64x4BinaryMasked(t *testing.T, v0 []int64, v1 []int64, v2 []int64, w gotv = vec0.MaskedMulLow(vec1, vec2.AsMask64x4()) case "MaskedOr": gotv = vec0.MaskedOr(vec1, vec2.AsMask64x4()) + case "MaskedRotateLeft": + gotv = vec0.MaskedRotateLeft(vec1, vec2.AsMask64x4()) + case "MaskedRotateRight": + gotv = vec0.MaskedRotateRight(vec1, vec2.AsMask64x4()) + case "MaskedShiftLeft": + gotv = 
vec0.MaskedShiftLeft(vec1, vec2.AsMask64x4()) + case "MaskedShiftRight": + gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask64x4()) + case "MaskedShiftRightSignExtended": + gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask64x4()) case "MaskedSub": gotv = vec0.MaskedSub(vec1, vec2.AsMask64x4()) case "MaskedXor": @@ -4040,6 +4519,55 @@ func testInt64x4MaskedCompare(t *testing.T, v0 []int64, v1 []int64, v2 []int64, } } +func testInt64x4Ternary(t *testing.T, v0 []int64, v1 []int64, v2 []int64, want []int64, which string) { + t.Helper() + var gotv simd.Int64x4 + got := make([]int64, len(want)) + vec0 := simd.LoadInt64x4Slice(v0) + vec1 := simd.LoadInt64x4Slice(v1) + vec2 := simd.LoadInt64x4Slice(v2) + switch which { + case "ShiftLeftAndFillUpperFrom": + gotv = vec0.ShiftLeftAndFillUpperFrom(vec1, vec2) + case "ShiftRightAndFillUpperFrom": + gotv = vec0.ShiftRightAndFillUpperFrom(vec1, vec2) + + default: + t.Errorf("Unknown method: Int64x4.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testInt64x4TernaryMasked(t *testing.T, v0 []int64, v1 []int64, v2 []int64, v3 []int64, want []int64, which string) { + t.Helper() + var gotv simd.Int64x4 + got := make([]int64, len(want)) + vec0 := simd.LoadInt64x4Slice(v0) + vec1 := simd.LoadInt64x4Slice(v1) + vec2 := simd.LoadInt64x4Slice(v2) + vec3 := simd.LoadInt64x4Slice(v3) + switch which { + case "MaskedShiftLeftAndFillUpperFrom": + gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask64x4()) + case "MaskedShiftRightAndFillUpperFrom": + gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask64x4()) + + default: + t.Errorf("Unknown method: Int64x4.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testInt64x4Unary(t *testing.T, v0 []int64, want []int64, which string) { t.Helper() var gotv simd.Int64x4 @@ -4108,6 +4636,16 @@ func testInt64x8Binary(t *testing.T, v0 []int64, v1 []int64, want []int64, which gotv = vec0.MulLow(vec1) case "Or": gotv = vec0.Or(vec1) + case "RotateLeft": + gotv = vec0.RotateLeft(vec1) + case "RotateRight": + gotv = vec0.RotateRight(vec1) + case "ShiftLeft": + gotv = vec0.ShiftLeft(vec1) + case "ShiftRight": + gotv = vec0.ShiftRight(vec1) + case "ShiftRightSignExtended": + gotv = vec0.ShiftRightSignExtended(vec1) case "Sub": gotv = vec0.Sub(vec1) case "Xor": @@ -4148,6 +4686,16 @@ func testInt64x8BinaryMasked(t *testing.T, v0 []int64, v1 []int64, v2 []int64, w gotv = vec0.MaskedMulLow(vec1, vec2.AsMask64x8()) case "MaskedOr": gotv = vec0.MaskedOr(vec1, vec2.AsMask64x8()) + case "MaskedRotateLeft": + gotv = vec0.MaskedRotateLeft(vec1, vec2.AsMask64x8()) + case "MaskedRotateRight": + gotv = vec0.MaskedRotateRight(vec1, vec2.AsMask64x8()) + case "MaskedShiftLeft": + gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask64x8()) + case "MaskedShiftRight": + gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask64x8()) + case "MaskedShiftRightSignExtended": + gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask64x8()) case "MaskedSub": gotv = vec0.MaskedSub(vec1, vec2.AsMask64x8()) case "MaskedXor": @@ -4227,6 +4775,55 @@ func testInt64x8MaskedCompare(t *testing.T, v0 []int64, v1 []int64, v2 []int64, } } +func testInt64x8Ternary(t *testing.T, v0 []int64, v1 []int64, v2 []int64, want []int64, which string) { + t.Helper() + var gotv simd.Int64x8 + got 
:= make([]int64, len(want)) + vec0 := simd.LoadInt64x8Slice(v0) + vec1 := simd.LoadInt64x8Slice(v1) + vec2 := simd.LoadInt64x8Slice(v2) + switch which { + case "ShiftLeftAndFillUpperFrom": + gotv = vec0.ShiftLeftAndFillUpperFrom(vec1, vec2) + case "ShiftRightAndFillUpperFrom": + gotv = vec0.ShiftRightAndFillUpperFrom(vec1, vec2) + + default: + t.Errorf("Unknown method: Int64x8.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testInt64x8TernaryMasked(t *testing.T, v0 []int64, v1 []int64, v2 []int64, v3 []int64, want []int64, which string) { + t.Helper() + var gotv simd.Int64x8 + got := make([]int64, len(want)) + vec0 := simd.LoadInt64x8Slice(v0) + vec1 := simd.LoadInt64x8Slice(v1) + vec2 := simd.LoadInt64x8Slice(v2) + vec3 := simd.LoadInt64x8Slice(v3) + switch which { + case "MaskedShiftLeftAndFillUpperFrom": + gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask64x8()) + case "MaskedShiftRightAndFillUpperFrom": + gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask64x8()) + + default: + t.Errorf("Unknown method: Int64x8.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testInt64x8Unary(t *testing.T, v0 []int64, want []int64, which string) { t.Helper() var gotv simd.Int64x8 @@ -4961,6 +5558,12 @@ func testUint16x8Binary(t *testing.T, v0 []uint16, v1 []uint16, want []uint16, w gotv = vec0.SaturatedAdd(vec1) case "SaturatedSub": gotv = vec0.SaturatedSub(vec1) + case "ShiftLeft": + gotv = vec0.ShiftLeft(vec1) + case "ShiftRight": + gotv = vec0.ShiftRight(vec1) + case "ShiftRightSignExtended": + gotv = vec0.ShiftRightSignExtended(vec1) case "Sub": gotv = vec0.Sub(vec1) case "Xor": @@ -4999,6 +5602,12 @@ func testUint16x8BinaryMasked(t *testing.T, v0 []uint16, v1 []uint16, v2 []int16 gotv = vec0.MaskedSaturatedAdd(vec1, vec2.AsMask16x8()) case "MaskedSaturatedSub": gotv = vec0.MaskedSaturatedSub(vec1, vec2.AsMask16x8()) + case "MaskedShiftLeft": + gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask16x8()) + case "MaskedShiftRight": + gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask16x8()) + case "MaskedShiftRightSignExtended": + gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask16x8()) case "MaskedSub": gotv = vec0.MaskedSub(vec1, vec2.AsMask16x8()) @@ -5076,6 +5685,55 @@ func testUint16x8MaskedCompare(t *testing.T, v0 []uint16, v1 []uint16, v2 []int1 } } +func testUint16x8Ternary(t *testing.T, v0 []uint16, v1 []uint16, v2 []uint16, want []uint16, which string) { + t.Helper() + var gotv simd.Uint16x8 + got := make([]uint16, len(want)) + vec0 := simd.LoadUint16x8Slice(v0) + vec1 := simd.LoadUint16x8Slice(v1) + vec2 := simd.LoadUint16x8Slice(v2) + switch which { + case "ShiftLeftAndFillUpperFrom": + gotv = vec0.ShiftLeftAndFillUpperFrom(vec1, vec2) + case "ShiftRightAndFillUpperFrom": + gotv = vec0.ShiftRightAndFillUpperFrom(vec1, vec2) + + default: + t.Errorf("Unknown method: Uint16x8.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testUint16x8TernaryMasked(t *testing.T, v0 []uint16, v1 []uint16, v2 []uint16, v3 []int16, want []uint16, which string) { + t.Helper() + var gotv simd.Uint16x8 + got := make([]uint16, len(want)) + vec0 := simd.LoadUint16x8Slice(v0) + 
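// Note that even for the unsigned Uint16x8 receiver, the mask operand v3 is
// loaded as an Int16x8 and converted with AsMask16x8 below.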
vec1 := simd.LoadUint16x8Slice(v1) + vec2 := simd.LoadUint16x8Slice(v2) + vec3 := simd.LoadInt16x8Slice(v3) + switch which { + case "MaskedShiftLeftAndFillUpperFrom": + gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask16x8()) + case "MaskedShiftRightAndFillUpperFrom": + gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask16x8()) + + default: + t.Errorf("Unknown method: Uint16x8.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testUint16x8Unary(t *testing.T, v0 []uint16, want []uint16, which string) { t.Helper() var gotv simd.Uint16x8 @@ -5148,6 +5806,12 @@ func testUint16x16Binary(t *testing.T, v0 []uint16, v1 []uint16, want []uint16, gotv = vec0.SaturatedAdd(vec1) case "SaturatedSub": gotv = vec0.SaturatedSub(vec1) + case "ShiftLeft": + gotv = vec0.ShiftLeft(vec1) + case "ShiftRight": + gotv = vec0.ShiftRight(vec1) + case "ShiftRightSignExtended": + gotv = vec0.ShiftRightSignExtended(vec1) case "Sub": gotv = vec0.Sub(vec1) case "Xor": @@ -5186,6 +5850,12 @@ func testUint16x16BinaryMasked(t *testing.T, v0 []uint16, v1 []uint16, v2 []int1 gotv = vec0.MaskedSaturatedAdd(vec1, vec2.AsMask16x16()) case "MaskedSaturatedSub": gotv = vec0.MaskedSaturatedSub(vec1, vec2.AsMask16x16()) + case "MaskedShiftLeft": + gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask16x16()) + case "MaskedShiftRight": + gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask16x16()) + case "MaskedShiftRightSignExtended": + gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask16x16()) case "MaskedSub": gotv = vec0.MaskedSub(vec1, vec2.AsMask16x16()) @@ -5263,6 +5933,55 @@ func testUint16x16MaskedCompare(t *testing.T, v0 []uint16, v1 []uint16, v2 []int } } +func testUint16x16Ternary(t *testing.T, v0 []uint16, v1 []uint16, v2 []uint16, want []uint16, which string) { + t.Helper() + var gotv simd.Uint16x16 + got := make([]uint16, len(want)) + vec0 := simd.LoadUint16x16Slice(v0) + vec1 := simd.LoadUint16x16Slice(v1) + vec2 := simd.LoadUint16x16Slice(v2) + switch which { + case "ShiftLeftAndFillUpperFrom": + gotv = vec0.ShiftLeftAndFillUpperFrom(vec1, vec2) + case "ShiftRightAndFillUpperFrom": + gotv = vec0.ShiftRightAndFillUpperFrom(vec1, vec2) + + default: + t.Errorf("Unknown method: Uint16x16.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testUint16x16TernaryMasked(t *testing.T, v0 []uint16, v1 []uint16, v2 []uint16, v3 []int16, want []uint16, which string) { + t.Helper() + var gotv simd.Uint16x16 + got := make([]uint16, len(want)) + vec0 := simd.LoadUint16x16Slice(v0) + vec1 := simd.LoadUint16x16Slice(v1) + vec2 := simd.LoadUint16x16Slice(v2) + vec3 := simd.LoadInt16x16Slice(v3) + switch which { + case "MaskedShiftLeftAndFillUpperFrom": + gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask16x16()) + case "MaskedShiftRightAndFillUpperFrom": + gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask16x16()) + + default: + t.Errorf("Unknown method: Uint16x16.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testUint16x16Unary(t *testing.T, v0 []uint16, want []uint16, which string) { t.Helper() var gotv simd.Uint16x16 @@ -5325,6 +6044,12 @@ func testUint16x32Binary(t 
*testing.T, v0 []uint16, v1 []uint16, want []uint16, gotv = vec0.SaturatedAdd(vec1) case "SaturatedSub": gotv = vec0.SaturatedSub(vec1) + case "ShiftLeft": + gotv = vec0.ShiftLeft(vec1) + case "ShiftRight": + gotv = vec0.ShiftRight(vec1) + case "ShiftRightSignExtended": + gotv = vec0.ShiftRightSignExtended(vec1) case "Sub": gotv = vec0.Sub(vec1) @@ -5361,6 +6086,12 @@ func testUint16x32BinaryMasked(t *testing.T, v0 []uint16, v1 []uint16, v2 []int1 gotv = vec0.MaskedSaturatedAdd(vec1, vec2.AsMask16x32()) case "MaskedSaturatedSub": gotv = vec0.MaskedSaturatedSub(vec1, vec2.AsMask16x32()) + case "MaskedShiftLeft": + gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask16x32()) + case "MaskedShiftRight": + gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask16x32()) + case "MaskedShiftRightSignExtended": + gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask16x32()) case "MaskedSub": gotv = vec0.MaskedSub(vec1, vec2.AsMask16x32()) @@ -5438,6 +6169,55 @@ func testUint16x32MaskedCompare(t *testing.T, v0 []uint16, v1 []uint16, v2 []int } } +func testUint16x32Ternary(t *testing.T, v0 []uint16, v1 []uint16, v2 []uint16, want []uint16, which string) { + t.Helper() + var gotv simd.Uint16x32 + got := make([]uint16, len(want)) + vec0 := simd.LoadUint16x32Slice(v0) + vec1 := simd.LoadUint16x32Slice(v1) + vec2 := simd.LoadUint16x32Slice(v2) + switch which { + case "ShiftLeftAndFillUpperFrom": + gotv = vec0.ShiftLeftAndFillUpperFrom(vec1, vec2) + case "ShiftRightAndFillUpperFrom": + gotv = vec0.ShiftRightAndFillUpperFrom(vec1, vec2) + + default: + t.Errorf("Unknown method: Uint16x32.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testUint16x32TernaryMasked(t *testing.T, v0 []uint16, v1 []uint16, v2 []uint16, v3 []int16, want []uint16, which string) { + t.Helper() + var gotv simd.Uint16x32 + got := make([]uint16, len(want)) + vec0 := simd.LoadUint16x32Slice(v0) + vec1 := simd.LoadUint16x32Slice(v1) + vec2 := simd.LoadUint16x32Slice(v2) + vec3 := simd.LoadInt16x32Slice(v3) + switch which { + case "MaskedShiftLeftAndFillUpperFrom": + gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask16x32()) + case "MaskedShiftRightAndFillUpperFrom": + gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask16x32()) + + default: + t.Errorf("Unknown method: Uint16x32.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testUint16x32Unary(t *testing.T, v0 []uint16, want []uint16, which string) { t.Helper() var gotv simd.Uint16x32 @@ -5502,6 +6282,16 @@ func testUint32x4Binary(t *testing.T, v0 []uint32, v1 []uint32, want []uint32, w gotv = vec0.PairwiseAdd(vec1) case "PairwiseSub": gotv = vec0.PairwiseSub(vec1) + case "RotateLeft": + gotv = vec0.RotateLeft(vec1) + case "RotateRight": + gotv = vec0.RotateRight(vec1) + case "ShiftLeft": + gotv = vec0.ShiftLeft(vec1) + case "ShiftRight": + gotv = vec0.ShiftRight(vec1) + case "ShiftRightSignExtended": + gotv = vec0.ShiftRightSignExtended(vec1) case "Sub": gotv = vec0.Sub(vec1) case "Xor": @@ -5538,6 +6328,16 @@ func testUint32x4BinaryMasked(t *testing.T, v0 []uint32, v1 []uint32, v2 []int32 gotv = vec0.MaskedMin(vec1, vec2.AsMask32x4()) case "MaskedOr": gotv = vec0.MaskedOr(vec1, vec2.AsMask32x4()) + case "MaskedRotateLeft": + gotv = vec0.MaskedRotateLeft(vec1, vec2.AsMask32x4()) + case 
"MaskedRotateRight": + gotv = vec0.MaskedRotateRight(vec1, vec2.AsMask32x4()) + case "MaskedShiftLeft": + gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask32x4()) + case "MaskedShiftRight": + gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask32x4()) + case "MaskedShiftRightSignExtended": + gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask32x4()) case "MaskedSub": gotv = vec0.MaskedSub(vec1, vec2.AsMask32x4()) case "MaskedXor": @@ -5638,6 +6438,55 @@ func testUint32x4MaskedCompare(t *testing.T, v0 []uint32, v1 []uint32, v2 []int3 } } +func testUint32x4Ternary(t *testing.T, v0 []uint32, v1 []uint32, v2 []uint32, want []uint32, which string) { + t.Helper() + var gotv simd.Uint32x4 + got := make([]uint32, len(want)) + vec0 := simd.LoadUint32x4Slice(v0) + vec1 := simd.LoadUint32x4Slice(v1) + vec2 := simd.LoadUint32x4Slice(v2) + switch which { + case "ShiftLeftAndFillUpperFrom": + gotv = vec0.ShiftLeftAndFillUpperFrom(vec1, vec2) + case "ShiftRightAndFillUpperFrom": + gotv = vec0.ShiftRightAndFillUpperFrom(vec1, vec2) + + default: + t.Errorf("Unknown method: Uint32x4.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testUint32x4TernaryMasked(t *testing.T, v0 []uint32, v1 []uint32, v2 []uint32, v3 []int32, want []uint32, which string) { + t.Helper() + var gotv simd.Uint32x4 + got := make([]uint32, len(want)) + vec0 := simd.LoadUint32x4Slice(v0) + vec1 := simd.LoadUint32x4Slice(v1) + vec2 := simd.LoadUint32x4Slice(v2) + vec3 := simd.LoadInt32x4Slice(v3) + switch which { + case "MaskedShiftLeftAndFillUpperFrom": + gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask32x4()) + case "MaskedShiftRightAndFillUpperFrom": + gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask32x4()) + + default: + t.Errorf("Unknown method: Uint32x4.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testUint32x4Uint8x16Int8x16Mask32x4Uint32x4(t *testing.T, v0 []uint32, v1 []uint8, v2 []int8, v3 []int32, want []uint32, which string) { t.Helper() var gotv simd.Uint32x4 @@ -5751,6 +6600,16 @@ func testUint32x8Binary(t *testing.T, v0 []uint32, v1 []uint32, want []uint32, w gotv = vec0.PairwiseAdd(vec1) case "PairwiseSub": gotv = vec0.PairwiseSub(vec1) + case "RotateLeft": + gotv = vec0.RotateLeft(vec1) + case "RotateRight": + gotv = vec0.RotateRight(vec1) + case "ShiftLeft": + gotv = vec0.ShiftLeft(vec1) + case "ShiftRight": + gotv = vec0.ShiftRight(vec1) + case "ShiftRightSignExtended": + gotv = vec0.ShiftRightSignExtended(vec1) case "Sub": gotv = vec0.Sub(vec1) case "Xor": @@ -5787,6 +6646,16 @@ func testUint32x8BinaryMasked(t *testing.T, v0 []uint32, v1 []uint32, v2 []int32 gotv = vec0.MaskedMin(vec1, vec2.AsMask32x8()) case "MaskedOr": gotv = vec0.MaskedOr(vec1, vec2.AsMask32x8()) + case "MaskedRotateLeft": + gotv = vec0.MaskedRotateLeft(vec1, vec2.AsMask32x8()) + case "MaskedRotateRight": + gotv = vec0.MaskedRotateRight(vec1, vec2.AsMask32x8()) + case "MaskedShiftLeft": + gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask32x8()) + case "MaskedShiftRight": + gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask32x8()) + case "MaskedShiftRightSignExtended": + gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask32x8()) case "MaskedSub": gotv = vec0.MaskedSub(vec1, vec2.AsMask32x8()) case "MaskedXor": @@ -5887,6 +6756,55 @@ func 
testUint32x8MaskedCompare(t *testing.T, v0 []uint32, v1 []uint32, v2 []int3 } } +func testUint32x8Ternary(t *testing.T, v0 []uint32, v1 []uint32, v2 []uint32, want []uint32, which string) { + t.Helper() + var gotv simd.Uint32x8 + got := make([]uint32, len(want)) + vec0 := simd.LoadUint32x8Slice(v0) + vec1 := simd.LoadUint32x8Slice(v1) + vec2 := simd.LoadUint32x8Slice(v2) + switch which { + case "ShiftLeftAndFillUpperFrom": + gotv = vec0.ShiftLeftAndFillUpperFrom(vec1, vec2) + case "ShiftRightAndFillUpperFrom": + gotv = vec0.ShiftRightAndFillUpperFrom(vec1, vec2) + + default: + t.Errorf("Unknown method: Uint32x8.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testUint32x8TernaryMasked(t *testing.T, v0 []uint32, v1 []uint32, v2 []uint32, v3 []int32, want []uint32, which string) { + t.Helper() + var gotv simd.Uint32x8 + got := make([]uint32, len(want)) + vec0 := simd.LoadUint32x8Slice(v0) + vec1 := simd.LoadUint32x8Slice(v1) + vec2 := simd.LoadUint32x8Slice(v2) + vec3 := simd.LoadInt32x8Slice(v3) + switch which { + case "MaskedShiftLeftAndFillUpperFrom": + gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask32x8()) + case "MaskedShiftRightAndFillUpperFrom": + gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask32x8()) + + default: + t.Errorf("Unknown method: Uint32x8.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testUint32x8Uint8x32Int8x32Mask32x8Uint32x8(t *testing.T, v0 []uint32, v1 []uint8, v2 []int8, v3 []int32, want []uint32, which string) { t.Helper() var gotv simd.Uint32x8 @@ -5996,6 +6914,16 @@ func testUint32x16Binary(t *testing.T, v0 []uint32, v1 []uint32, want []uint32, gotv = vec0.Min(vec1) case "Or": gotv = vec0.Or(vec1) + case "RotateLeft": + gotv = vec0.RotateLeft(vec1) + case "RotateRight": + gotv = vec0.RotateRight(vec1) + case "ShiftLeft": + gotv = vec0.ShiftLeft(vec1) + case "ShiftRight": + gotv = vec0.ShiftRight(vec1) + case "ShiftRightSignExtended": + gotv = vec0.ShiftRightSignExtended(vec1) case "Sub": gotv = vec0.Sub(vec1) case "Xor": @@ -6032,6 +6960,16 @@ func testUint32x16BinaryMasked(t *testing.T, v0 []uint32, v1 []uint32, v2 []int3 gotv = vec0.MaskedMin(vec1, vec2.AsMask32x16()) case "MaskedOr": gotv = vec0.MaskedOr(vec1, vec2.AsMask32x16()) + case "MaskedRotateLeft": + gotv = vec0.MaskedRotateLeft(vec1, vec2.AsMask32x16()) + case "MaskedRotateRight": + gotv = vec0.MaskedRotateRight(vec1, vec2.AsMask32x16()) + case "MaskedShiftLeft": + gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask32x16()) + case "MaskedShiftRight": + gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask32x16()) + case "MaskedShiftRightSignExtended": + gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask32x16()) case "MaskedSub": gotv = vec0.MaskedSub(vec1, vec2.AsMask32x16()) case "MaskedXor": @@ -6111,6 +7049,55 @@ func testUint32x16MaskedCompare(t *testing.T, v0 []uint32, v1 []uint32, v2 []int } } +func testUint32x16Ternary(t *testing.T, v0 []uint32, v1 []uint32, v2 []uint32, want []uint32, which string) { + t.Helper() + var gotv simd.Uint32x16 + got := make([]uint32, len(want)) + vec0 := simd.LoadUint32x16Slice(v0) + vec1 := simd.LoadUint32x16Slice(v1) + vec2 := simd.LoadUint32x16Slice(v2) + switch which { + case "ShiftLeftAndFillUpperFrom": + gotv = vec0.ShiftLeftAndFillUpperFrom(vec1, vec2) + 
case "ShiftRightAndFillUpperFrom": + gotv = vec0.ShiftRightAndFillUpperFrom(vec1, vec2) + + default: + t.Errorf("Unknown method: Uint32x16.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testUint32x16TernaryMasked(t *testing.T, v0 []uint32, v1 []uint32, v2 []uint32, v3 []int32, want []uint32, which string) { + t.Helper() + var gotv simd.Uint32x16 + got := make([]uint32, len(want)) + vec0 := simd.LoadUint32x16Slice(v0) + vec1 := simd.LoadUint32x16Slice(v1) + vec2 := simd.LoadUint32x16Slice(v2) + vec3 := simd.LoadInt32x16Slice(v3) + switch which { + case "MaskedShiftLeftAndFillUpperFrom": + gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask32x16()) + case "MaskedShiftRightAndFillUpperFrom": + gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask32x16()) + + default: + t.Errorf("Unknown method: Uint32x16.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testUint32x16Uint8x64Int8x64Mask32x16Uint32x16(t *testing.T, v0 []uint32, v1 []uint8, v2 []int8, v3 []int32, want []uint32, which string) { t.Helper() var gotv simd.Uint32x16 @@ -6222,6 +7209,16 @@ func testUint64x2Binary(t *testing.T, v0 []uint64, v1 []uint64, want []uint64, w gotv = vec0.MulEvenWiden(vec1) case "Or": gotv = vec0.Or(vec1) + case "RotateLeft": + gotv = vec0.RotateLeft(vec1) + case "RotateRight": + gotv = vec0.RotateRight(vec1) + case "ShiftLeft": + gotv = vec0.ShiftLeft(vec1) + case "ShiftRight": + gotv = vec0.ShiftRight(vec1) + case "ShiftRightSignExtended": + gotv = vec0.ShiftRightSignExtended(vec1) case "Sub": gotv = vec0.Sub(vec1) case "Xor": @@ -6260,6 +7257,16 @@ func testUint64x2BinaryMasked(t *testing.T, v0 []uint64, v1 []uint64, v2 []int64 gotv = vec0.MaskedMulEvenWiden(vec1, vec2.AsMask64x2()) case "MaskedOr": gotv = vec0.MaskedOr(vec1, vec2.AsMask64x2()) + case "MaskedRotateLeft": + gotv = vec0.MaskedRotateLeft(vec1, vec2.AsMask64x2()) + case "MaskedRotateRight": + gotv = vec0.MaskedRotateRight(vec1, vec2.AsMask64x2()) + case "MaskedShiftLeft": + gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask64x2()) + case "MaskedShiftRight": + gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask64x2()) + case "MaskedShiftRightSignExtended": + gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask64x2()) case "MaskedSub": gotv = vec0.MaskedSub(vec1, vec2.AsMask64x2()) case "MaskedXor": @@ -6339,6 +7346,55 @@ func testUint64x2MaskedCompare(t *testing.T, v0 []uint64, v1 []uint64, v2 []int6 } } +func testUint64x2Ternary(t *testing.T, v0 []uint64, v1 []uint64, v2 []uint64, want []uint64, which string) { + t.Helper() + var gotv simd.Uint64x2 + got := make([]uint64, len(want)) + vec0 := simd.LoadUint64x2Slice(v0) + vec1 := simd.LoadUint64x2Slice(v1) + vec2 := simd.LoadUint64x2Slice(v2) + switch which { + case "ShiftLeftAndFillUpperFrom": + gotv = vec0.ShiftLeftAndFillUpperFrom(vec1, vec2) + case "ShiftRightAndFillUpperFrom": + gotv = vec0.ShiftRightAndFillUpperFrom(vec1, vec2) + + default: + t.Errorf("Unknown method: Uint64x2.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testUint64x2TernaryMasked(t *testing.T, v0 []uint64, v1 []uint64, v2 []uint64, v3 []int64, want []uint64, which string) { + t.Helper() + 
var gotv simd.Uint64x2 + got := make([]uint64, len(want)) + vec0 := simd.LoadUint64x2Slice(v0) + vec1 := simd.LoadUint64x2Slice(v1) + vec2 := simd.LoadUint64x2Slice(v2) + vec3 := simd.LoadInt64x2Slice(v3) + switch which { + case "MaskedShiftLeftAndFillUpperFrom": + gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask64x2()) + case "MaskedShiftRightAndFillUpperFrom": + gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask64x2()) + + default: + t.Errorf("Unknown method: Uint64x2.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testUint64x2Unary(t *testing.T, v0 []uint64, want []uint64, which string) { t.Helper() var gotv simd.Uint64x2 @@ -6401,6 +7457,16 @@ func testUint64x4Binary(t *testing.T, v0 []uint64, v1 []uint64, want []uint64, w gotv = vec0.MulEvenWiden(vec1) case "Or": gotv = vec0.Or(vec1) + case "RotateLeft": + gotv = vec0.RotateLeft(vec1) + case "RotateRight": + gotv = vec0.RotateRight(vec1) + case "ShiftLeft": + gotv = vec0.ShiftLeft(vec1) + case "ShiftRight": + gotv = vec0.ShiftRight(vec1) + case "ShiftRightSignExtended": + gotv = vec0.ShiftRightSignExtended(vec1) case "Sub": gotv = vec0.Sub(vec1) case "Xor": @@ -6439,6 +7505,16 @@ func testUint64x4BinaryMasked(t *testing.T, v0 []uint64, v1 []uint64, v2 []int64 gotv = vec0.MaskedMulEvenWiden(vec1, vec2.AsMask64x4()) case "MaskedOr": gotv = vec0.MaskedOr(vec1, vec2.AsMask64x4()) + case "MaskedRotateLeft": + gotv = vec0.MaskedRotateLeft(vec1, vec2.AsMask64x4()) + case "MaskedRotateRight": + gotv = vec0.MaskedRotateRight(vec1, vec2.AsMask64x4()) + case "MaskedShiftLeft": + gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask64x4()) + case "MaskedShiftRight": + gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask64x4()) + case "MaskedShiftRightSignExtended": + gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask64x4()) case "MaskedSub": gotv = vec0.MaskedSub(vec1, vec2.AsMask64x4()) case "MaskedXor": @@ -6518,6 +7594,55 @@ func testUint64x4MaskedCompare(t *testing.T, v0 []uint64, v1 []uint64, v2 []int6 } } +func testUint64x4Ternary(t *testing.T, v0 []uint64, v1 []uint64, v2 []uint64, want []uint64, which string) { + t.Helper() + var gotv simd.Uint64x4 + got := make([]uint64, len(want)) + vec0 := simd.LoadUint64x4Slice(v0) + vec1 := simd.LoadUint64x4Slice(v1) + vec2 := simd.LoadUint64x4Slice(v2) + switch which { + case "ShiftLeftAndFillUpperFrom": + gotv = vec0.ShiftLeftAndFillUpperFrom(vec1, vec2) + case "ShiftRightAndFillUpperFrom": + gotv = vec0.ShiftRightAndFillUpperFrom(vec1, vec2) + + default: + t.Errorf("Unknown method: Uint64x4.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testUint64x4TernaryMasked(t *testing.T, v0 []uint64, v1 []uint64, v2 []uint64, v3 []int64, want []uint64, which string) { + t.Helper() + var gotv simd.Uint64x4 + got := make([]uint64, len(want)) + vec0 := simd.LoadUint64x4Slice(v0) + vec1 := simd.LoadUint64x4Slice(v1) + vec2 := simd.LoadUint64x4Slice(v2) + vec3 := simd.LoadInt64x4Slice(v3) + switch which { + case "MaskedShiftLeftAndFillUpperFrom": + gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask64x4()) + case "MaskedShiftRightAndFillUpperFrom": + gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask64x4()) + + default: + t.Errorf("Unknown method: Uint64x4.%s", which) + } + 
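// Note: the Ternary/TernaryMasked wrappers in this file are driven by generated table
// tests elsewhere; the call below is only a hypothetical sketch of how one such wrapper
// could be invoked directly (inputs, the mask convention, and the expected values are
// illustrative assumptions, not values taken from this CL):
//
//	// want would hold results computed by a scalar reference implementation.
//	testUint64x4TernaryMasked(t,
//		[]uint64{1, 2, 3, 4},  // x: values to shift
//		[]uint64{4, 4, 4, 4},  // y: per-lane shift counts
//		[]uint64{0, 0, 0, 0},  // z: fill source for the emptied bits
//		[]int64{-1, -1, 0, 0}, // mask lanes, converted via AsMask64x4 by the wrapper
//		want,
//		"MaskedShiftLeftAndFillUpperFrom")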
gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testUint64x4Unary(t *testing.T, v0 []uint64, want []uint64, which string) { t.Helper() var gotv simd.Uint64x4 @@ -6580,6 +7705,16 @@ func testUint64x8Binary(t *testing.T, v0 []uint64, v1 []uint64, want []uint64, w gotv = vec0.MulEvenWiden(vec1) case "Or": gotv = vec0.Or(vec1) + case "RotateLeft": + gotv = vec0.RotateLeft(vec1) + case "RotateRight": + gotv = vec0.RotateRight(vec1) + case "ShiftLeft": + gotv = vec0.ShiftLeft(vec1) + case "ShiftRight": + gotv = vec0.ShiftRight(vec1) + case "ShiftRightSignExtended": + gotv = vec0.ShiftRightSignExtended(vec1) case "Sub": gotv = vec0.Sub(vec1) case "Xor": @@ -6618,6 +7753,16 @@ func testUint64x8BinaryMasked(t *testing.T, v0 []uint64, v1 []uint64, v2 []int64 gotv = vec0.MaskedMulEvenWiden(vec1, vec2.AsMask64x8()) case "MaskedOr": gotv = vec0.MaskedOr(vec1, vec2.AsMask64x8()) + case "MaskedRotateLeft": + gotv = vec0.MaskedRotateLeft(vec1, vec2.AsMask64x8()) + case "MaskedRotateRight": + gotv = vec0.MaskedRotateRight(vec1, vec2.AsMask64x8()) + case "MaskedShiftLeft": + gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask64x8()) + case "MaskedShiftRight": + gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask64x8()) + case "MaskedShiftRightSignExtended": + gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask64x8()) case "MaskedSub": gotv = vec0.MaskedSub(vec1, vec2.AsMask64x8()) case "MaskedXor": @@ -6697,6 +7842,55 @@ func testUint64x8MaskedCompare(t *testing.T, v0 []uint64, v1 []uint64, v2 []int6 } } +func testUint64x8Ternary(t *testing.T, v0 []uint64, v1 []uint64, v2 []uint64, want []uint64, which string) { + t.Helper() + var gotv simd.Uint64x8 + got := make([]uint64, len(want)) + vec0 := simd.LoadUint64x8Slice(v0) + vec1 := simd.LoadUint64x8Slice(v1) + vec2 := simd.LoadUint64x8Slice(v2) + switch which { + case "ShiftLeftAndFillUpperFrom": + gotv = vec0.ShiftLeftAndFillUpperFrom(vec1, vec2) + case "ShiftRightAndFillUpperFrom": + gotv = vec0.ShiftRightAndFillUpperFrom(vec1, vec2) + + default: + t.Errorf("Unknown method: Uint64x8.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func testUint64x8TernaryMasked(t *testing.T, v0 []uint64, v1 []uint64, v2 []uint64, v3 []int64, want []uint64, which string) { + t.Helper() + var gotv simd.Uint64x8 + got := make([]uint64, len(want)) + vec0 := simd.LoadUint64x8Slice(v0) + vec1 := simd.LoadUint64x8Slice(v1) + vec2 := simd.LoadUint64x8Slice(v2) + vec3 := simd.LoadInt64x8Slice(v3) + switch which { + case "MaskedShiftLeftAndFillUpperFrom": + gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask64x8()) + case "MaskedShiftRightAndFillUpperFrom": + gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask64x8()) + + default: + t.Errorf("Unknown method: Uint64x8.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + func testUint64x8Unary(t *testing.T, v0 []uint64, want []uint64, which string) { t.Helper() var gotv simd.Uint64x8 @@ -6737,3 +7931,54 @@ func testUint64x8UnaryMasked(t *testing.T, v0 []uint64, v1 []int64, want []uint6 } } } + +/* The operations below cannot be tested via wrappers, please test them directly */ + +// CeilSuppressExceptionWithPrecision +// 
CeilWithPrecision +// DiffWithCeilSuppressExceptionWithPrecision +// DiffWithCeilWithPrecision +// DiffWithFloorSuppressExceptionWithPrecision +// DiffWithFloorWithPrecision +// DiffWithRoundSuppressExceptionWithPrecision +// DiffWithRoundWithPrecision +// DiffWithTruncSuppressExceptionWithPrecision +// DiffWithTruncWithPrecision +// FloorSuppressExceptionWithPrecision +// FloorWithPrecision +// GetElem +// MaskedCeilSuppressExceptionWithPrecision +// MaskedCeilWithPrecision +// MaskedDiffWithCeilSuppressExceptionWithPrecision +// MaskedDiffWithCeilWithPrecision +// MaskedDiffWithFloorSuppressExceptionWithPrecision +// MaskedDiffWithFloorWithPrecision +// MaskedDiffWithRoundSuppressExceptionWithPrecision +// MaskedDiffWithRoundWithPrecision +// MaskedDiffWithTruncSuppressExceptionWithPrecision +// MaskedDiffWithTruncWithPrecision +// MaskedFloorSuppressExceptionWithPrecision +// MaskedFloorWithPrecision +// MaskedRotateAllLeft +// MaskedRotateAllRight +// MaskedRoundSuppressExceptionWithPrecision +// MaskedRoundWithPrecision +// MaskedShiftAllLeft +// MaskedShiftAllLeftAndFillUpperFrom +// MaskedShiftAllRight +// MaskedShiftAllRightAndFillUpperFrom +// MaskedShiftAllRightSignExtended +// MaskedTruncSuppressExceptionWithPrecision +// MaskedTruncWithPrecision +// RotateAllLeft +// RotateAllRight +// RoundSuppressExceptionWithPrecision +// RoundWithPrecision +// SetElem +// ShiftAllLeft +// ShiftAllLeftAndFillUpperFrom +// ShiftAllRight +// ShiftAllRightAndFillUpperFrom +// ShiftAllRightSignExtended +// TruncSuppressExceptionWithPrecision +// TruncWithPrecision diff --git a/src/simd/stubs_amd64.go b/src/simd/stubs_amd64.go index 5037e4e024e..330ad6aca2a 100644 --- a/src/simd/stubs_amd64.go +++ b/src/simd/stubs_amd64.go @@ -5178,6 +5178,254 @@ func (x Uint64x4) MaskedPopCount(y Mask64x4) Uint64x4 // Asm: VPOPCNTQ, CPU Feature: AVX512EVEX func (x Uint64x8) MaskedPopCount(y Mask64x8) Uint64x8 +/* MaskedRotateAllLeft */ + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLD, CPU Feature: AVX512EVEX +func (x Int32x4) MaskedRotateAllLeft(imm uint8, y Mask32x4) Int32x4 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLD, CPU Feature: AVX512EVEX +func (x Int32x8) MaskedRotateAllLeft(imm uint8, y Mask32x8) Int32x8 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLD, CPU Feature: AVX512EVEX +func (x Int32x16) MaskedRotateAllLeft(imm uint8, y Mask32x16) Int32x16 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLQ, CPU Feature: AVX512EVEX +func (x Int64x2) MaskedRotateAllLeft(imm uint8, y Mask64x2) Int64x2 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLQ, CPU Feature: AVX512EVEX +func (x Int64x4) MaskedRotateAllLeft(imm uint8, y Mask64x4) Int64x4 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLQ, CPU Feature: AVX512EVEX +func (x Int64x8) MaskedRotateAllLeft(imm uint8, y Mask64x8) Int64x8 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. 
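// Illustrative use only (a sketch assuming an AVX-512 capable CPU; the mask is built from
// an integer vector as in the wrapper tests in this CL, and the behavior of unselected
// lanes is not specified here):
//
//	x := simd.LoadUint32x4Slice([]uint32{1, 2, 3, 4})
//	m := simd.LoadInt32x4Slice([]int32{-1, -1, 0, 0}).AsMask32x4()
//	r := x.MaskedRotateAllLeft(3, m) // selected lanes are rotated left by 3 bits
//	out := make([]uint32, 4)
//	r.StoreSlice(out)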
+// +// Asm: VPROLD, CPU Feature: AVX512EVEX +func (x Uint32x4) MaskedRotateAllLeft(imm uint8, y Mask32x4) Uint32x4 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLD, CPU Feature: AVX512EVEX +func (x Uint32x8) MaskedRotateAllLeft(imm uint8, y Mask32x8) Uint32x8 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLD, CPU Feature: AVX512EVEX +func (x Uint32x16) MaskedRotateAllLeft(imm uint8, y Mask32x16) Uint32x16 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLQ, CPU Feature: AVX512EVEX +func (x Uint64x2) MaskedRotateAllLeft(imm uint8, y Mask64x2) Uint64x2 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLQ, CPU Feature: AVX512EVEX +func (x Uint64x4) MaskedRotateAllLeft(imm uint8, y Mask64x4) Uint64x4 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLQ, CPU Feature: AVX512EVEX +func (x Uint64x8) MaskedRotateAllLeft(imm uint8, y Mask64x8) Uint64x8 + +/* MaskedRotateAllRight */ + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORD, CPU Feature: AVX512EVEX +func (x Int32x4) MaskedRotateAllRight(imm uint8, y Mask32x4) Int32x4 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORD, CPU Feature: AVX512EVEX +func (x Int32x8) MaskedRotateAllRight(imm uint8, y Mask32x8) Int32x8 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORD, CPU Feature: AVX512EVEX +func (x Int32x16) MaskedRotateAllRight(imm uint8, y Mask32x16) Int32x16 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORQ, CPU Feature: AVX512EVEX +func (x Int64x2) MaskedRotateAllRight(imm uint8, y Mask64x2) Int64x2 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORQ, CPU Feature: AVX512EVEX +func (x Int64x4) MaskedRotateAllRight(imm uint8, y Mask64x4) Int64x4 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORQ, CPU Feature: AVX512EVEX +func (x Int64x8) MaskedRotateAllRight(imm uint8, y Mask64x8) Int64x8 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORD, CPU Feature: AVX512EVEX +func (x Uint32x4) MaskedRotateAllRight(imm uint8, y Mask32x4) Uint32x4 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORD, CPU Feature: AVX512EVEX +func (x Uint32x8) MaskedRotateAllRight(imm uint8, y Mask32x8) Uint32x8 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORD, CPU Feature: AVX512EVEX +func (x Uint32x16) MaskedRotateAllRight(imm uint8, y Mask32x16) Uint32x16 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. 
+// +// Asm: VPRORQ, CPU Feature: AVX512EVEX +func (x Uint64x2) MaskedRotateAllRight(imm uint8, y Mask64x2) Uint64x2 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORQ, CPU Feature: AVX512EVEX +func (x Uint64x4) MaskedRotateAllRight(imm uint8, y Mask64x4) Uint64x4 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORQ, CPU Feature: AVX512EVEX +func (x Uint64x8) MaskedRotateAllRight(imm uint8, y Mask64x8) Uint64x8 + +/* MaskedRotateLeft */ + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVD, CPU Feature: AVX512EVEX +func (x Int32x4) MaskedRotateLeft(y Int32x4, z Mask32x4) Int32x4 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVD, CPU Feature: AVX512EVEX +func (x Int32x8) MaskedRotateLeft(y Int32x8, z Mask32x8) Int32x8 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVD, CPU Feature: AVX512EVEX +func (x Int32x16) MaskedRotateLeft(y Int32x16, z Mask32x16) Int32x16 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVQ, CPU Feature: AVX512EVEX +func (x Int64x2) MaskedRotateLeft(y Int64x2, z Mask64x2) Int64x2 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVQ, CPU Feature: AVX512EVEX +func (x Int64x4) MaskedRotateLeft(y Int64x4, z Mask64x4) Int64x4 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVQ, CPU Feature: AVX512EVEX +func (x Int64x8) MaskedRotateLeft(y Int64x8, z Mask64x8) Int64x8 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVD, CPU Feature: AVX512EVEX +func (x Uint32x4) MaskedRotateLeft(y Uint32x4, z Mask32x4) Uint32x4 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVD, CPU Feature: AVX512EVEX +func (x Uint32x8) MaskedRotateLeft(y Uint32x8, z Mask32x8) Uint32x8 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVD, CPU Feature: AVX512EVEX +func (x Uint32x16) MaskedRotateLeft(y Uint32x16, z Mask32x16) Uint32x16 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVQ, CPU Feature: AVX512EVEX +func (x Uint64x2) MaskedRotateLeft(y Uint64x2, z Mask64x2) Uint64x2 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVQ, CPU Feature: AVX512EVEX +func (x Uint64x4) MaskedRotateLeft(y Uint64x4, z Mask64x4) Uint64x4 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVQ, CPU Feature: AVX512EVEX +func (x Uint64x8) MaskedRotateLeft(y Uint64x8, z Mask64x8) Uint64x8 + +/* MaskedRotateRight */ + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. 
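// A concrete sketch of the per-lane, variable-count rotate (unmasked form, as exercised
// by the wrapper tests in this CL; assumes hardware with the required AVX-512 features):
//
//	x := simd.LoadUint32x4Slice([]uint32{0x80000001, 2, 4, 8})
//	n := simd.LoadUint32x4Slice([]uint32{1, 1, 2, 3})
//	r := x.RotateRight(n) // lane i is rotated right by n[i] bits: {0xC0000000, 1, 1, 1}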
+// +// Asm: VPRORVD, CPU Feature: AVX512EVEX +func (x Int32x4) MaskedRotateRight(y Int32x4, z Mask32x4) Int32x4 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVD, CPU Feature: AVX512EVEX +func (x Int32x8) MaskedRotateRight(y Int32x8, z Mask32x8) Int32x8 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVD, CPU Feature: AVX512EVEX +func (x Int32x16) MaskedRotateRight(y Int32x16, z Mask32x16) Int32x16 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVQ, CPU Feature: AVX512EVEX +func (x Int64x2) MaskedRotateRight(y Int64x2, z Mask64x2) Int64x2 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVQ, CPU Feature: AVX512EVEX +func (x Int64x4) MaskedRotateRight(y Int64x4, z Mask64x4) Int64x4 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVQ, CPU Feature: AVX512EVEX +func (x Int64x8) MaskedRotateRight(y Int64x8, z Mask64x8) Int64x8 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVD, CPU Feature: AVX512EVEX +func (x Uint32x4) MaskedRotateRight(y Uint32x4, z Mask32x4) Uint32x4 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVD, CPU Feature: AVX512EVEX +func (x Uint32x8) MaskedRotateRight(y Uint32x8, z Mask32x8) Uint32x8 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVD, CPU Feature: AVX512EVEX +func (x Uint32x16) MaskedRotateRight(y Uint32x16, z Mask32x16) Uint32x16 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVQ, CPU Feature: AVX512EVEX +func (x Uint64x2) MaskedRotateRight(y Uint64x2, z Mask64x2) Uint64x2 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVQ, CPU Feature: AVX512EVEX +func (x Uint64x4) MaskedRotateRight(y Uint64x4, z Mask64x4) Uint64x4 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVQ, CPU Feature: AVX512EVEX +func (x Uint64x8) MaskedRotateRight(y Uint64x8, z Mask64x8) Uint64x8 + /* MaskedRoundSuppressExceptionWithPrecision */ // RoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions. @@ -5447,6 +5695,803 @@ func (x Uint32x8) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, // Asm: VPDPBUSDS, CPU Feature: AVX512EVEX func (x Uint32x16) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int8x64, u Mask32x16) Uint32x16 +/* MaskedShiftAllLeft */ + +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLQ, CPU Feature: AVX512EVEX +func (x Int64x2) MaskedShiftAllLeft(y uint64, z Mask64x2) Int64x2 + +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. 
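// Unlike ShiftLeft, which takes a vector of per-lane counts, ShiftAllLeft applies one
// count to every lane. A minimal sketch of the masked form (unselected-lane behavior is
// not documented here; AVX-512 hardware assumed):
//
//	x := simd.LoadInt64x4Slice([]int64{1, 2, 3, 4})
//	m := simd.LoadInt64x4Slice([]int64{-1, 0, -1, 0}).AsMask64x4()
//	r := x.MaskedShiftAllLeft(4, m) // selected lanes: 1<<4 = 16, 3<<4 = 48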
+// +// Asm: VPSLLQ, CPU Feature: AVX512EVEX +func (x Int64x4) MaskedShiftAllLeft(y uint64, z Mask64x4) Int64x4 + +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLQ, CPU Feature: AVX512EVEX +func (x Int64x8) MaskedShiftAllLeft(y uint64, z Mask64x8) Int64x8 + +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLQ, CPU Feature: AVX512EVEX +func (x Uint64x2) MaskedShiftAllLeft(y uint64, z Mask64x2) Uint64x2 + +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLQ, CPU Feature: AVX512EVEX +func (x Uint64x4) MaskedShiftAllLeft(y uint64, z Mask64x4) Uint64x4 + +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLQ, CPU Feature: AVX512EVEX +func (x Uint64x8) MaskedShiftAllLeft(y uint64, z Mask64x8) Uint64x8 + +/* MaskedShiftAllLeftAndFillUpperFrom */ + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDW, CPU Feature: AVX512EVEX +func (x Int16x8) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Int16x8, z Mask16x8) Int16x8 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDW, CPU Feature: AVX512EVEX +func (x Int16x16) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Int16x16, z Mask16x16) Int16x16 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDW, CPU Feature: AVX512EVEX +func (x Int16x32) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Int16x32, z Mask16x32) Int16x32 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDD, CPU Feature: AVX512EVEX +func (x Int32x4) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Int32x4, z Mask32x4) Int32x4 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDD, CPU Feature: AVX512EVEX +func (x Int32x8) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Int32x8, z Mask32x8) Int32x8 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. 
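// In scalar terms, for 32-bit lanes and an immediate shift count s (taken modulo 32),
// each result lane is
//
//	result = (x << s) | (y >> (32 - s))
//
// e.g. x = 0x000000FF, y = 0xABCD0000, s = 8 yields 0x0000FFAB: the upper 8 bits of y
// fill the 8 lower bits emptied by shifting x. For s = 0 the lane is unchanged.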
+// +// Asm: VPSHLDD, CPU Feature: AVX512EVEX +func (x Int32x16) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Int32x16, z Mask32x16) Int32x16 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDQ, CPU Feature: AVX512EVEX +func (x Int64x2) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Int64x2, z Mask64x2) Int64x2 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDQ, CPU Feature: AVX512EVEX +func (x Int64x4) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Int64x4, z Mask64x4) Int64x4 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDQ, CPU Feature: AVX512EVEX +func (x Int64x8) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Int64x8, z Mask64x8) Int64x8 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDW, CPU Feature: AVX512EVEX +func (x Uint16x8) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Uint16x8, z Mask16x8) Uint16x8 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDW, CPU Feature: AVX512EVEX +func (x Uint16x16) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Uint16x16, z Mask16x16) Uint16x16 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDW, CPU Feature: AVX512EVEX +func (x Uint16x32) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Uint16x32, z Mask16x32) Uint16x32 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDD, CPU Feature: AVX512EVEX +func (x Uint32x4) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Uint32x4, z Mask32x4) Uint32x4 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDD, CPU Feature: AVX512EVEX +func (x Uint32x8) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Uint32x8, z Mask32x8) Uint32x8 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. 
+// +// Asm: VPSHLDD, CPU Feature: AVX512EVEX +func (x Uint32x16) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Uint32x16, z Mask32x16) Uint32x16 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDQ, CPU Feature: AVX512EVEX +func (x Uint64x2) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Uint64x2, z Mask64x2) Uint64x2 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDQ, CPU Feature: AVX512EVEX +func (x Uint64x4) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Uint64x4, z Mask64x4) Uint64x4 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDQ, CPU Feature: AVX512EVEX +func (x Uint64x8) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Uint64x8, z Mask64x8) Uint64x8 + +/* MaskedShiftAllRight */ + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLQ, CPU Feature: AVX512EVEX +func (x Int64x2) MaskedShiftAllRight(y uint64, z Mask64x2) Int64x2 + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLQ, CPU Feature: AVX512EVEX +func (x Int64x4) MaskedShiftAllRight(y uint64, z Mask64x4) Int64x4 + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLQ, CPU Feature: AVX512EVEX +func (x Int64x8) MaskedShiftAllRight(y uint64, z Mask64x8) Int64x8 + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLQ, CPU Feature: AVX512EVEX +func (x Uint64x2) MaskedShiftAllRight(y uint64, z Mask64x2) Uint64x2 + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLQ, CPU Feature: AVX512EVEX +func (x Uint64x4) MaskedShiftAllRight(y uint64, z Mask64x4) Uint64x4 + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLQ, CPU Feature: AVX512EVEX +func (x Uint64x8) MaskedShiftAllRight(y uint64, z Mask64x8) Uint64x8 + +/* MaskedShiftAllRightAndFillUpperFrom */ + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDW, CPU Feature: AVX512EVEX +func (x Int16x8) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Int16x8, z Mask16x8) Int16x8 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. 
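// The mirror image of ShiftAllLeftAndFillUpperFrom: in scalar terms, for lanes of width
// N bits and shift count s, each result lane is
//
//	result = (x >> s) | (y << (N - s))
//
// e.g. with 16-bit lanes, x = 0xFF00, y = 0x00CD, s = 8 yields 0xCDFF: the low 8 bits of
// y fill the upper bits emptied by shifting x right.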
+// +// Asm: VPSHRDW, CPU Feature: AVX512EVEX +func (x Int16x16) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Int16x16, z Mask16x16) Int16x16 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDW, CPU Feature: AVX512EVEX +func (x Int16x32) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Int16x32, z Mask16x32) Int16x32 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDD, CPU Feature: AVX512EVEX +func (x Int32x4) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Int32x4, z Mask32x4) Int32x4 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDD, CPU Feature: AVX512EVEX +func (x Int32x8) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Int32x8, z Mask32x8) Int32x8 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDD, CPU Feature: AVX512EVEX +func (x Int32x16) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Int32x16, z Mask32x16) Int32x16 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDQ, CPU Feature: AVX512EVEX +func (x Int64x2) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Int64x2, z Mask64x2) Int64x2 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDQ, CPU Feature: AVX512EVEX +func (x Int64x4) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Int64x4, z Mask64x4) Int64x4 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDQ, CPU Feature: AVX512EVEX +func (x Int64x8) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Int64x8, z Mask64x8) Int64x8 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDW, CPU Feature: AVX512EVEX +func (x Uint16x8) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Uint16x8, z Mask16x8) Uint16x8 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. 
+// +// Asm: VPSHRDW, CPU Feature: AVX512EVEX +func (x Uint16x16) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Uint16x16, z Mask16x16) Uint16x16 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDW, CPU Feature: AVX512EVEX +func (x Uint16x32) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Uint16x32, z Mask16x32) Uint16x32 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDD, CPU Feature: AVX512EVEX +func (x Uint32x4) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Uint32x4, z Mask32x4) Uint32x4 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDD, CPU Feature: AVX512EVEX +func (x Uint32x8) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Uint32x8, z Mask32x8) Uint32x8 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDD, CPU Feature: AVX512EVEX +func (x Uint32x16) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Uint32x16, z Mask32x16) Uint32x16 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDQ, CPU Feature: AVX512EVEX +func (x Uint64x2) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Uint64x2, z Mask64x2) Uint64x2 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDQ, CPU Feature: AVX512EVEX +func (x Uint64x4) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Uint64x4, z Mask64x4) Uint64x4 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDQ, CPU Feature: AVX512EVEX +func (x Uint64x8) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Uint64x8, z Mask64x8) Uint64x8 + +/* MaskedShiftAllRightSignExtended */ + +// ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAQ, CPU Feature: AVX512EVEX +func (x Int64x2) MaskedShiftAllRightSignExtended(y uint64, z Mask64x2) Int64x2 + +// ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAQ, CPU Feature: AVX512EVEX +func (x Int64x4) MaskedShiftAllRightSignExtended(y uint64, z Mask64x4) Int64x4 + +// ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. 
Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAQ, CPU Feature: AVX512EVEX +func (x Int64x8) MaskedShiftAllRightSignExtended(y uint64, z Mask64x8) Int64x8 + +/* MaskedShiftLeft */ + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVW, CPU Feature: AVX512EVEX +func (x Int16x8) MaskedShiftLeft(y Int16x8, z Mask16x8) Int16x8 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVW, CPU Feature: AVX512EVEX +func (x Int16x16) MaskedShiftLeft(y Int16x16, z Mask16x16) Int16x16 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVW, CPU Feature: AVX512EVEX +func (x Int16x32) MaskedShiftLeft(y Int16x32, z Mask16x32) Int16x32 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVD, CPU Feature: AVX512EVEX +func (x Int32x4) MaskedShiftLeft(y Int32x4, z Mask32x4) Int32x4 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVD, CPU Feature: AVX512EVEX +func (x Int32x8) MaskedShiftLeft(y Int32x8, z Mask32x8) Int32x8 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVD, CPU Feature: AVX512EVEX +func (x Int32x16) MaskedShiftLeft(y Int32x16, z Mask32x16) Int32x16 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVQ, CPU Feature: AVX512EVEX +func (x Int64x2) MaskedShiftLeft(y Int64x2, z Mask64x2) Int64x2 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVQ, CPU Feature: AVX512EVEX +func (x Int64x4) MaskedShiftLeft(y Int64x4, z Mask64x4) Int64x4 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVQ, CPU Feature: AVX512EVEX +func (x Int64x8) MaskedShiftLeft(y Int64x8, z Mask64x8) Int64x8 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVW, CPU Feature: AVX512EVEX +func (x Uint16x8) MaskedShiftLeft(y Uint16x8, z Mask16x8) Uint16x8 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVW, CPU Feature: AVX512EVEX +func (x Uint16x16) MaskedShiftLeft(y Uint16x16, z Mask16x16) Uint16x16 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVW, CPU Feature: AVX512EVEX +func (x Uint16x32) MaskedShiftLeft(y Uint16x32, z Mask16x32) Uint16x32 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. 
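// A concrete sketch of the per-lane, variable-count shifts (unmasked forms, as exercised
// by the wrapper tests in this CL; assumes hardware with the required AVX2/AVX-512
// features):
//
//	x := simd.LoadUint32x4Slice([]uint32{1, 2, 4, 0x80000000})
//	n := simd.LoadUint32x4Slice([]uint32{1, 2, 3, 1})
//	x.ShiftLeft(n)              // {2, 8, 32, 0}: lane i shifted left by n[i], zero-filled
//	x.ShiftRight(n)             // {0, 0, 0, 0x40000000}: logical (zero-filling) right shift
//	x.ShiftRightSignExtended(n) // like ShiftRight, but fills with the sign bit of each lane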
+// +// Asm: VPSLLVD, CPU Feature: AVX512EVEX +func (x Uint32x4) MaskedShiftLeft(y Uint32x4, z Mask32x4) Uint32x4 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVD, CPU Feature: AVX512EVEX +func (x Uint32x8) MaskedShiftLeft(y Uint32x8, z Mask32x8) Uint32x8 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVD, CPU Feature: AVX512EVEX +func (x Uint32x16) MaskedShiftLeft(y Uint32x16, z Mask32x16) Uint32x16 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVQ, CPU Feature: AVX512EVEX +func (x Uint64x2) MaskedShiftLeft(y Uint64x2, z Mask64x2) Uint64x2 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVQ, CPU Feature: AVX512EVEX +func (x Uint64x4) MaskedShiftLeft(y Uint64x4, z Mask64x4) Uint64x4 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVQ, CPU Feature: AVX512EVEX +func (x Uint64x8) MaskedShiftLeft(y Uint64x8, z Mask64x8) Uint64x8 + +/* MaskedShiftLeftAndFillUpperFrom */ + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVW, CPU Feature: AVX512EVEX +func (x Int16x8) MaskedShiftLeftAndFillUpperFrom(y Int16x8, z Int16x8, u Mask16x8) Int16x8 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVW, CPU Feature: AVX512EVEX +func (x Int16x16) MaskedShiftLeftAndFillUpperFrom(y Int16x16, z Int16x16, u Mask16x16) Int16x16 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVW, CPU Feature: AVX512EVEX +func (x Int16x32) MaskedShiftLeftAndFillUpperFrom(y Int16x32, z Int16x32, u Mask16x32) Int16x32 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVD, CPU Feature: AVX512EVEX +func (x Int32x4) MaskedShiftLeftAndFillUpperFrom(y Int32x4, z Int32x4, u Mask32x4) Int32x4 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. 
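// A concrete sketch of the variable-count form (unmasked, as exercised by the Ternary
// wrapper tests in this CL): for 32-bit lanes, each result lane is
// (x << n) | (fill >> (32 - n)), with n drawn from the count vector.
//
//	x := simd.LoadUint32x4Slice([]uint32{0x000000FF, 0x000000FF, 1, 1})
//	n := simd.LoadUint32x4Slice([]uint32{8, 16, 1, 4}) // per-lane shift counts
//	fill := simd.LoadUint32x4Slice([]uint32{0xABCD0000, 0xABCD0000, 7, 0})
//	x.ShiftLeftAndFillUpperFrom(n, fill) // {0x0000FFAB, 0x00FFABCD, 2, 0x10}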
+// +// Asm: VPSHLDVD, CPU Feature: AVX512EVEX +func (x Int32x8) MaskedShiftLeftAndFillUpperFrom(y Int32x8, z Int32x8, u Mask32x8) Int32x8 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVD, CPU Feature: AVX512EVEX +func (x Int32x16) MaskedShiftLeftAndFillUpperFrom(y Int32x16, z Int32x16, u Mask32x16) Int32x16 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX +func (x Int64x2) MaskedShiftLeftAndFillUpperFrom(y Int64x2, z Int64x2, u Mask64x2) Int64x2 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX +func (x Int64x4) MaskedShiftLeftAndFillUpperFrom(y Int64x4, z Int64x4, u Mask64x4) Int64x4 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX +func (x Int64x8) MaskedShiftLeftAndFillUpperFrom(y Int64x8, z Int64x8, u Mask64x8) Int64x8 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVW, CPU Feature: AVX512EVEX +func (x Uint16x8) MaskedShiftLeftAndFillUpperFrom(y Uint16x8, z Uint16x8, u Mask16x8) Uint16x8 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVW, CPU Feature: AVX512EVEX +func (x Uint16x16) MaskedShiftLeftAndFillUpperFrom(y Uint16x16, z Uint16x16, u Mask16x16) Uint16x16 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVW, CPU Feature: AVX512EVEX +func (x Uint16x32) MaskedShiftLeftAndFillUpperFrom(y Uint16x32, z Uint16x32, u Mask16x32) Uint16x32 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. 
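
Per selected w-bit lane this amounts to (x << n) | (z >> (w-n)): x supplies the upper part and z is shifted in from below. A hedged sketch on the Int32x4 shape declared above, with an assumed helper name:

	package simdexample

	import "simd"

	// funnelShiftLeft concatenates each lane pair (x, fill), shifts left by
	// the matching count, and keeps the upper half, for lanes selected by m.
	func funnelShiftLeft(x, counts, fill simd.Int32x4, m simd.Mask32x4) simd.Int32x4 {
		return x.MaskedShiftLeftAndFillUpperFrom(counts, fill, m)
	}
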
+// +// Asm: VPSHLDVD, CPU Feature: AVX512EVEX +func (x Uint32x4) MaskedShiftLeftAndFillUpperFrom(y Uint32x4, z Uint32x4, u Mask32x4) Uint32x4 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVD, CPU Feature: AVX512EVEX +func (x Uint32x8) MaskedShiftLeftAndFillUpperFrom(y Uint32x8, z Uint32x8, u Mask32x8) Uint32x8 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVD, CPU Feature: AVX512EVEX +func (x Uint32x16) MaskedShiftLeftAndFillUpperFrom(y Uint32x16, z Uint32x16, u Mask32x16) Uint32x16 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX +func (x Uint64x2) MaskedShiftLeftAndFillUpperFrom(y Uint64x2, z Uint64x2, u Mask64x2) Uint64x2 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX +func (x Uint64x4) MaskedShiftLeftAndFillUpperFrom(y Uint64x4, z Uint64x4, u Mask64x4) Uint64x4 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX +func (x Uint64x8) MaskedShiftLeftAndFillUpperFrom(y Uint64x8, z Uint64x8, u Mask64x8) Uint64x8 + +/* MaskedShiftRight */ + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVW, CPU Feature: AVX512EVEX +func (x Int16x8) MaskedShiftRight(y Int16x8, z Mask16x8) Int16x8 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVW, CPU Feature: AVX512EVEX +func (x Int16x16) MaskedShiftRight(y Int16x16, z Mask16x16) Int16x16 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVW, CPU Feature: AVX512EVEX +func (x Int16x32) MaskedShiftRight(y Int16x32, z Mask16x32) Int16x32 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVD, CPU Feature: AVX512EVEX +func (x Int32x4) MaskedShiftRight(y Int32x4, z Mask32x4) Int32x4 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. 
+// +// Asm: VPSRLVD, CPU Feature: AVX512EVEX +func (x Int32x8) MaskedShiftRight(y Int32x8, z Mask32x8) Int32x8 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVD, CPU Feature: AVX512EVEX +func (x Int32x16) MaskedShiftRight(y Int32x16, z Mask32x16) Int32x16 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVQ, CPU Feature: AVX512EVEX +func (x Int64x2) MaskedShiftRight(y Int64x2, z Mask64x2) Int64x2 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVQ, CPU Feature: AVX512EVEX +func (x Int64x4) MaskedShiftRight(y Int64x4, z Mask64x4) Int64x4 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVQ, CPU Feature: AVX512EVEX +func (x Int64x8) MaskedShiftRight(y Int64x8, z Mask64x8) Int64x8 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVW, CPU Feature: AVX512EVEX +func (x Uint16x8) MaskedShiftRight(y Uint16x8, z Mask16x8) Uint16x8 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVW, CPU Feature: AVX512EVEX +func (x Uint16x16) MaskedShiftRight(y Uint16x16, z Mask16x16) Uint16x16 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVW, CPU Feature: AVX512EVEX +func (x Uint16x32) MaskedShiftRight(y Uint16x32, z Mask16x32) Uint16x32 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVD, CPU Feature: AVX512EVEX +func (x Uint32x4) MaskedShiftRight(y Uint32x4, z Mask32x4) Uint32x4 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVD, CPU Feature: AVX512EVEX +func (x Uint32x8) MaskedShiftRight(y Uint32x8, z Mask32x8) Uint32x8 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVD, CPU Feature: AVX512EVEX +func (x Uint32x16) MaskedShiftRight(y Uint32x16, z Mask32x16) Uint32x16 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVQ, CPU Feature: AVX512EVEX +func (x Uint64x2) MaskedShiftRight(y Uint64x2, z Mask64x2) Uint64x2 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVQ, CPU Feature: AVX512EVEX +func (x Uint64x4) MaskedShiftRight(y Uint64x4, z Mask64x4) Uint64x4 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. 
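
A concrete use of these masked logical right shifts is dividing unsigned lanes by per-lane powers of two; a minimal sketch, with the helper name and the "simd" import path assumed:

	package simdexample

	import "simd"

	// divByPow2 divides each selected lane of x by 2^k, where k comes from the
	// matching lane of exps (a logical shift right truncates toward zero).
	func divByPow2(x, exps simd.Uint32x8, m simd.Mask32x8) simd.Uint32x8 {
		return x.MaskedShiftRight(exps, m)
	}
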
+// +// Asm: VPSRLVQ, CPU Feature: AVX512EVEX +func (x Uint64x8) MaskedShiftRight(y Uint64x8, z Mask64x8) Uint64x8 + +/* MaskedShiftRightAndFillUpperFrom */ + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVW, CPU Feature: AVX512EVEX +func (x Int16x8) MaskedShiftRightAndFillUpperFrom(y Int16x8, z Int16x8, u Mask16x8) Int16x8 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVW, CPU Feature: AVX512EVEX +func (x Int16x16) MaskedShiftRightAndFillUpperFrom(y Int16x16, z Int16x16, u Mask16x16) Int16x16 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVW, CPU Feature: AVX512EVEX +func (x Int16x32) MaskedShiftRightAndFillUpperFrom(y Int16x32, z Int16x32, u Mask16x32) Int16x32 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVD, CPU Feature: AVX512EVEX +func (x Int32x4) MaskedShiftRightAndFillUpperFrom(y Int32x4, z Int32x4, u Mask32x4) Int32x4 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVD, CPU Feature: AVX512EVEX +func (x Int32x8) MaskedShiftRightAndFillUpperFrom(y Int32x8, z Int32x8, u Mask32x8) Int32x8 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVD, CPU Feature: AVX512EVEX +func (x Int32x16) MaskedShiftRightAndFillUpperFrom(y Int32x16, z Int32x16, u Mask32x16) Int32x16 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVQ, CPU Feature: AVX512EVEX +func (x Int64x2) MaskedShiftRightAndFillUpperFrom(y Int64x2, z Int64x2, u Mask64x2) Int64x2 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. 
+// +// Asm: VPSHRDVQ, CPU Feature: AVX512EVEX +func (x Int64x4) MaskedShiftRightAndFillUpperFrom(y Int64x4, z Int64x4, u Mask64x4) Int64x4 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVQ, CPU Feature: AVX512EVEX +func (x Int64x8) MaskedShiftRightAndFillUpperFrom(y Int64x8, z Int64x8, u Mask64x8) Int64x8 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVW, CPU Feature: AVX512EVEX +func (x Uint16x8) MaskedShiftRightAndFillUpperFrom(y Uint16x8, z Uint16x8, u Mask16x8) Uint16x8 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVW, CPU Feature: AVX512EVEX +func (x Uint16x16) MaskedShiftRightAndFillUpperFrom(y Uint16x16, z Uint16x16, u Mask16x16) Uint16x16 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVW, CPU Feature: AVX512EVEX +func (x Uint16x32) MaskedShiftRightAndFillUpperFrom(y Uint16x32, z Uint16x32, u Mask16x32) Uint16x32 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVD, CPU Feature: AVX512EVEX +func (x Uint32x4) MaskedShiftRightAndFillUpperFrom(y Uint32x4, z Uint32x4, u Mask32x4) Uint32x4 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVD, CPU Feature: AVX512EVEX +func (x Uint32x8) MaskedShiftRightAndFillUpperFrom(y Uint32x8, z Uint32x8, u Mask32x8) Uint32x8 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVD, CPU Feature: AVX512EVEX +func (x Uint32x16) MaskedShiftRightAndFillUpperFrom(y Uint32x16, z Uint32x16, u Mask32x16) Uint32x16 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. 
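
Passing x itself as the fill operand turns this into a per-lane rotate right, since the bits shifted out at the bottom re-enter at the top; this is inferred from the comment above rather than documented, so treat the sketch accordingly:

	package simdexample

	import "simd"

	// rotateRightMasked rotates each selected lane of x right by the matching
	// count in counts, by funnel-shifting x against itself.
	func rotateRightMasked(x, counts simd.Uint32x4, m simd.Mask32x4) simd.Uint32x4 {
		return x.MaskedShiftRightAndFillUpperFrom(counts, x, m)
	}
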
+// +// Asm: VPSHRDVQ, CPU Feature: AVX512EVEX +func (x Uint64x2) MaskedShiftRightAndFillUpperFrom(y Uint64x2, z Uint64x2, u Mask64x2) Uint64x2 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVQ, CPU Feature: AVX512EVEX +func (x Uint64x4) MaskedShiftRightAndFillUpperFrom(y Uint64x4, z Uint64x4, u Mask64x4) Uint64x4 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVQ, CPU Feature: AVX512EVEX +func (x Uint64x8) MaskedShiftRightAndFillUpperFrom(y Uint64x8, z Uint64x8, u Mask64x8) Uint64x8 + +/* MaskedShiftRightSignExtended */ + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVW, CPU Feature: AVX512EVEX +func (x Int16x8) MaskedShiftRightSignExtended(y Int16x8, z Mask16x8) Int16x8 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVW, CPU Feature: AVX512EVEX +func (x Int16x16) MaskedShiftRightSignExtended(y Int16x16, z Mask16x16) Int16x16 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVW, CPU Feature: AVX512EVEX +func (x Int16x32) MaskedShiftRightSignExtended(y Int16x32, z Mask16x32) Int16x32 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVD, CPU Feature: AVX512EVEX +func (x Int32x4) MaskedShiftRightSignExtended(y Int32x4, z Mask32x4) Int32x4 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVD, CPU Feature: AVX512EVEX +func (x Int32x8) MaskedShiftRightSignExtended(y Int32x8, z Mask32x8) Int32x8 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVD, CPU Feature: AVX512EVEX +func (x Int32x16) MaskedShiftRightSignExtended(y Int32x16, z Mask32x16) Int32x16 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVQ, CPU Feature: AVX512EVEX +func (x Int64x2) MaskedShiftRightSignExtended(y Int64x2, z Mask64x2) Int64x2 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVQ, CPU Feature: AVX512EVEX +func (x Int64x4) MaskedShiftRightSignExtended(y Int64x4, z Mask64x4) Int64x4 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. 
Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVQ, CPU Feature: AVX512EVEX +func (x Int64x8) MaskedShiftRightSignExtended(y Int64x8, z Mask64x8) Int64x8 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVW, CPU Feature: AVX512EVEX +func (x Uint16x8) MaskedShiftRightSignExtended(y Uint16x8, z Mask16x8) Uint16x8 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVW, CPU Feature: AVX512EVEX +func (x Uint16x16) MaskedShiftRightSignExtended(y Uint16x16, z Mask16x16) Uint16x16 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVW, CPU Feature: AVX512EVEX +func (x Uint16x32) MaskedShiftRightSignExtended(y Uint16x32, z Mask16x32) Uint16x32 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVD, CPU Feature: AVX512EVEX +func (x Uint32x4) MaskedShiftRightSignExtended(y Uint32x4, z Mask32x4) Uint32x4 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVD, CPU Feature: AVX512EVEX +func (x Uint32x8) MaskedShiftRightSignExtended(y Uint32x8, z Mask32x8) Uint32x8 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVD, CPU Feature: AVX512EVEX +func (x Uint32x16) MaskedShiftRightSignExtended(y Uint32x16, z Mask32x16) Uint32x16 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVQ, CPU Feature: AVX512EVEX +func (x Uint64x2) MaskedShiftRightSignExtended(y Uint64x2, z Mask64x2) Uint64x2 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVQ, CPU Feature: AVX512EVEX +func (x Uint64x4) MaskedShiftRightSignExtended(y Uint64x4, z Mask64x4) Uint64x4 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVQ, CPU Feature: AVX512EVEX +func (x Uint64x8) MaskedShiftRightSignExtended(y Uint64x8, z Mask64x8) Uint64x8 + /* MaskedSqrt */ // Sqrt computes the square root of each element. @@ -6961,6 +8006,254 @@ func (x Uint64x4) PopCount() Uint64x4 // Asm: VPOPCNTQ, CPU Feature: AVX512EVEX func (x Uint64x8) PopCount() Uint64x8 +/* RotateAllLeft */ + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLD, CPU Feature: AVX512EVEX +func (x Int32x4) RotateAllLeft(imm8 uint8) Int32x4 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. 
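
Looking back at the MaskedShiftRightSignExtended group just above: an arithmetic right shift by k is a floor division by 2^k for signed values, which is the usual reason to prefer it over the zero-filling form. A minimal sketch with an assumed helper name:

	package simdexample

	import "simd"

	// floorDivPow2 divides each selected signed lane of x by 2^k, rounding
	// toward negative infinity, with k taken from the matching lane of exps.
	func floorDivPow2(x, exps simd.Int32x4, m simd.Mask32x4) simd.Int32x4 {
		return x.MaskedShiftRightSignExtended(exps, m)
	}
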
+// +// Asm: VPROLD, CPU Feature: AVX512EVEX +func (x Int32x8) RotateAllLeft(imm8 uint8) Int32x8 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLD, CPU Feature: AVX512EVEX +func (x Int32x16) RotateAllLeft(imm8 uint8) Int32x16 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLQ, CPU Feature: AVX512EVEX +func (x Int64x2) RotateAllLeft(imm8 uint8) Int64x2 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLQ, CPU Feature: AVX512EVEX +func (x Int64x4) RotateAllLeft(imm8 uint8) Int64x4 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLQ, CPU Feature: AVX512EVEX +func (x Int64x8) RotateAllLeft(imm8 uint8) Int64x8 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLD, CPU Feature: AVX512EVEX +func (x Uint32x4) RotateAllLeft(imm8 uint8) Uint32x4 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLD, CPU Feature: AVX512EVEX +func (x Uint32x8) RotateAllLeft(imm8 uint8) Uint32x8 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLD, CPU Feature: AVX512EVEX +func (x Uint32x16) RotateAllLeft(imm8 uint8) Uint32x16 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLQ, CPU Feature: AVX512EVEX +func (x Uint64x2) RotateAllLeft(imm8 uint8) Uint64x2 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLQ, CPU Feature: AVX512EVEX +func (x Uint64x4) RotateAllLeft(imm8 uint8) Uint64x4 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLQ, CPU Feature: AVX512EVEX +func (x Uint64x8) RotateAllLeft(imm8 uint8) Uint64x8 + +/* RotateAllRight */ + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORD, CPU Feature: AVX512EVEX +func (x Int32x4) RotateAllRight(imm8 uint8) Int32x4 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORD, CPU Feature: AVX512EVEX +func (x Int32x8) RotateAllRight(imm8 uint8) Int32x8 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORD, CPU Feature: AVX512EVEX +func (x Int32x16) RotateAllRight(imm8 uint8) Int32x16 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORQ, CPU Feature: AVX512EVEX +func (x Int64x2) RotateAllRight(imm8 uint8) Int64x2 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORQ, CPU Feature: AVX512EVEX +func (x Int64x4) RotateAllRight(imm8 uint8) Int64x4 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORQ, CPU Feature: AVX512EVEX +func (x Int64x8) RotateAllRight(imm8 uint8) Int64x8 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. 
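
RotateAllLeft, and RotateAllRight below, take the count as an 8-bit immediate, so the argument is presumably expected to be a compile-time constant; a fixed mixing rotation of the kind used in hash rounds is a typical use. A sketch with assumed names:

	package simdexample

	import "simd"

	// mixLanes applies a fixed left rotation to every 32-bit lane, the kind of
	// step that appears in hash and cipher rounds. The constant 13 is arbitrary.
	func mixLanes(v simd.Uint32x8) simd.Uint32x8 {
		return v.RotateAllLeft(13)
	}
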
+// +// Asm: VPRORD, CPU Feature: AVX512EVEX +func (x Uint32x4) RotateAllRight(imm8 uint8) Uint32x4 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORD, CPU Feature: AVX512EVEX +func (x Uint32x8) RotateAllRight(imm8 uint8) Uint32x8 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORD, CPU Feature: AVX512EVEX +func (x Uint32x16) RotateAllRight(imm8 uint8) Uint32x16 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORQ, CPU Feature: AVX512EVEX +func (x Uint64x2) RotateAllRight(imm8 uint8) Uint64x2 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORQ, CPU Feature: AVX512EVEX +func (x Uint64x4) RotateAllRight(imm8 uint8) Uint64x4 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORQ, CPU Feature: AVX512EVEX +func (x Uint64x8) RotateAllRight(imm8 uint8) Uint64x8 + +/* RotateLeft */ + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVD, CPU Feature: AVX512EVEX +func (x Int32x4) RotateLeft(y Int32x4) Int32x4 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVD, CPU Feature: AVX512EVEX +func (x Int32x8) RotateLeft(y Int32x8) Int32x8 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVD, CPU Feature: AVX512EVEX +func (x Int32x16) RotateLeft(y Int32x16) Int32x16 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVQ, CPU Feature: AVX512EVEX +func (x Int64x2) RotateLeft(y Int64x2) Int64x2 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVQ, CPU Feature: AVX512EVEX +func (x Int64x4) RotateLeft(y Int64x4) Int64x4 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVQ, CPU Feature: AVX512EVEX +func (x Int64x8) RotateLeft(y Int64x8) Int64x8 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVD, CPU Feature: AVX512EVEX +func (x Uint32x4) RotateLeft(y Uint32x4) Uint32x4 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVD, CPU Feature: AVX512EVEX +func (x Uint32x8) RotateLeft(y Uint32x8) Uint32x8 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVD, CPU Feature: AVX512EVEX +func (x Uint32x16) RotateLeft(y Uint32x16) Uint32x16 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVQ, CPU Feature: AVX512EVEX +func (x Uint64x2) RotateLeft(y Uint64x2) Uint64x2 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. 
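
In contrast to the immediate forms above, RotateLeft takes its counts from a vector, so every lane can rotate by a different amount; a minimal sketch on the two-lane shape declared above, with an assumed helper name:

	package simdexample

	import "simd"

	// rotatePerLane rotates each 64-bit lane of x left by the count in the
	// matching lane of counts (the hardware rotate uses the count modulo 64).
	func rotatePerLane(x, counts simd.Uint64x2) simd.Uint64x2 {
		return x.RotateLeft(counts)
	}
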
+// +// Asm: VPROLVQ, CPU Feature: AVX512EVEX +func (x Uint64x4) RotateLeft(y Uint64x4) Uint64x4 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVQ, CPU Feature: AVX512EVEX +func (x Uint64x8) RotateLeft(y Uint64x8) Uint64x8 + +/* RotateRight */ + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVD, CPU Feature: AVX512EVEX +func (x Int32x4) RotateRight(y Int32x4) Int32x4 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVD, CPU Feature: AVX512EVEX +func (x Int32x8) RotateRight(y Int32x8) Int32x8 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVD, CPU Feature: AVX512EVEX +func (x Int32x16) RotateRight(y Int32x16) Int32x16 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVQ, CPU Feature: AVX512EVEX +func (x Int64x2) RotateRight(y Int64x2) Int64x2 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVQ, CPU Feature: AVX512EVEX +func (x Int64x4) RotateRight(y Int64x4) Int64x4 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVQ, CPU Feature: AVX512EVEX +func (x Int64x8) RotateRight(y Int64x8) Int64x8 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVD, CPU Feature: AVX512EVEX +func (x Uint32x4) RotateRight(y Uint32x4) Uint32x4 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVD, CPU Feature: AVX512EVEX +func (x Uint32x8) RotateRight(y Uint32x8) Uint32x8 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVD, CPU Feature: AVX512EVEX +func (x Uint32x16) RotateRight(y Uint32x16) Uint32x16 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVQ, CPU Feature: AVX512EVEX +func (x Uint64x2) RotateRight(y Uint64x2) Uint64x2 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVQ, CPU Feature: AVX512EVEX +func (x Uint64x4) RotateRight(y Uint64x4) Uint64x4 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVQ, CPU Feature: AVX512EVEX +func (x Uint64x8) RotateRight(y Uint64x8) Uint64x8 + /* Round */ // Round rounds elements to the nearest integer. @@ -7326,6 +8619,903 @@ func (x Uint32x4) SetElem(imm uint8, y uint32) Uint32x4 // Asm: VPINSRQ, CPU Feature: AVX func (x Uint64x2) SetElem(imm uint8, y uint64) Uint64x2 +/* ShiftAllLeft */ + +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLW, CPU Feature: AVX +func (x Int16x8) ShiftAllLeft(y uint64) Int16x8 + +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. 
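
ShiftAllLeft applies a single scalar count to every lane, which is the natural way to scale a whole vector by one power of two; a sketch with an assumed helper name:

	package simdexample

	import "simd"

	// scaleByPow2 multiplies every 16-bit lane of x by 2^k (modulo 2^16).
	// Counts of 16 or more zero the lanes, as is usual for this instruction class.
	func scaleByPow2(x simd.Int16x8, k uint64) simd.Int16x8 {
		return x.ShiftAllLeft(k)
	}
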
+// +// Asm: VPSLLW, CPU Feature: AVX2 +func (x Int16x16) ShiftAllLeft(y uint64) Int16x16 + +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLD, CPU Feature: AVX +func (x Int32x4) ShiftAllLeft(y uint64) Int32x4 + +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLD, CPU Feature: AVX2 +func (x Int32x8) ShiftAllLeft(y uint64) Int32x8 + +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLQ, CPU Feature: AVX +func (x Int64x2) ShiftAllLeft(y uint64) Int64x2 + +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLQ, CPU Feature: AVX2 +func (x Int64x4) ShiftAllLeft(y uint64) Int64x4 + +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLQ, CPU Feature: AVX512EVEX +func (x Int64x8) ShiftAllLeft(y uint64) Int64x8 + +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLW, CPU Feature: AVX +func (x Uint16x8) ShiftAllLeft(y uint64) Uint16x8 + +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLW, CPU Feature: AVX2 +func (x Uint16x16) ShiftAllLeft(y uint64) Uint16x16 + +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLD, CPU Feature: AVX +func (x Uint32x4) ShiftAllLeft(y uint64) Uint32x4 + +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLD, CPU Feature: AVX2 +func (x Uint32x8) ShiftAllLeft(y uint64) Uint32x8 + +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLQ, CPU Feature: AVX +func (x Uint64x2) ShiftAllLeft(y uint64) Uint64x2 + +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLQ, CPU Feature: AVX2 +func (x Uint64x4) ShiftAllLeft(y uint64) Uint64x4 + +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLQ, CPU Feature: AVX512EVEX +func (x Uint64x8) ShiftAllLeft(y uint64) Uint64x8 + +/* ShiftAllLeftAndFillUpperFrom */ + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDW, CPU Feature: AVX512EVEX +func (x Int16x8) ShiftAllLeftAndFillUpperFrom(imm uint8, y Int16x8) Int16x8 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDW, CPU Feature: AVX512EVEX +func (x Int16x16) ShiftAllLeftAndFillUpperFrom(imm uint8, y Int16x16) Int16x16 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. 
+// +// Asm: VPSHLDW, CPU Feature: AVX512EVEX +func (x Int16x32) ShiftAllLeftAndFillUpperFrom(imm uint8, y Int16x32) Int16x32 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDD, CPU Feature: AVX512EVEX +func (x Int32x4) ShiftAllLeftAndFillUpperFrom(imm uint8, y Int32x4) Int32x4 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDD, CPU Feature: AVX512EVEX +func (x Int32x8) ShiftAllLeftAndFillUpperFrom(imm uint8, y Int32x8) Int32x8 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDD, CPU Feature: AVX512EVEX +func (x Int32x16) ShiftAllLeftAndFillUpperFrom(imm uint8, y Int32x16) Int32x16 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDQ, CPU Feature: AVX512EVEX +func (x Int64x2) ShiftAllLeftAndFillUpperFrom(imm uint8, y Int64x2) Int64x2 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDQ, CPU Feature: AVX512EVEX +func (x Int64x4) ShiftAllLeftAndFillUpperFrom(imm uint8, y Int64x4) Int64x4 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDQ, CPU Feature: AVX512EVEX +func (x Int64x8) ShiftAllLeftAndFillUpperFrom(imm uint8, y Int64x8) Int64x8 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDW, CPU Feature: AVX512EVEX +func (x Uint16x8) ShiftAllLeftAndFillUpperFrom(imm uint8, y Uint16x8) Uint16x8 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDW, CPU Feature: AVX512EVEX +func (x Uint16x16) ShiftAllLeftAndFillUpperFrom(imm uint8, y Uint16x16) Uint16x16 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. 
+// +// Asm: VPSHLDW, CPU Feature: AVX512EVEX +func (x Uint16x32) ShiftAllLeftAndFillUpperFrom(imm uint8, y Uint16x32) Uint16x32 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDD, CPU Feature: AVX512EVEX +func (x Uint32x4) ShiftAllLeftAndFillUpperFrom(imm uint8, y Uint32x4) Uint32x4 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDD, CPU Feature: AVX512EVEX +func (x Uint32x8) ShiftAllLeftAndFillUpperFrom(imm uint8, y Uint32x8) Uint32x8 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDD, CPU Feature: AVX512EVEX +func (x Uint32x16) ShiftAllLeftAndFillUpperFrom(imm uint8, y Uint32x16) Uint32x16 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDQ, CPU Feature: AVX512EVEX +func (x Uint64x2) ShiftAllLeftAndFillUpperFrom(imm uint8, y Uint64x2) Uint64x2 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDQ, CPU Feature: AVX512EVEX +func (x Uint64x4) ShiftAllLeftAndFillUpperFrom(imm uint8, y Uint64x4) Uint64x4 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDQ, CPU Feature: AVX512EVEX +func (x Uint64x8) ShiftAllLeftAndFillUpperFrom(imm uint8, y Uint64x8) Uint64x8 + +/* ShiftAllRight */ + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLW, CPU Feature: AVX +func (x Int16x8) ShiftAllRight(y uint64) Int16x8 + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLW, CPU Feature: AVX2 +func (x Int16x16) ShiftAllRight(y uint64) Int16x16 + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLD, CPU Feature: AVX +func (x Int32x4) ShiftAllRight(y uint64) Int32x4 + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLD, CPU Feature: AVX2 +func (x Int32x8) ShiftAllRight(y uint64) Int32x8 + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLQ, CPU Feature: AVX +func (x Int64x2) ShiftAllRight(y uint64) Int64x2 + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. 
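
Looking back at the ShiftAllLeftAndFillUpperFrom group above: it pairs naturally with ShiftAllLeft when two vectors hold the high and low halves of wider values, giving a double-width left shift. A sketch under that assumed layout, with the shift amount fixed at an arbitrary constant since the count is an immediate:

	package simdexample

	import "simd"

	// shiftLeft64From32By8 shifts four 64-bit values, stored as separate high
	// and low 32-bit halves, left by 8 bits.
	func shiftLeft64From32By8(hi, lo simd.Uint32x4) (simd.Uint32x4, simd.Uint32x4) {
		const n = 8
		newHi := hi.ShiftAllLeftAndFillUpperFrom(n, lo) // (hi<<8) | (lo>>24)
		newLo := lo.ShiftAllLeft(n)                     // lo << 8
		return newHi, newLo
	}
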
+// +// Asm: VPSRLQ, CPU Feature: AVX2 +func (x Int64x4) ShiftAllRight(y uint64) Int64x4 + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLQ, CPU Feature: AVX512EVEX +func (x Int64x8) ShiftAllRight(y uint64) Int64x8 + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLW, CPU Feature: AVX +func (x Uint16x8) ShiftAllRight(y uint64) Uint16x8 + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLW, CPU Feature: AVX2 +func (x Uint16x16) ShiftAllRight(y uint64) Uint16x16 + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLD, CPU Feature: AVX +func (x Uint32x4) ShiftAllRight(y uint64) Uint32x4 + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLD, CPU Feature: AVX2 +func (x Uint32x8) ShiftAllRight(y uint64) Uint32x8 + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLQ, CPU Feature: AVX +func (x Uint64x2) ShiftAllRight(y uint64) Uint64x2 + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLQ, CPU Feature: AVX2 +func (x Uint64x4) ShiftAllRight(y uint64) Uint64x4 + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLQ, CPU Feature: AVX512EVEX +func (x Uint64x8) ShiftAllRight(y uint64) Uint64x8 + +/* ShiftAllRightAndFillUpperFrom */ + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDW, CPU Feature: AVX512EVEX +func (x Int16x8) ShiftAllRightAndFillUpperFrom(imm uint8, y Int16x8) Int16x8 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDW, CPU Feature: AVX512EVEX +func (x Int16x16) ShiftAllRightAndFillUpperFrom(imm uint8, y Int16x16) Int16x16 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDW, CPU Feature: AVX512EVEX +func (x Int16x32) ShiftAllRightAndFillUpperFrom(imm uint8, y Int16x32) Int16x32 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDD, CPU Feature: AVX512EVEX +func (x Int32x4) ShiftAllRightAndFillUpperFrom(imm uint8, y Int32x4) Int32x4 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. 
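
For the zero-filling ShiftAllRight forms above, a typical small use is isolating a field at the top of each lane; a hedged sketch with an assumed helper name:

	package simdexample

	import "simd"

	// topByte isolates the most significant byte of every 32-bit lane,
	// leaving it as a value in the range [0, 255].
	func topByte(x simd.Uint32x4) simd.Uint32x4 {
		return x.ShiftAllRight(24)
	}
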
+// +// Asm: VPSHRDD, CPU Feature: AVX512EVEX +func (x Int32x8) ShiftAllRightAndFillUpperFrom(imm uint8, y Int32x8) Int32x8 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDD, CPU Feature: AVX512EVEX +func (x Int32x16) ShiftAllRightAndFillUpperFrom(imm uint8, y Int32x16) Int32x16 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDQ, CPU Feature: AVX512EVEX +func (x Int64x2) ShiftAllRightAndFillUpperFrom(imm uint8, y Int64x2) Int64x2 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDQ, CPU Feature: AVX512EVEX +func (x Int64x4) ShiftAllRightAndFillUpperFrom(imm uint8, y Int64x4) Int64x4 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDQ, CPU Feature: AVX512EVEX +func (x Int64x8) ShiftAllRightAndFillUpperFrom(imm uint8, y Int64x8) Int64x8 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDW, CPU Feature: AVX512EVEX +func (x Uint16x8) ShiftAllRightAndFillUpperFrom(imm uint8, y Uint16x8) Uint16x8 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDW, CPU Feature: AVX512EVEX +func (x Uint16x16) ShiftAllRightAndFillUpperFrom(imm uint8, y Uint16x16) Uint16x16 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDW, CPU Feature: AVX512EVEX +func (x Uint16x32) ShiftAllRightAndFillUpperFrom(imm uint8, y Uint16x32) Uint16x32 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDD, CPU Feature: AVX512EVEX +func (x Uint32x4) ShiftAllRightAndFillUpperFrom(imm uint8, y Uint32x4) Uint32x4 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. 
+// +// Asm: VPSHRDD, CPU Feature: AVX512EVEX +func (x Uint32x8) ShiftAllRightAndFillUpperFrom(imm uint8, y Uint32x8) Uint32x8 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDD, CPU Feature: AVX512EVEX +func (x Uint32x16) ShiftAllRightAndFillUpperFrom(imm uint8, y Uint32x16) Uint32x16 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDQ, CPU Feature: AVX512EVEX +func (x Uint64x2) ShiftAllRightAndFillUpperFrom(imm uint8, y Uint64x2) Uint64x2 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDQ, CPU Feature: AVX512EVEX +func (x Uint64x4) ShiftAllRightAndFillUpperFrom(imm uint8, y Uint64x4) Uint64x4 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDQ, CPU Feature: AVX512EVEX +func (x Uint64x8) ShiftAllRightAndFillUpperFrom(imm uint8, y Uint64x8) Uint64x8 + +/* ShiftAllRightSignExtended */ + +// ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAW, CPU Feature: AVX +func (x Int16x8) ShiftAllRightSignExtended(y uint64) Int16x8 + +// ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAW, CPU Feature: AVX2 +func (x Int16x16) ShiftAllRightSignExtended(y uint64) Int16x16 + +// ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAD, CPU Feature: AVX +func (x Int32x4) ShiftAllRightSignExtended(y uint64) Int32x4 + +// ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAD, CPU Feature: AVX2 +func (x Int32x8) ShiftAllRightSignExtended(y uint64) Int32x8 + +// ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAQ, CPU Feature: AVX512EVEX +func (x Int64x2) ShiftAllRightSignExtended(y uint64) Int64x2 + +// ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAQ, CPU Feature: AVX512EVEX +func (x Int64x4) ShiftAllRightSignExtended(y uint64) Int64x4 + +// ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAQ, CPU Feature: AVX512EVEX +func (x Int64x8) ShiftAllRightSignExtended(y uint64) Int64x8 + +/* ShiftLeft */ + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. 
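
Back to the ShiftAllRightSignExtended group above: shifting by one less than the lane width smears the sign bit across the lane, which yields a per-lane sign mask. A sketch with an assumed helper name:

	package simdexample

	import "simd"

	// signMask returns 0 for non-negative lanes of x and -1 (all bits set)
	// for negative lanes, by arithmetically shifting each 32-bit lane by 31.
	func signMask(x simd.Int32x8) simd.Int32x8 {
		return x.ShiftAllRightSignExtended(31)
	}
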
+// +// Asm: VPSLLVW, CPU Feature: AVX512EVEX +func (x Int16x8) ShiftLeft(y Int16x8) Int16x8 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVW, CPU Feature: AVX512EVEX +func (x Int16x16) ShiftLeft(y Int16x16) Int16x16 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVW, CPU Feature: AVX512EVEX +func (x Int16x32) ShiftLeft(y Int16x32) Int16x32 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVD, CPU Feature: AVX2 +func (x Int32x4) ShiftLeft(y Int32x4) Int32x4 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVD, CPU Feature: AVX2 +func (x Int32x8) ShiftLeft(y Int32x8) Int32x8 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVD, CPU Feature: AVX512EVEX +func (x Int32x16) ShiftLeft(y Int32x16) Int32x16 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVQ, CPU Feature: AVX2 +func (x Int64x2) ShiftLeft(y Int64x2) Int64x2 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVQ, CPU Feature: AVX2 +func (x Int64x4) ShiftLeft(y Int64x4) Int64x4 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVQ, CPU Feature: AVX512EVEX +func (x Int64x8) ShiftLeft(y Int64x8) Int64x8 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVW, CPU Feature: AVX512EVEX +func (x Uint16x8) ShiftLeft(y Uint16x8) Uint16x8 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVW, CPU Feature: AVX512EVEX +func (x Uint16x16) ShiftLeft(y Uint16x16) Uint16x16 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVW, CPU Feature: AVX512EVEX +func (x Uint16x32) ShiftLeft(y Uint16x32) Uint16x32 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVD, CPU Feature: AVX2 +func (x Uint32x4) ShiftLeft(y Uint32x4) Uint32x4 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVD, CPU Feature: AVX2 +func (x Uint32x8) ShiftLeft(y Uint32x8) Uint32x8 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. 
+/* ShiftLeftAndFillUpperFrom */
+
+// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
+// corresponding elements in y (only the lower 4 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+//
+// Asm: VPSHLDVW, CPU Feature: AVX512EVEX
+func (x Int16x8) ShiftLeftAndFillUpperFrom(y Int16x8, z Int16x8) Int16x8
+
+// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
+// corresponding elements in y (only the lower 4 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+//
+// Asm: VPSHLDVW, CPU Feature: AVX512EVEX
+func (x Int16x16) ShiftLeftAndFillUpperFrom(y Int16x16, z Int16x16) Int16x16
+
+// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
+// corresponding elements in y (only the lower 4 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+//
+// Asm: VPSHLDVW, CPU Feature: AVX512EVEX
+func (x Int16x32) ShiftLeftAndFillUpperFrom(y Int16x32, z Int16x32) Int16x32
+
+// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
+// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+//
+// Asm: VPSHLDVD, CPU Feature: AVX512EVEX
+func (x Int32x4) ShiftLeftAndFillUpperFrom(y Int32x4, z Int32x4) Int32x4
+
+// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
+// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+//
+// Asm: VPSHLDVD, CPU Feature: AVX512EVEX
+func (x Int32x8) ShiftLeftAndFillUpperFrom(y Int32x8, z Int32x8) Int32x8
+
+// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
+// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+//
+// Asm: VPSHLDVD, CPU Feature: AVX512EVEX
+func (x Int32x16) ShiftLeftAndFillUpperFrom(y Int32x16, z Int32x16) Int32x16
+
+// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
+// corresponding elements in y (only the lower 6 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+//
+// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX
+func (x Int64x2) ShiftLeftAndFillUpperFrom(y Int64x2, z Int64x2) Int64x2
+
+// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
+// corresponding elements in y (only the lower 6 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+//
+// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX
+func (x Int64x4) ShiftLeftAndFillUpperFrom(y Int64x4, z Int64x4) Int64x4
+
+// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
+// corresponding elements in y (only the lower 6 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+//
+// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX
+func (x Int64x8) ShiftLeftAndFillUpperFrom(y Int64x8, z Int64x8) Int64x8
+
+// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
+// corresponding elements in y (only the lower 4 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+//
+// Asm: VPSHLDVW, CPU Feature: AVX512EVEX
+func (x Uint16x8) ShiftLeftAndFillUpperFrom(y Uint16x8, z Uint16x8) Uint16x8
+
+// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
+// corresponding elements in y (only the lower 4 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+//
+// Asm: VPSHLDVW, CPU Feature: AVX512EVEX
+func (x Uint16x16) ShiftLeftAndFillUpperFrom(y Uint16x16, z Uint16x16) Uint16x16
+
+// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
+// corresponding elements in y (only the lower 4 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+//
+// Asm: VPSHLDVW, CPU Feature: AVX512EVEX
+func (x Uint16x32) ShiftLeftAndFillUpperFrom(y Uint16x32, z Uint16x32) Uint16x32
+
+// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
+// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+//
+// Asm: VPSHLDVD, CPU Feature: AVX512EVEX
+func (x Uint32x4) ShiftLeftAndFillUpperFrom(y Uint32x4, z Uint32x4) Uint32x4
+
+// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
+// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+//
+// Asm: VPSHLDVD, CPU Feature: AVX512EVEX
+func (x Uint32x8) ShiftLeftAndFillUpperFrom(y Uint32x8, z Uint32x8) Uint32x8
+
+// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
+// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+//
+// Asm: VPSHLDVD, CPU Feature: AVX512EVEX
+func (x Uint32x16) ShiftLeftAndFillUpperFrom(y Uint32x16, z Uint32x16) Uint32x16
+
+// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
+// corresponding elements in y (only the lower 6 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+//
+// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX
+func (x Uint64x2) ShiftLeftAndFillUpperFrom(y Uint64x2, z Uint64x2) Uint64x2
+
+// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
+// corresponding elements in y (only the lower 6 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+//
+// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX
+func (x Uint64x4) ShiftLeftAndFillUpperFrom(y Uint64x4, z Uint64x4) Uint64x4
+
+// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
+// corresponding elements in y (only the lower 6 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+//
+// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX
+func (x Uint64x8) ShiftLeftAndFillUpperFrom(y Uint64x8, z Uint64x8) Uint64x8
+
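Given the semantics documented above, passing x itself as the fill operand turns the left funnel shift into a per-lane rotate; a sketch under the same assumptions as the earlier examples:

	package simddemo

	import "simd" // experimental package; assumed import path

	// RotateLeftVar rotates each 32-bit lane of x left by the corresponding
	// count in y: the bits shifted out at the top re-enter at the bottom
	// because the emptied low bits are refilled from x's own upper bits
	// (VPSHLDVD with x used as both the shifted and the fill operand).
	func RotateLeftVar(x, y simd.Uint32x8) simd.Uint32x8 {
		return x.ShiftLeftAndFillUpperFrom(y, x)
	}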
+/* ShiftRight */
+
+// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+//
+// Asm: VPSRLVW, CPU Feature: AVX512EVEX
+func (x Int16x8) ShiftRight(y Int16x8) Int16x8
+
+// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+//
+// Asm: VPSRLVW, CPU Feature: AVX512EVEX
+func (x Int16x16) ShiftRight(y Int16x16) Int16x16
+
+// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+//
+// Asm: VPSRLVW, CPU Feature: AVX512EVEX
+func (x Int16x32) ShiftRight(y Int16x32) Int16x32
+
+// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+//
+// Asm: VPSRLVD, CPU Feature: AVX2
+func (x Int32x4) ShiftRight(y Int32x4) Int32x4
+
+// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+//
+// Asm: VPSRLVD, CPU Feature: AVX2
+func (x Int32x8) ShiftRight(y Int32x8) Int32x8
+
+// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+//
+// Asm: VPSRLVD, CPU Feature: AVX512EVEX
+func (x Int32x16) ShiftRight(y Int32x16) Int32x16
+
+// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+//
+// Asm: VPSRLVQ, CPU Feature: AVX2
+func (x Int64x2) ShiftRight(y Int64x2) Int64x2
+
+// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+//
+// Asm: VPSRLVQ, CPU Feature: AVX2
+func (x Int64x4) ShiftRight(y Int64x4) Int64x4
+
+// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+//
+// Asm: VPSRLVQ, CPU Feature: AVX512EVEX
+func (x Int64x8) ShiftRight(y Int64x8) Int64x8
+
+// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+//
+// Asm: VPSRLVW, CPU Feature: AVX512EVEX
+func (x Uint16x8) ShiftRight(y Uint16x8) Uint16x8
+
+// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+//
+// Asm: VPSRLVW, CPU Feature: AVX512EVEX
+func (x Uint16x16) ShiftRight(y Uint16x16) Uint16x16
+
+// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+//
+// Asm: VPSRLVW, CPU Feature: AVX512EVEX
+func (x Uint16x32) ShiftRight(y Uint16x32) Uint16x32
+
+// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+//
+// Asm: VPSRLVD, CPU Feature: AVX2
+func (x Uint32x4) ShiftRight(y Uint32x4) Uint32x4
+
+// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+//
+// Asm: VPSRLVD, CPU Feature: AVX2
+func (x Uint32x8) ShiftRight(y Uint32x8) Uint32x8
+
+// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+//
+// Asm: VPSRLVD, CPU Feature: AVX512EVEX
+func (x Uint32x16) ShiftRight(y Uint32x16) Uint32x16
+
+// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+//
+// Asm: VPSRLVQ, CPU Feature: AVX2
+func (x Uint64x2) ShiftRight(y Uint64x2) Uint64x2
+
+// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+//
+// Asm: VPSRLVQ, CPU Feature: AVX2
+func (x Uint64x4) ShiftRight(y Uint64x4) Uint64x4
+
+// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+//
+// Asm: VPSRLVQ, CPU Feature: AVX512EVEX
+func (x Uint64x8) ShiftRight(y Uint64x8) Uint64x8
+
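The logical right shift pairs naturally with the left shift above for bit-field extraction; a sketch under the same assumptions, using only methods declared in this file:

	package simddemo

	import "simd" // experimental package; assumed import path

	// ExtractBits isolates a bit field in each 32-bit lane: shift left to
	// discard the bits above the field, then shift right to move the field
	// down to bit 0. Both shifts zero the vacated bits (VPSLLVD, VPSRLVD).
	func ExtractBits(x, left, right simd.Uint32x8) simd.Uint32x8 {
		return x.ShiftLeft(left).ShiftRight(right)
	}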
+/* ShiftRightAndFillUpperFrom */
+
+// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y (only the lower 4 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+//
+// Asm: VPSHRDVW, CPU Feature: AVX512EVEX
+func (x Int16x8) ShiftRightAndFillUpperFrom(y Int16x8, z Int16x8) Int16x8
+
+// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y (only the lower 4 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+//
+// Asm: VPSHRDVW, CPU Feature: AVX512EVEX
+func (x Int16x16) ShiftRightAndFillUpperFrom(y Int16x16, z Int16x16) Int16x16
+
+// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y (only the lower 4 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+//
+// Asm: VPSHRDVW, CPU Feature: AVX512EVEX
+func (x Int16x32) ShiftRightAndFillUpperFrom(y Int16x32, z Int16x32) Int16x32
+
+// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+//
+// Asm: VPSHRDVD, CPU Feature: AVX512EVEX
+func (x Int32x4) ShiftRightAndFillUpperFrom(y Int32x4, z Int32x4) Int32x4
+
+// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+//
+// Asm: VPSHRDVD, CPU Feature: AVX512EVEX
+func (x Int32x8) ShiftRightAndFillUpperFrom(y Int32x8, z Int32x8) Int32x8
+
+// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+//
+// Asm: VPSHRDVD, CPU Feature: AVX512EVEX
+func (x Int32x16) ShiftRightAndFillUpperFrom(y Int32x16, z Int32x16) Int32x16
+
+// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y (only the lower 6 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+//
+// Asm: VPSHRDVQ, CPU Feature: AVX512EVEX
+func (x Int64x2) ShiftRightAndFillUpperFrom(y Int64x2, z Int64x2) Int64x2
+
+// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y (only the lower 6 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+//
+// Asm: VPSHRDVQ, CPU Feature: AVX512EVEX
+func (x Int64x4) ShiftRightAndFillUpperFrom(y Int64x4, z Int64x4) Int64x4
+
+// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y (only the lower 6 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+//
+// Asm: VPSHRDVQ, CPU Feature: AVX512EVEX
+func (x Int64x8) ShiftRightAndFillUpperFrom(y Int64x8, z Int64x8) Int64x8
+
+// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y (only the lower 4 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+//
+// Asm: VPSHRDVW, CPU Feature: AVX512EVEX
+func (x Uint16x8) ShiftRightAndFillUpperFrom(y Uint16x8, z Uint16x8) Uint16x8
+
+// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y (only the lower 4 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+//
+// Asm: VPSHRDVW, CPU Feature: AVX512EVEX
+func (x Uint16x16) ShiftRightAndFillUpperFrom(y Uint16x16, z Uint16x16) Uint16x16
+
+// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y (only the lower 4 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+//
+// Asm: VPSHRDVW, CPU Feature: AVX512EVEX
+func (x Uint16x32) ShiftRightAndFillUpperFrom(y Uint16x32, z Uint16x32) Uint16x32
+
+// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+//
+// Asm: VPSHRDVD, CPU Feature: AVX512EVEX
+func (x Uint32x4) ShiftRightAndFillUpperFrom(y Uint32x4, z Uint32x4) Uint32x4
+
+// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+//
+// Asm: VPSHRDVD, CPU Feature: AVX512EVEX
+func (x Uint32x8) ShiftRightAndFillUpperFrom(y Uint32x8, z Uint32x8) Uint32x8
+
+// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+//
+// Asm: VPSHRDVD, CPU Feature: AVX512EVEX
+func (x Uint32x16) ShiftRightAndFillUpperFrom(y Uint32x16, z Uint32x16) Uint32x16
+
+// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y (only the lower 6 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+//
+// Asm: VPSHRDVQ, CPU Feature: AVX512EVEX
+func (x Uint64x2) ShiftRightAndFillUpperFrom(y Uint64x2, z Uint64x2) Uint64x2
+
+// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y (only the lower 6 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+//
+// Asm: VPSHRDVQ, CPU Feature: AVX512EVEX
+func (x Uint64x4) ShiftRightAndFillUpperFrom(y Uint64x4, z Uint64x4) Uint64x4
+
+// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y (only the lower 6 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+//
+// Asm: VPSHRDVQ, CPU Feature: AVX512EVEX
+func (x Uint64x8) ShiftRightAndFillUpperFrom(y Uint64x8, z Uint64x8) Uint64x8
+
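Because the vacated bits are refilled from a second vector, the right funnel shift can also extract an unaligned window that spans a pair of adjacent words; a sketch under the same assumptions, valid for counts between 1 and 63:

	package simddemo

	import "simd" // experimental package; assumed import path

	// Window returns, per 64-bit lane, the 64-bit window that starts n bits
	// into lo, with the bits beyond lo's top filled in from the low bits of
	// hi (VPSHRDVQ): roughly (lo >> n) | (hi << (64 - n)) for 0 < n < 64.
	func Window(lo, hi, n simd.Uint64x4) simd.Uint64x4 {
		return lo.ShiftRightAndFillUpperFrom(n, hi)
	}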
+/* ShiftRightSignExtended */
+
+// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+//
+// Asm: VPSRAVW, CPU Feature: AVX512EVEX
+func (x Int16x8) ShiftRightSignExtended(y Int16x8) Int16x8
+
+// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+//
+// Asm: VPSRAVW, CPU Feature: AVX512EVEX
+func (x Int16x16) ShiftRightSignExtended(y Int16x16) Int16x16
+
+// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+//
+// Asm: VPSRAVW, CPU Feature: AVX512EVEX
+func (x Int16x32) ShiftRightSignExtended(y Int16x32) Int16x32
+
+// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+//
+// Asm: VPSRAVD, CPU Feature: AVX2
+func (x Int32x4) ShiftRightSignExtended(y Int32x4) Int32x4
+
+// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+//
+// Asm: VPSRAVD, CPU Feature: AVX2
+func (x Int32x8) ShiftRightSignExtended(y Int32x8) Int32x8
+
+// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+//
+// Asm: VPSRAVD, CPU Feature: AVX512EVEX
+func (x Int32x16) ShiftRightSignExtended(y Int32x16) Int32x16
+
+// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+//
+// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
+func (x Int64x2) ShiftRightSignExtended(y Int64x2) Int64x2
+
+// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+//
+// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
+func (x Int64x4) ShiftRightSignExtended(y Int64x4) Int64x4
+
+// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+//
+// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
+func (x Int64x8) ShiftRightSignExtended(y Int64x8) Int64x8
+
+// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+//
+// Asm: VPSRAVW, CPU Feature: AVX512EVEX
+func (x Uint16x8) ShiftRightSignExtended(y Uint16x8) Uint16x8
+
+// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+//
+// Asm: VPSRAVW, CPU Feature: AVX512EVEX
+func (x Uint16x16) ShiftRightSignExtended(y Uint16x16) Uint16x16
+
+// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+//
+// Asm: VPSRAVW, CPU Feature: AVX512EVEX
+func (x Uint16x32) ShiftRightSignExtended(y Uint16x32) Uint16x32
+
+// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+//
+// Asm: VPSRAVD, CPU Feature: AVX2
+func (x Uint32x4) ShiftRightSignExtended(y Uint32x4) Uint32x4
+
+// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+//
+// Asm: VPSRAVD, CPU Feature: AVX2
+func (x Uint32x8) ShiftRightSignExtended(y Uint32x8) Uint32x8
+
+// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+//
+// Asm: VPSRAVD, CPU Feature: AVX512EVEX
+func (x Uint32x16) ShiftRightSignExtended(y Uint32x16) Uint32x16
+
+// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+//
+// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
+func (x Uint64x2) ShiftRightSignExtended(y Uint64x2) Uint64x2
+
+// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+//
+// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
+func (x Uint64x4) ShiftRightSignExtended(y Uint64x4) Uint64x4
+
+// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+//
+// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
+func (x Uint64x8) ShiftRightSignExtended(y Uint64x8) Uint64x8
+
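A final sketch contrasting the two per-lane right shifts declared in this file, under the same assumptions as the earlier examples:

	package simddemo

	import "simd" // experimental package; assumed import path

	// ShiftRightBoth shows the difference between the per-lane right shifts:
	// ShiftRight zero-fills the vacated high bits (VPSRLVD), while
	// ShiftRightSignExtended replicates the sign bit (VPSRAVD), preserving
	// the sign of negative lanes.
	func ShiftRightBoth(x, counts simd.Int32x8) (logical, arithmetic simd.Int32x8) {
		logical = x.ShiftRight(counts)
		arithmetic = x.ShiftRightSignExtended(counts)
		return
	}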
 /* Sign */
 
 // Sign returns the product of the first operand with -1, 0, or 1,