[dev.simd] cmd/compile, simd: adjust Shift.* operations

This CL does the following: 1. Removes ShiftRightSignExtended; right shifts on signed vectors now default to arithmetic shifts, and right shifts on unsigned vectors to logical shifts. 2. Adds the missing Shifts that were left out because of a YAML error in the generator. This CL is generated by CL 687595. Change-Id: I663115498adb91c82e89a8476e6748794e997cfa Reviewed-on: https://go-review.googlesource.com/c/go/+/687596 LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: David Chase <drchase@google.com> Reviewed-by: Cherry Mui <cherryyz@google.com>
2025-12-08 06:10:04 +00:00 · 2025-07-11 17:56:22 +00:00 · 2025-07-11 17:56:22 +00:00 · b69622b83e
commit b69622b83e
parent 4993a91ae1
9 changed files with 2021 additions and 1846 deletions
--- a/src/cmd/compile/internal/amd64/simdssa.go
+++ b/src/cmd/compile/internal/amd64/simdssa.go
@ -273,15 +273,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
 		ssa.OpAMD64VPSLLVQ128,
 		ssa.OpAMD64VPSLLVQ256,
 		ssa.OpAMD64VPSLLVQ512,
-		ssa.OpAMD64VPSRLVW128,
-		ssa.OpAMD64VPSRLVW256,
-		ssa.OpAMD64VPSRLVW512,
-		ssa.OpAMD64VPSRLVD128,
-		ssa.OpAMD64VPSRLVD256,
-		ssa.OpAMD64VPSRLVD512,
-		ssa.OpAMD64VPSRLVQ128,
-		ssa.OpAMD64VPSRLVQ256,
-		ssa.OpAMD64VPSRLVQ512,
 		ssa.OpAMD64VPSRAVW128,
 		ssa.OpAMD64VPSRAVW256,
 		ssa.OpAMD64VPSRAVW512,
@ -291,6 +282,15 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
 		ssa.OpAMD64VPSRAVQ128,
 		ssa.OpAMD64VPSRAVQ256,
 		ssa.OpAMD64VPSRAVQ512,
+		ssa.OpAMD64VPSRLVW128,
+		ssa.OpAMD64VPSRLVW256,
+		ssa.OpAMD64VPSRLVW512,
+		ssa.OpAMD64VPSRLVD128,
+		ssa.OpAMD64VPSRLVD256,
+		ssa.OpAMD64VPSRLVD512,
+		ssa.OpAMD64VPSRLVQ128,
+		ssa.OpAMD64VPSRLVQ256,
+		ssa.OpAMD64VPSRLVQ512,
 		ssa.OpAMD64VPSIGNB128,
 		ssa.OpAMD64VPSIGNB256,
 		ssa.OpAMD64VPSIGNW128,
@ -504,15 +504,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
 		ssa.OpAMD64VPSLLVQMasked128,
 		ssa.OpAMD64VPSLLVQMasked256,
 		ssa.OpAMD64VPSLLVQMasked512,
-		ssa.OpAMD64VPSRLVWMasked128,
-		ssa.OpAMD64VPSRLVWMasked256,
-		ssa.OpAMD64VPSRLVWMasked512,
-		ssa.OpAMD64VPSRLVDMasked128,
-		ssa.OpAMD64VPSRLVDMasked256,
-		ssa.OpAMD64VPSRLVDMasked512,
-		ssa.OpAMD64VPSRLVQMasked128,
-		ssa.OpAMD64VPSRLVQMasked256,
-		ssa.OpAMD64VPSRLVQMasked512,
 		ssa.OpAMD64VPSRAVWMasked128,
 		ssa.OpAMD64VPSRAVWMasked256,
 		ssa.OpAMD64VPSRAVWMasked512,
@ -522,6 +513,15 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
 		ssa.OpAMD64VPSRAVQMasked128,
 		ssa.OpAMD64VPSRAVQMasked256,
 		ssa.OpAMD64VPSRAVQMasked512,
+		ssa.OpAMD64VPSRLVWMasked128,
+		ssa.OpAMD64VPSRLVWMasked256,
+		ssa.OpAMD64VPSRLVWMasked512,
+		ssa.OpAMD64VPSRLVDMasked128,
+		ssa.OpAMD64VPSRLVDMasked256,
+		ssa.OpAMD64VPSRLVDMasked512,
+		ssa.OpAMD64VPSRLVQMasked128,
+		ssa.OpAMD64VPSRLVQMasked256,
+		ssa.OpAMD64VPSRLVQMasked512,
 		ssa.OpAMD64VSUBPSMasked128,
 		ssa.OpAMD64VSUBPSMasked256,
 		ssa.OpAMD64VSUBPSMasked512,
@ -845,36 +845,60 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {

 	case ssa.OpAMD64VPSLLW128,
 		ssa.OpAMD64VPSLLW256,
+		ssa.OpAMD64VPSLLW512,
 		ssa.OpAMD64VPSLLD128,
 		ssa.OpAMD64VPSLLD256,
+		ssa.OpAMD64VPSLLD512,
 		ssa.OpAMD64VPSLLQ128,
 		ssa.OpAMD64VPSLLQ256,
 		ssa.OpAMD64VPSLLQ512,
-		ssa.OpAMD64VPSRLW128,
-		ssa.OpAMD64VPSRLW256,
-		ssa.OpAMD64VPSRLD128,
-		ssa.OpAMD64VPSRLD256,
-		ssa.OpAMD64VPSRLQ128,
-		ssa.OpAMD64VPSRLQ256,
-		ssa.OpAMD64VPSRLQ512,
 		ssa.OpAMD64VPSRAW128,
 		ssa.OpAMD64VPSRAW256,
+		ssa.OpAMD64VPSRAW512,
 		ssa.OpAMD64VPSRAD128,
 		ssa.OpAMD64VPSRAD256,
+		ssa.OpAMD64VPSRAD512,
 		ssa.OpAMD64VPSRAQ128,
 		ssa.OpAMD64VPSRAQ256,
-		ssa.OpAMD64VPSRAQ512:
+		ssa.OpAMD64VPSRAQ512,
+		ssa.OpAMD64VPSRLW128,
+		ssa.OpAMD64VPSRLW256,
+		ssa.OpAMD64VPSRLW512,
+		ssa.OpAMD64VPSRLD128,
+		ssa.OpAMD64VPSRLD256,
+		ssa.OpAMD64VPSRLD512,
+		ssa.OpAMD64VPSRLQ128,
+		ssa.OpAMD64VPSRLQ256,
+		ssa.OpAMD64VPSRLQ512:
 		p = simdVfpv(s, v)

-	case ssa.OpAMD64VPSLLQMasked128,
+	case ssa.OpAMD64VPSLLWMasked128,
+		ssa.OpAMD64VPSLLWMasked256,
+		ssa.OpAMD64VPSLLWMasked512,
+		ssa.OpAMD64VPSLLDMasked128,
+		ssa.OpAMD64VPSLLDMasked256,
+		ssa.OpAMD64VPSLLDMasked512,
+		ssa.OpAMD64VPSLLQMasked128,
 		ssa.OpAMD64VPSLLQMasked256,
 		ssa.OpAMD64VPSLLQMasked512,
-		ssa.OpAMD64VPSRLQMasked128,
-		ssa.OpAMD64VPSRLQMasked256,
-		ssa.OpAMD64VPSRLQMasked512,
+		ssa.OpAMD64VPSRAWMasked128,
+		ssa.OpAMD64VPSRAWMasked256,
+		ssa.OpAMD64VPSRAWMasked512,
+		ssa.OpAMD64VPSRADMasked128,
+		ssa.OpAMD64VPSRADMasked256,
+		ssa.OpAMD64VPSRADMasked512,
 		ssa.OpAMD64VPSRAQMasked128,
 		ssa.OpAMD64VPSRAQMasked256,
-		ssa.OpAMD64VPSRAQMasked512:
+		ssa.OpAMD64VPSRAQMasked512,
+		ssa.OpAMD64VPSRLWMasked128,
+		ssa.OpAMD64VPSRLWMasked256,
+		ssa.OpAMD64VPSRLWMasked512,
+		ssa.OpAMD64VPSRLDMasked128,
+		ssa.OpAMD64VPSRLDMasked256,
+		ssa.OpAMD64VPSRLDMasked512,
+		ssa.OpAMD64VPSRLQMasked128,
+		ssa.OpAMD64VPSRLQMasked256,
+		ssa.OpAMD64VPSRLQMasked512:
 		p = simdVfpkv(s, v)

 	case ssa.OpAMD64VPINSRB128,
@ -1198,6 +1222,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
 		ssa.OpAMD64VPSHLDQMasked128,
 		ssa.OpAMD64VPSHLDQMasked256,
 		ssa.OpAMD64VPSHLDQMasked512,
+		ssa.OpAMD64VPSLLWMasked128,
+		ssa.OpAMD64VPSLLWMasked256,
+		ssa.OpAMD64VPSLLWMasked512,
+		ssa.OpAMD64VPSLLDMasked128,
+		ssa.OpAMD64VPSLLDMasked256,
+		ssa.OpAMD64VPSLLDMasked512,
 		ssa.OpAMD64VPSLLQMasked128,
 		ssa.OpAMD64VPSLLQMasked256,
 		ssa.OpAMD64VPSLLQMasked512,
@ -1210,12 +1240,24 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
 		ssa.OpAMD64VPSHRDQMasked128,
 		ssa.OpAMD64VPSHRDQMasked256,
 		ssa.OpAMD64VPSHRDQMasked512,
-		ssa.OpAMD64VPSRLQMasked128,
-		ssa.OpAMD64VPSRLQMasked256,
-		ssa.OpAMD64VPSRLQMasked512,
+		ssa.OpAMD64VPSRAWMasked128,
+		ssa.OpAMD64VPSRAWMasked256,
+		ssa.OpAMD64VPSRAWMasked512,
+		ssa.OpAMD64VPSRADMasked128,
+		ssa.OpAMD64VPSRADMasked256,
+		ssa.OpAMD64VPSRADMasked512,
 		ssa.OpAMD64VPSRAQMasked128,
 		ssa.OpAMD64VPSRAQMasked256,
 		ssa.OpAMD64VPSRAQMasked512,
+		ssa.OpAMD64VPSRLWMasked128,
+		ssa.OpAMD64VPSRLWMasked256,
+		ssa.OpAMD64VPSRLWMasked512,
+		ssa.OpAMD64VPSRLDMasked128,
+		ssa.OpAMD64VPSRLDMasked256,
+		ssa.OpAMD64VPSRLDMasked512,
+		ssa.OpAMD64VPSRLQMasked128,
+		ssa.OpAMD64VPSRLQMasked256,
+		ssa.OpAMD64VPSRLQMasked512,
 		ssa.OpAMD64VPSHLDVWMasked128,
 		ssa.OpAMD64VPSHLDVWMasked256,
 		ssa.OpAMD64VPSHLDVWMasked512,
@ -1243,15 +1285,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
 		ssa.OpAMD64VPSHRDVQMasked128,
 		ssa.OpAMD64VPSHRDVQMasked256,
 		ssa.OpAMD64VPSHRDVQMasked512,
-		ssa.OpAMD64VPSRLVWMasked128,
-		ssa.OpAMD64VPSRLVWMasked256,
-		ssa.OpAMD64VPSRLVWMasked512,
-		ssa.OpAMD64VPSRLVDMasked128,
-		ssa.OpAMD64VPSRLVDMasked256,
-		ssa.OpAMD64VPSRLVDMasked512,
-		ssa.OpAMD64VPSRLVQMasked128,
-		ssa.OpAMD64VPSRLVQMasked256,
-		ssa.OpAMD64VPSRLVQMasked512,
 		ssa.OpAMD64VPSRAVWMasked128,
 		ssa.OpAMD64VPSRAVWMasked256,
 		ssa.OpAMD64VPSRAVWMasked512,
@ -1261,6 +1294,15 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
 		ssa.OpAMD64VPSRAVQMasked128,
 		ssa.OpAMD64VPSRAVQMasked256,
 		ssa.OpAMD64VPSRAVQMasked512,
+		ssa.OpAMD64VPSRLVWMasked128,
+		ssa.OpAMD64VPSRLVWMasked256,
+		ssa.OpAMD64VPSRLVWMasked512,
+		ssa.OpAMD64VPSRLVDMasked128,
+		ssa.OpAMD64VPSRLVDMasked256,
+		ssa.OpAMD64VPSRLVDMasked512,
+		ssa.OpAMD64VPSRLVQMasked128,
+		ssa.OpAMD64VPSRLVQMasked256,
+		ssa.OpAMD64VPSRLVQMasked512,
 		ssa.OpAMD64VSQRTPSMasked128,
 		ssa.OpAMD64VSQRTPSMasked256,
 		ssa.OpAMD64VSQRTPSMasked512,
--- a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
@ -1239,15 +1239,19 @@
 (SetElemUint64x2 ...) => (VPINSRQ128 ...)
 (ShiftAllLeftInt16x8 ...) => (VPSLLW128 ...)
 (ShiftAllLeftInt16x16 ...) => (VPSLLW256 ...)
+(ShiftAllLeftInt16x32 ...) => (VPSLLW512 ...)
 (ShiftAllLeftInt32x4 ...) => (VPSLLD128 ...)
 (ShiftAllLeftInt32x8 ...) => (VPSLLD256 ...)
+(ShiftAllLeftInt32x16 ...) => (VPSLLD512 ...)
 (ShiftAllLeftInt64x2 ...) => (VPSLLQ128 ...)
 (ShiftAllLeftInt64x4 ...) => (VPSLLQ256 ...)
 (ShiftAllLeftInt64x8 ...) => (VPSLLQ512 ...)
 (ShiftAllLeftUint16x8 ...) => (VPSLLW128 ...)
 (ShiftAllLeftUint16x16 ...) => (VPSLLW256 ...)
+(ShiftAllLeftUint16x32 ...) => (VPSLLW512 ...)
 (ShiftAllLeftUint32x4 ...) => (VPSLLD128 ...)
 (ShiftAllLeftUint32x8 ...) => (VPSLLD256 ...)
+(ShiftAllLeftUint32x16 ...) => (VPSLLD512 ...)
 (ShiftAllLeftUint64x2 ...) => (VPSLLQ128 ...)
 (ShiftAllLeftUint64x4 ...) => (VPSLLQ256 ...)
 (ShiftAllLeftUint64x8 ...) => (VPSLLQ512 ...)
@ -1287,23 +1291,39 @@
 (ShiftAllLeftAndFillUpperFromMaskedUint64x2 [a] x y mask) => (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM <types.TypeMask> mask))
 (ShiftAllLeftAndFillUpperFromMaskedUint64x4 [a] x y mask) => (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM <types.TypeMask> mask))
 (ShiftAllLeftAndFillUpperFromMaskedUint64x8 [a] x y mask) => (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+(ShiftAllLeftMaskedInt16x8 x y mask) => (VPSLLWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+(ShiftAllLeftMaskedInt16x16 x y mask) => (VPSLLWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+(ShiftAllLeftMaskedInt16x32 x y mask) => (VPSLLWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+(ShiftAllLeftMaskedInt32x4 x y mask) => (VPSLLDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+(ShiftAllLeftMaskedInt32x8 x y mask) => (VPSLLDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+(ShiftAllLeftMaskedInt32x16 x y mask) => (VPSLLDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
 (ShiftAllLeftMaskedInt64x2 x y mask) => (VPSLLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
 (ShiftAllLeftMaskedInt64x4 x y mask) => (VPSLLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
 (ShiftAllLeftMaskedInt64x8 x y mask) => (VPSLLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+(ShiftAllLeftMaskedUint16x8 x y mask) => (VPSLLWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+(ShiftAllLeftMaskedUint16x16 x y mask) => (VPSLLWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+(ShiftAllLeftMaskedUint16x32 x y mask) => (VPSLLWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+(ShiftAllLeftMaskedUint32x4 x y mask) => (VPSLLDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+(ShiftAllLeftMaskedUint32x8 x y mask) => (VPSLLDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+(ShiftAllLeftMaskedUint32x16 x y mask) => (VPSLLDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
 (ShiftAllLeftMaskedUint64x2 x y mask) => (VPSLLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
 (ShiftAllLeftMaskedUint64x4 x y mask) => (VPSLLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
 (ShiftAllLeftMaskedUint64x8 x y mask) => (VPSLLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(ShiftAllRightInt16x8 ...) => (VPSRLW128 ...)
-(ShiftAllRightInt16x16 ...) => (VPSRLW256 ...)
-(ShiftAllRightInt32x4 ...) => (VPSRLD128 ...)
-(ShiftAllRightInt32x8 ...) => (VPSRLD256 ...)
-(ShiftAllRightInt64x2 ...) => (VPSRLQ128 ...)
-(ShiftAllRightInt64x4 ...) => (VPSRLQ256 ...)
-(ShiftAllRightInt64x8 ...) => (VPSRLQ512 ...)
+(ShiftAllRightInt16x8 ...) => (VPSRAW128 ...)
+(ShiftAllRightInt16x16 ...) => (VPSRAW256 ...)
+(ShiftAllRightInt16x32 ...) => (VPSRAW512 ...)
+(ShiftAllRightInt32x4 ...) => (VPSRAD128 ...)
+(ShiftAllRightInt32x8 ...) => (VPSRAD256 ...)
+(ShiftAllRightInt32x16 ...) => (VPSRAD512 ...)
+(ShiftAllRightInt64x2 ...) => (VPSRAQ128 ...)
+(ShiftAllRightInt64x4 ...) => (VPSRAQ256 ...)
+(ShiftAllRightInt64x8 ...) => (VPSRAQ512 ...)
 (ShiftAllRightUint16x8 ...) => (VPSRLW128 ...)
 (ShiftAllRightUint16x16 ...) => (VPSRLW256 ...)
+(ShiftAllRightUint16x32 ...) => (VPSRLW512 ...)
 (ShiftAllRightUint32x4 ...) => (VPSRLD128 ...)
 (ShiftAllRightUint32x8 ...) => (VPSRLD256 ...)
+(ShiftAllRightUint32x16 ...) => (VPSRLD512 ...)
 (ShiftAllRightUint64x2 ...) => (VPSRLQ128 ...)
 (ShiftAllRightUint64x4 ...) => (VPSRLQ256 ...)
 (ShiftAllRightUint64x8 ...) => (VPSRLQ512 ...)
@ -1343,22 +1363,24 @@
 (ShiftAllRightAndFillUpperFromMaskedUint64x2 [a] x y mask) => (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM <types.TypeMask> mask))
 (ShiftAllRightAndFillUpperFromMaskedUint64x4 [a] x y mask) => (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM <types.TypeMask> mask))
 (ShiftAllRightAndFillUpperFromMaskedUint64x8 [a] x y mask) => (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(ShiftAllRightMaskedInt64x2 x y mask) => (VPSRLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(ShiftAllRightMaskedInt64x4 x y mask) => (VPSRLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(ShiftAllRightMaskedInt64x8 x y mask) => (VPSRLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+(ShiftAllRightMaskedInt16x8 x y mask) => (VPSRAWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+(ShiftAllRightMaskedInt16x16 x y mask) => (VPSRAWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+(ShiftAllRightMaskedInt16x32 x y mask) => (VPSRAWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+(ShiftAllRightMaskedInt32x4 x y mask) => (VPSRADMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+(ShiftAllRightMaskedInt32x8 x y mask) => (VPSRADMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+(ShiftAllRightMaskedInt32x16 x y mask) => (VPSRADMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+(ShiftAllRightMaskedInt64x2 x y mask) => (VPSRAQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+(ShiftAllRightMaskedInt64x4 x y mask) => (VPSRAQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+(ShiftAllRightMaskedInt64x8 x y mask) => (VPSRAQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+(ShiftAllRightMaskedUint16x8 x y mask) => (VPSRLWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+(ShiftAllRightMaskedUint16x16 x y mask) => (VPSRLWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+(ShiftAllRightMaskedUint16x32 x y mask) => (VPSRLWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+(ShiftAllRightMaskedUint32x4 x y mask) => (VPSRLDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+(ShiftAllRightMaskedUint32x8 x y mask) => (VPSRLDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+(ShiftAllRightMaskedUint32x16 x y mask) => (VPSRLDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
 (ShiftAllRightMaskedUint64x2 x y mask) => (VPSRLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
 (ShiftAllRightMaskedUint64x4 x y mask) => (VPSRLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
 (ShiftAllRightMaskedUint64x8 x y mask) => (VPSRLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(ShiftAllRightSignExtendedInt16x8 ...) => (VPSRAW128 ...)
-(ShiftAllRightSignExtendedInt16x16 ...) => (VPSRAW256 ...)
-(ShiftAllRightSignExtendedInt32x4 ...) => (VPSRAD128 ...)
-(ShiftAllRightSignExtendedInt32x8 ...) => (VPSRAD256 ...)
-(ShiftAllRightSignExtendedInt64x2 ...) => (VPSRAQ128 ...)
-(ShiftAllRightSignExtendedInt64x4 ...) => (VPSRAQ256 ...)
-(ShiftAllRightSignExtendedInt64x8 ...) => (VPSRAQ512 ...)
-(ShiftAllRightSignExtendedMaskedInt64x2 x y mask) => (VPSRAQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(ShiftAllRightSignExtendedMaskedInt64x4 x y mask) => (VPSRAQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(ShiftAllRightSignExtendedMaskedInt64x8 x y mask) => (VPSRAQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
 (ShiftLeftInt16x8 ...) => (VPSLLVW128 ...)
 (ShiftLeftInt16x16 ...) => (VPSLLVW256 ...)
 (ShiftLeftInt16x32 ...) => (VPSLLVW512 ...)
@ -1431,15 +1453,15 @@
 (ShiftLeftMaskedUint64x2 x y mask) => (VPSLLVQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
 (ShiftLeftMaskedUint64x4 x y mask) => (VPSLLVQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
 (ShiftLeftMaskedUint64x8 x y mask) => (VPSLLVQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(ShiftRightInt16x8 ...) => (VPSRLVW128 ...)
-(ShiftRightInt16x16 ...) => (VPSRLVW256 ...)
-(ShiftRightInt16x32 ...) => (VPSRLVW512 ...)
-(ShiftRightInt32x4 ...) => (VPSRLVD128 ...)
-(ShiftRightInt32x8 ...) => (VPSRLVD256 ...)
-(ShiftRightInt32x16 ...) => (VPSRLVD512 ...)
-(ShiftRightInt64x2 ...) => (VPSRLVQ128 ...)
-(ShiftRightInt64x4 ...) => (VPSRLVQ256 ...)
-(ShiftRightInt64x8 ...) => (VPSRLVQ512 ...)
+(ShiftRightInt16x8 ...) => (VPSRAVW128 ...)
+(ShiftRightInt16x16 ...) => (VPSRAVW256 ...)
+(ShiftRightInt16x32 ...) => (VPSRAVW512 ...)
+(ShiftRightInt32x4 ...) => (VPSRAVD128 ...)
+(ShiftRightInt32x8 ...) => (VPSRAVD256 ...)
+(ShiftRightInt32x16 ...) => (VPSRAVD512 ...)
+(ShiftRightInt64x2 ...) => (VPSRAVQ128 ...)
+(ShiftRightInt64x4 ...) => (VPSRAVQ256 ...)
+(ShiftRightInt64x8 ...) => (VPSRAVQ512 ...)
 (ShiftRightUint16x8 ...) => (VPSRLVW128 ...)
 (ShiftRightUint16x16 ...) => (VPSRLVW256 ...)
 (ShiftRightUint16x32 ...) => (VPSRLVW512 ...)
@ -1485,15 +1507,15 @@
 (ShiftRightAndFillUpperFromMaskedUint64x2 x y z mask) => (VPSHRDVQMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
 (ShiftRightAndFillUpperFromMaskedUint64x4 x y z mask) => (VPSHRDVQMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
 (ShiftRightAndFillUpperFromMaskedUint64x8 x y z mask) => (VPSHRDVQMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
-(ShiftRightMaskedInt16x8 x y mask) => (VPSRLVWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(ShiftRightMaskedInt16x16 x y mask) => (VPSRLVWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(ShiftRightMaskedInt16x32 x y mask) => (VPSRLVWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(ShiftRightMaskedInt32x4 x y mask) => (VPSRLVDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(ShiftRightMaskedInt32x8 x y mask) => (VPSRLVDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(ShiftRightMaskedInt32x16 x y mask) => (VPSRLVDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(ShiftRightMaskedInt64x2 x y mask) => (VPSRLVQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(ShiftRightMaskedInt64x4 x y mask) => (VPSRLVQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(ShiftRightMaskedInt64x8 x y mask) => (VPSRLVQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+(ShiftRightMaskedInt16x8 x y mask) => (VPSRAVWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+(ShiftRightMaskedInt16x16 x y mask) => (VPSRAVWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+(ShiftRightMaskedInt16x32 x y mask) => (VPSRAVWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+(ShiftRightMaskedInt32x4 x y mask) => (VPSRAVDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+(ShiftRightMaskedInt32x8 x y mask) => (VPSRAVDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+(ShiftRightMaskedInt32x16 x y mask) => (VPSRAVDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+(ShiftRightMaskedInt64x2 x y mask) => (VPSRAVQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+(ShiftRightMaskedInt64x4 x y mask) => (VPSRAVQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+(ShiftRightMaskedInt64x8 x y mask) => (VPSRAVQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
 (ShiftRightMaskedUint16x8 x y mask) => (VPSRLVWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
 (ShiftRightMaskedUint16x16 x y mask) => (VPSRLVWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
 (ShiftRightMaskedUint16x32 x y mask) => (VPSRLVWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
@ -1503,42 +1525,6 @@
 (ShiftRightMaskedUint64x2 x y mask) => (VPSRLVQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
 (ShiftRightMaskedUint64x4 x y mask) => (VPSRLVQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
 (ShiftRightMaskedUint64x8 x y mask) => (VPSRLVQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(ShiftRightSignExtendedInt16x8 ...) => (VPSRAVW128 ...)
-(ShiftRightSignExtendedInt16x16 ...) => (VPSRAVW256 ...)
-(ShiftRightSignExtendedInt16x32 ...) => (VPSRAVW512 ...)
-(ShiftRightSignExtendedInt32x4 ...) => (VPSRAVD128 ...)
-(ShiftRightSignExtendedInt32x8 ...) => (VPSRAVD256 ...)
-(ShiftRightSignExtendedInt32x16 ...) => (VPSRAVD512 ...)
-(ShiftRightSignExtendedInt64x2 ...) => (VPSRAVQ128 ...)
-(ShiftRightSignExtendedInt64x4 ...) => (VPSRAVQ256 ...)
-(ShiftRightSignExtendedInt64x8 ...) => (VPSRAVQ512 ...)
-(ShiftRightSignExtendedUint16x8 ...) => (VPSRAVW128 ...)
-(ShiftRightSignExtendedUint16x16 ...) => (VPSRAVW256 ...)
-(ShiftRightSignExtendedUint16x32 ...) => (VPSRAVW512 ...)
-(ShiftRightSignExtendedUint32x4 ...) => (VPSRAVD128 ...)
-(ShiftRightSignExtendedUint32x8 ...) => (VPSRAVD256 ...)
-(ShiftRightSignExtendedUint32x16 ...) => (VPSRAVD512 ...)
-(ShiftRightSignExtendedUint64x2 ...) => (VPSRAVQ128 ...)
-(ShiftRightSignExtendedUint64x4 ...) => (VPSRAVQ256 ...)
-(ShiftRightSignExtendedUint64x8 ...) => (VPSRAVQ512 ...)
-(ShiftRightSignExtendedMaskedInt16x8 x y mask) => (VPSRAVWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(ShiftRightSignExtendedMaskedInt16x16 x y mask) => (VPSRAVWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(ShiftRightSignExtendedMaskedInt16x32 x y mask) => (VPSRAVWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(ShiftRightSignExtendedMaskedInt32x4 x y mask) => (VPSRAVDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(ShiftRightSignExtendedMaskedInt32x8 x y mask) => (VPSRAVDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(ShiftRightSignExtendedMaskedInt32x16 x y mask) => (VPSRAVDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(ShiftRightSignExtendedMaskedInt64x2 x y mask) => (VPSRAVQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(ShiftRightSignExtendedMaskedInt64x4 x y mask) => (VPSRAVQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(ShiftRightSignExtendedMaskedInt64x8 x y mask) => (VPSRAVQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(ShiftRightSignExtendedMaskedUint16x8 x y mask) => (VPSRAVWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(ShiftRightSignExtendedMaskedUint16x16 x y mask) => (VPSRAVWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(ShiftRightSignExtendedMaskedUint16x32 x y mask) => (VPSRAVWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(ShiftRightSignExtendedMaskedUint32x4 x y mask) => (VPSRAVDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(ShiftRightSignExtendedMaskedUint32x8 x y mask) => (VPSRAVDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(ShiftRightSignExtendedMaskedUint32x16 x y mask) => (VPSRAVDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(ShiftRightSignExtendedMaskedUint64x2 x y mask) => (VPSRAVQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(ShiftRightSignExtendedMaskedUint64x4 x y mask) => (VPSRAVQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(ShiftRightSignExtendedMaskedUint64x8 x y mask) => (VPSRAVQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
 (SignInt8x16 ...) => (VPSIGNB128 ...)
 (SignInt8x32 ...) => (VPSIGNB256 ...)
 (SignInt16x8 ...) => (VPSIGNW128 ...)
--- a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go
+++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go
@ -198,17 +198,16 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
 		{name: "VPSUBSW256", argLength: 2, reg: v21, asm: "VPSUBSW", commutative: false, typ: "Vec256", resultInArg0: false},
 		{name: "VPSUBSWMasked256", argLength: 3, reg: w2kw, asm: "VPSUBSW", commutative: false, typ: "Vec256", resultInArg0: false},
 		{name: "VPSLLW256", argLength: 2, reg: vfpv, asm: "VPSLLW", commutative: false, typ: "Vec256", resultInArg0: false},
-		{name: "VPSRLW256", argLength: 2, reg: vfpv, asm: "VPSRLW", commutative: false, typ: "Vec256", resultInArg0: false},
+		{name: "VPSLLWMasked256", argLength: 3, reg: wfpkw, asm: "VPSLLW", commutative: false, typ: "Vec256", resultInArg0: false},
 		{name: "VPSRAW256", argLength: 2, reg: vfpv, asm: "VPSRAW", commutative: false, typ: "Vec256", resultInArg0: false},
+		{name: "VPSRAWMasked256", argLength: 3, reg: wfpkw, asm: "VPSRAW", commutative: false, typ: "Vec256", resultInArg0: false},
 		{name: "VPSLLVW256", argLength: 2, reg: w21, asm: "VPSLLVW", commutative: false, typ: "Vec256", resultInArg0: false},
 		{name: "VPSHLDVW256", argLength: 3, reg: w31, asm: "VPSHLDVW", commutative: false, typ: "Vec256", resultInArg0: true},
 		{name: "VPSHLDVWMasked256", argLength: 4, reg: w3kw, asm: "VPSHLDVW", commutative: false, typ: "Vec256", resultInArg0: true},
 		{name: "VPSLLVWMasked256", argLength: 3, reg: w2kw, asm: "VPSLLVW", commutative: false, typ: "Vec256", resultInArg0: false},
-		{name: "VPSRLVW256", argLength: 2, reg: w21, asm: "VPSRLVW", commutative: false, typ: "Vec256", resultInArg0: false},
+		{name: "VPSRAVW256", argLength: 2, reg: w21, asm: "VPSRAVW", commutative: false, typ: "Vec256", resultInArg0: false},
 		{name: "VPSHRDVW256", argLength: 3, reg: w31, asm: "VPSHRDVW", commutative: false, typ: "Vec256", resultInArg0: true},
 		{name: "VPSHRDVWMasked256", argLength: 4, reg: w3kw, asm: "VPSHRDVW", commutative: false, typ: "Vec256", resultInArg0: true},
-		{name: "VPSRLVWMasked256", argLength: 3, reg: w2kw, asm: "VPSRLVW", commutative: false, typ: "Vec256", resultInArg0: false},
-		{name: "VPSRAVW256", argLength: 2, reg: w21, asm: "VPSRAVW", commutative: false, typ: "Vec256", resultInArg0: false},
 		{name: "VPSRAVWMasked256", argLength: 3, reg: w2kw, asm: "VPSRAVW", commutative: false, typ: "Vec256", resultInArg0: false},
 		{name: "VPSIGNW256", argLength: 2, reg: v21, asm: "VPSIGNW", commutative: false, typ: "Vec256", resultInArg0: false},
 		{name: "VPSUBW256", argLength: 2, reg: v21, asm: "VPSUBW", commutative: false, typ: "Vec256", resultInArg0: false},
@ -233,15 +232,17 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
 		{name: "VPADDSWMasked512", argLength: 3, reg: w2kw, asm: "VPADDSW", commutative: true, typ: "Vec512", resultInArg0: false},
 		{name: "VPSUBSW512", argLength: 2, reg: w21, asm: "VPSUBSW", commutative: false, typ: "Vec512", resultInArg0: false},
 		{name: "VPSUBSWMasked512", argLength: 3, reg: w2kw, asm: "VPSUBSW", commutative: false, typ: "Vec512", resultInArg0: false},
+		{name: "VPSLLW512", argLength: 2, reg: wfpw, asm: "VPSLLW", commutative: false, typ: "Vec512", resultInArg0: false},
+		{name: "VPSLLWMasked512", argLength: 3, reg: wfpkw, asm: "VPSLLW", commutative: false, typ: "Vec512", resultInArg0: false},
+		{name: "VPSRAW512", argLength: 2, reg: wfpw, asm: "VPSRAW", commutative: false, typ: "Vec512", resultInArg0: false},
+		{name: "VPSRAWMasked512", argLength: 3, reg: wfpkw, asm: "VPSRAW", commutative: false, typ: "Vec512", resultInArg0: false},
 		{name: "VPSLLVW512", argLength: 2, reg: w21, asm: "VPSLLVW", commutative: false, typ: "Vec512", resultInArg0: false},
 		{name: "VPSHLDVW512", argLength: 3, reg: w31, asm: "VPSHLDVW", commutative: false, typ: "Vec512", resultInArg0: true},
 		{name: "VPSHLDVWMasked512", argLength: 4, reg: w3kw, asm: "VPSHLDVW", commutative: false, typ: "Vec512", resultInArg0: true},
 		{name: "VPSLLVWMasked512", argLength: 3, reg: w2kw, asm: "VPSLLVW", commutative: false, typ: "Vec512", resultInArg0: false},
-		{name: "VPSRLVW512", argLength: 2, reg: w21, asm: "VPSRLVW", commutative: false, typ: "Vec512", resultInArg0: false},
+		{name: "VPSRAVW512", argLength: 2, reg: w21, asm: "VPSRAVW", commutative: false, typ: "Vec512", resultInArg0: false},
 		{name: "VPSHRDVW512", argLength: 3, reg: w31, asm: "VPSHRDVW", commutative: false, typ: "Vec512", resultInArg0: true},
 		{name: "VPSHRDVWMasked512", argLength: 4, reg: w3kw, asm: "VPSHRDVW", commutative: false, typ: "Vec512", resultInArg0: true},
-		{name: "VPSRLVWMasked512", argLength: 3, reg: w2kw, asm: "VPSRLVW", commutative: false, typ: "Vec512", resultInArg0: false},
-		{name: "VPSRAVW512", argLength: 2, reg: w21, asm: "VPSRAVW", commutative: false, typ: "Vec512", resultInArg0: false},
 		{name: "VPSRAVWMasked512", argLength: 3, reg: w2kw, asm: "VPSRAVW", commutative: false, typ: "Vec512", resultInArg0: false},
 		{name: "VPSUBW512", argLength: 2, reg: w21, asm: "VPSUBW", commutative: false, typ: "Vec512", resultInArg0: false},
 		{name: "VPSUBWMasked512", argLength: 3, reg: w2kw, asm: "VPSUBW", commutative: false, typ: "Vec512", resultInArg0: false},
@ -272,17 +273,16 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
 		{name: "VPSUBSW128", argLength: 2, reg: v21, asm: "VPSUBSW", commutative: false, typ: "Vec128", resultInArg0: false},
 		{name: "VPSUBSWMasked128", argLength: 3, reg: w2kw, asm: "VPSUBSW", commutative: false, typ: "Vec128", resultInArg0: false},
 		{name: "VPSLLW128", argLength: 2, reg: vfpv, asm: "VPSLLW", commutative: false, typ: "Vec128", resultInArg0: false},
-		{name: "VPSRLW128", argLength: 2, reg: vfpv, asm: "VPSRLW", commutative: false, typ: "Vec128", resultInArg0: false},
+		{name: "VPSLLWMasked128", argLength: 3, reg: wfpkw, asm: "VPSLLW", commutative: false, typ: "Vec128", resultInArg0: false},
 		{name: "VPSRAW128", argLength: 2, reg: vfpv, asm: "VPSRAW", commutative: false, typ: "Vec128", resultInArg0: false},
+		{name: "VPSRAWMasked128", argLength: 3, reg: wfpkw, asm: "VPSRAW", commutative: false, typ: "Vec128", resultInArg0: false},
 		{name: "VPSLLVW128", argLength: 2, reg: w21, asm: "VPSLLVW", commutative: false, typ: "Vec128", resultInArg0: false},
 		{name: "VPSHLDVW128", argLength: 3, reg: w31, asm: "VPSHLDVW", commutative: false, typ: "Vec128", resultInArg0: true},
 		{name: "VPSHLDVWMasked128", argLength: 4, reg: w3kw, asm: "VPSHLDVW", commutative: false, typ: "Vec128", resultInArg0: true},
 		{name: "VPSLLVWMasked128", argLength: 3, reg: w2kw, asm: "VPSLLVW", commutative: false, typ: "Vec128", resultInArg0: false},
-		{name: "VPSRLVW128", argLength: 2, reg: w21, asm: "VPSRLVW", commutative: false, typ: "Vec128", resultInArg0: false},
+		{name: "VPSRAVW128", argLength: 2, reg: w21, asm: "VPSRAVW", commutative: false, typ: "Vec128", resultInArg0: false},
 		{name: "VPSHRDVW128", argLength: 3, reg: w31, asm: "VPSHRDVW", commutative: false, typ: "Vec128", resultInArg0: true},
 		{name: "VPSHRDVWMasked128", argLength: 4, reg: w3kw, asm: "VPSHRDVW", commutative: false, typ: "Vec128", resultInArg0: true},
-		{name: "VPSRLVWMasked128", argLength: 3, reg: w2kw, asm: "VPSRLVW", commutative: false, typ: "Vec128", resultInArg0: false},
-		{name: "VPSRAVW128", argLength: 2, reg: w21, asm: "VPSRAVW", commutative: false, typ: "Vec128", resultInArg0: false},
 		{name: "VPSRAVWMasked128", argLength: 3, reg: w2kw, asm: "VPSRAVW", commutative: false, typ: "Vec128", resultInArg0: false},
 		{name: "VPSIGNW128", argLength: 2, reg: v21, asm: "VPSIGNW", commutative: false, typ: "Vec128", resultInArg0: false},
 		{name: "VPSUBW128", argLength: 2, reg: v21, asm: "VPSUBW", commutative: false, typ: "Vec128", resultInArg0: false},
@ -315,15 +315,17 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
 		{name: "VPDPWSSDSMasked512", argLength: 4, reg: w3kw, asm: "VPDPWSSDS", commutative: false, typ: "Vec512", resultInArg0: true},
 		{name: "VPDPBUSDS512", argLength: 3, reg: w31, asm: "VPDPBUSDS", commutative: false, typ: "Vec512", resultInArg0: true},
 		{name: "VPDPBUSDSMasked512", argLength: 4, reg: w3kw, asm: "VPDPBUSDS", commutative: false, typ: "Vec512", resultInArg0: true},
+		{name: "VPSLLD512", argLength: 2, reg: wfpw, asm: "VPSLLD", commutative: false, typ: "Vec512", resultInArg0: false},
+		{name: "VPSLLDMasked512", argLength: 3, reg: wfpkw, asm: "VPSLLD", commutative: false, typ: "Vec512", resultInArg0: false},
+		{name: "VPSRAD512", argLength: 2, reg: wfpw, asm: "VPSRAD", commutative: false, typ: "Vec512", resultInArg0: false},
+		{name: "VPSRADMasked512", argLength: 3, reg: wfpkw, asm: "VPSRAD", commutative: false, typ: "Vec512", resultInArg0: false},
 		{name: "VPSLLVD512", argLength: 2, reg: w21, asm: "VPSLLVD", commutative: false, typ: "Vec512", resultInArg0: false},
 		{name: "VPSHLDVD512", argLength: 3, reg: w31, asm: "VPSHLDVD", commutative: false, typ: "Vec512", resultInArg0: true},
 		{name: "VPSHLDVDMasked512", argLength: 4, reg: w3kw, asm: "VPSHLDVD", commutative: false, typ: "Vec512", resultInArg0: true},
 		{name: "VPSLLVDMasked512", argLength: 3, reg: w2kw, asm: "VPSLLVD", commutative: false, typ: "Vec512", resultInArg0: false},
-		{name: "VPSRLVD512", argLength: 2, reg: w21, asm: "VPSRLVD", commutative: false, typ: "Vec512", resultInArg0: false},
+		{name: "VPSRAVD512", argLength: 2, reg: w21, asm: "VPSRAVD", commutative: false, typ: "Vec512", resultInArg0: false},
 		{name: "VPSHRDVD512", argLength: 3, reg: w31, asm: "VPSHRDVD", commutative: false, typ: "Vec512", resultInArg0: true},
 		{name: "VPSHRDVDMasked512", argLength: 4, reg: w3kw, asm: "VPSHRDVD", commutative: false, typ: "Vec512", resultInArg0: true},
-		{name: "VPSRLVDMasked512", argLength: 3, reg: w2kw, asm: "VPSRLVD", commutative: false, typ: "Vec512", resultInArg0: false},
-		{name: "VPSRAVD512", argLength: 2, reg: w21, asm: "VPSRAVD", commutative: false, typ: "Vec512", resultInArg0: false},
 		{name: "VPSRAVDMasked512", argLength: 3, reg: w2kw, asm: "VPSRAVD", commutative: false, typ: "Vec512", resultInArg0: false},
 		{name: "VPSUBD512", argLength: 2, reg: w21, asm: "VPSUBD", commutative: false, typ: "Vec512", resultInArg0: false},
 		{name: "VPSUBDMasked512", argLength: 3, reg: w2kw, asm: "VPSUBD", commutative: false, typ: "Vec512", resultInArg0: false},
@ -362,17 +364,16 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
 		{name: "VPDPBUSDS128", argLength: 3, reg: v31, asm: "VPDPBUSDS", commutative: false, typ: "Vec128", resultInArg0: true},
 		{name: "VPDPBUSDSMasked128", argLength: 4, reg: w3kw, asm: "VPDPBUSDS", commutative: false, typ: "Vec128", resultInArg0: true},
 		{name: "VPSLLD128", argLength: 2, reg: vfpv, asm: "VPSLLD", commutative: false, typ: "Vec128", resultInArg0: false},
-		{name: "VPSRLD128", argLength: 2, reg: vfpv, asm: "VPSRLD", commutative: false, typ: "Vec128", resultInArg0: false},
+		{name: "VPSLLDMasked128", argLength: 3, reg: wfpkw, asm: "VPSLLD", commutative: false, typ: "Vec128", resultInArg0: false},
 		{name: "VPSRAD128", argLength: 2, reg: vfpv, asm: "VPSRAD", commutative: false, typ: "Vec128", resultInArg0: false},
+		{name: "VPSRADMasked128", argLength: 3, reg: wfpkw, asm: "VPSRAD", commutative: false, typ: "Vec128", resultInArg0: false},
 		{name: "VPSLLVD128", argLength: 2, reg: v21, asm: "VPSLLVD", commutative: false, typ: "Vec128", resultInArg0: false},
 		{name: "VPSHLDVD128", argLength: 3, reg: w31, asm: "VPSHLDVD", commutative: false, typ: "Vec128", resultInArg0: true},
 		{name: "VPSHLDVDMasked128", argLength: 4, reg: w3kw, asm: "VPSHLDVD", commutative: false, typ: "Vec128", resultInArg0: true},
 		{name: "VPSLLVDMasked128", argLength: 3, reg: w2kw, asm: "VPSLLVD", commutative: false, typ: "Vec128", resultInArg0: false},
-		{name: "VPSRLVD128", argLength: 2, reg: v21, asm: "VPSRLVD", commutative: false, typ: "Vec128", resultInArg0: false},
+		{name: "VPSRAVD128", argLength: 2, reg: v21, asm: "VPSRAVD", commutative: false, typ: "Vec128", resultInArg0: false},
 		{name: "VPSHRDVD128", argLength: 3, reg: w31, asm: "VPSHRDVD", commutative: false, typ: "Vec128", resultInArg0: true},
 		{name: "VPSHRDVDMasked128", argLength: 4, reg: w3kw, asm: "VPSHRDVD", commutative: false, typ: "Vec128", resultInArg0: true},
-		{name: "VPSRLVDMasked128", argLength: 3, reg: w2kw, asm: "VPSRLVD", commutative: false, typ: "Vec128", resultInArg0: false},
-		{name: "VPSRAVD128", argLength: 2, reg: v21, asm: "VPSRAVD", commutative: false, typ: "Vec128", resultInArg0: false},
 		{name: "VPSRAVDMasked128", argLength: 3, reg: w2kw, asm: "VPSRAVD", commutative: false, typ: "Vec128", resultInArg0: false},
 		{name: "VPSIGND128", argLength: 2, reg: v21, asm: "VPSIGND", commutative: false, typ: "Vec128", resultInArg0: false},
 		{name: "VPSUBD128", argLength: 2, reg: v21, asm: "VPSUBD", commutative: false, typ: "Vec128", resultInArg0: false},
@ -411,17 +412,16 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
 		{name: "VPDPBUSDS256", argLength: 3, reg: v31, asm: "VPDPBUSDS", commutative: false, typ: "Vec256", resultInArg0: true},
 		{name: "VPDPBUSDSMasked256", argLength: 4, reg: w3kw, asm: "VPDPBUSDS", commutative: false, typ: "Vec256", resultInArg0: true},
 		{name: "VPSLLD256", argLength: 2, reg: vfpv, asm: "VPSLLD", commutative: false, typ: "Vec256", resultInArg0: false},
-		{name: "VPSRLD256", argLength: 2, reg: vfpv, asm: "VPSRLD", commutative: false, typ: "Vec256", resultInArg0: false},
+		{name: "VPSLLDMasked256", argLength: 3, reg: wfpkw, asm: "VPSLLD", commutative: false, typ: "Vec256", resultInArg0: false},
 		{name: "VPSRAD256", argLength: 2, reg: vfpv, asm: "VPSRAD", commutative: false, typ: "Vec256", resultInArg0: false},
+		{name: "VPSRADMasked256", argLength: 3, reg: wfpkw, asm: "VPSRAD", commutative: false, typ: "Vec256", resultInArg0: false},
 		{name: "VPSLLVD256", argLength: 2, reg: v21, asm: "VPSLLVD", commutative: false, typ: "Vec256", resultInArg0: false},
 		{name: "VPSHLDVD256", argLength: 3, reg: w31, asm: "VPSHLDVD", commutative: false, typ: "Vec256", resultInArg0: true},
 		{name: "VPSHLDVDMasked256", argLength: 4, reg: w3kw, asm: "VPSHLDVD", commutative: false, typ: "Vec256", resultInArg0: true},
 		{name: "VPSLLVDMasked256", argLength: 3, reg: w2kw, asm: "VPSLLVD", commutative: false, typ: "Vec256", resultInArg0: false},
-		{name: "VPSRLVD256", argLength: 2, reg: v21, asm: "VPSRLVD", commutative: false, typ: "Vec256", resultInArg0: false},
+		{name: "VPSRAVD256", argLength: 2, reg: v21, asm: "VPSRAVD", commutative: false, typ: "Vec256", resultInArg0: false},
 		{name: "VPSHRDVD256", argLength: 3, reg: w31, asm: "VPSHRDVD", commutative: false, typ: "Vec256", resultInArg0: true},
 		{name: "VPSHRDVDMasked256", argLength: 4, reg: w3kw, asm: "VPSHRDVD", commutative: false, typ: "Vec256", resultInArg0: true},
-		{name: "VPSRLVDMasked256", argLength: 3, reg: w2kw, asm: "VPSRLVD", commutative: false, typ: "Vec256", resultInArg0: false},
-		{name: "VPSRAVD256", argLength: 2, reg: v21, asm: "VPSRAVD", commutative: false, typ: "Vec256", resultInArg0: false},
 		{name: "VPSRAVDMasked256", argLength: 3, reg: w2kw, asm: "VPSRAVD", commutative: false, typ: "Vec256", resultInArg0: false},
 		{name: "VPSIGND256", argLength: 2, reg: v21, asm: "VPSIGND", commutative: false, typ: "Vec256", resultInArg0: false},
 		{name: "VPSUBD256", argLength: 2, reg: v21, asm: "VPSUBD", commutative: false, typ: "Vec256", resultInArg0: false},
@ -453,19 +453,15 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
 		{name: "VPRORVQMasked128", argLength: 3, reg: w2kw, asm: "VPRORVQ", commutative: false, typ: "Vec128", resultInArg0: false},
 		{name: "VPSLLQ128", argLength: 2, reg: vfpv, asm: "VPSLLQ", commutative: false, typ: "Vec128", resultInArg0: false},
 		{name: "VPSLLQMasked128", argLength: 3, reg: wfpkw, asm: "VPSLLQ", commutative: false, typ: "Vec128", resultInArg0: false},
-		{name: "VPSRLQ128", argLength: 2, reg: vfpv, asm: "VPSRLQ", commutative: false, typ: "Vec128", resultInArg0: false},
-		{name: "VPSRLQMasked128", argLength: 3, reg: wfpkw, asm: "VPSRLQ", commutative: false, typ: "Vec128", resultInArg0: false},
 		{name: "VPSRAQ128", argLength: 2, reg: wfpw, asm: "VPSRAQ", commutative: false, typ: "Vec128", resultInArg0: false},
 		{name: "VPSRAQMasked128", argLength: 3, reg: wfpkw, asm: "VPSRAQ", commutative: false, typ: "Vec128", resultInArg0: false},
 		{name: "VPSLLVQ128", argLength: 2, reg: v21, asm: "VPSLLVQ", commutative: false, typ: "Vec128", resultInArg0: false},
 		{name: "VPSHLDVQ128", argLength: 3, reg: w31, asm: "VPSHLDVQ", commutative: false, typ: "Vec128", resultInArg0: true},
 		{name: "VPSHLDVQMasked128", argLength: 4, reg: w3kw, asm: "VPSHLDVQ", commutative: false, typ: "Vec128", resultInArg0: true},
 		{name: "VPSLLVQMasked128", argLength: 3, reg: w2kw, asm: "VPSLLVQ", commutative: false, typ: "Vec128", resultInArg0: false},
-		{name: "VPSRLVQ128", argLength: 2, reg: v21, asm: "VPSRLVQ", commutative: false, typ: "Vec128", resultInArg0: false},
+		{name: "VPSRAVQ128", argLength: 2, reg: w21, asm: "VPSRAVQ", commutative: false, typ: "Vec128", resultInArg0: false},
 		{name: "VPSHRDVQ128", argLength: 3, reg: w31, asm: "VPSHRDVQ", commutative: false, typ: "Vec128", resultInArg0: true},
 		{name: "VPSHRDVQMasked128", argLength: 4, reg: w3kw, asm: "VPSHRDVQ", commutative: false, typ: "Vec128", resultInArg0: true},
-		{name: "VPSRLVQMasked128", argLength: 3, reg: w2kw, asm: "VPSRLVQ", commutative: false, typ: "Vec128", resultInArg0: false},
-		{name: "VPSRAVQ128", argLength: 2, reg: w21, asm: "VPSRAVQ", commutative: false, typ: "Vec128", resultInArg0: false},
 		{name: "VPSRAVQMasked128", argLength: 3, reg: w2kw, asm: "VPSRAVQ", commutative: false, typ: "Vec128", resultInArg0: false},
 		{name: "VPSUBQ128", argLength: 2, reg: v21, asm: "VPSUBQ", commutative: false, typ: "Vec128", resultInArg0: false},
 		{name: "VPSUBQMasked128", argLength: 3, reg: w2kw, asm: "VPSUBQ", commutative: false, typ: "Vec128", resultInArg0: false},
@ -494,19 +490,15 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
 		{name: "VPRORVQMasked256", argLength: 3, reg: w2kw, asm: "VPRORVQ", commutative: false, typ: "Vec256", resultInArg0: false},
 		{name: "VPSLLQ256", argLength: 2, reg: vfpv, asm: "VPSLLQ", commutative: false, typ: "Vec256", resultInArg0: false},
 		{name: "VPSLLQMasked256", argLength: 3, reg: wfpkw, asm: "VPSLLQ", commutative: false, typ: "Vec256", resultInArg0: false},
-		{name: "VPSRLQ256", argLength: 2, reg: vfpv, asm: "VPSRLQ", commutative: false, typ: "Vec256", resultInArg0: false},
-		{name: "VPSRLQMasked256", argLength: 3, reg: wfpkw, asm: "VPSRLQ", commutative: false, typ: "Vec256", resultInArg0: false},
 		{name: "VPSRAQ256", argLength: 2, reg: wfpw, asm: "VPSRAQ", commutative: false, typ: "Vec256", resultInArg0: false},
 		{name: "VPSRAQMasked256", argLength: 3, reg: wfpkw, asm: "VPSRAQ", commutative: false, typ: "Vec256", resultInArg0: false},
 		{name: "VPSLLVQ256", argLength: 2, reg: v21, asm: "VPSLLVQ", commutative: false, typ: "Vec256", resultInArg0: false},
 		{name: "VPSHLDVQ256", argLength: 3, reg: w31, asm: "VPSHLDVQ", commutative: false, typ: "Vec256", resultInArg0: true},
 		{name: "VPSHLDVQMasked256", argLength: 4, reg: w3kw, asm: "VPSHLDVQ", commutative: false, typ: "Vec256", resultInArg0: true},
 		{name: "VPSLLVQMasked256", argLength: 3, reg: w2kw, asm: "VPSLLVQ", commutative: false, typ: "Vec256", resultInArg0: false},
-		{name: "VPSRLVQ256", argLength: 2, reg: v21, asm: "VPSRLVQ", commutative: false, typ: "Vec256", resultInArg0: false},
+		{name: "VPSRAVQ256", argLength: 2, reg: w21, asm: "VPSRAVQ", commutative: false, typ: "Vec256", resultInArg0: false},
 		{name: "VPSHRDVQ256", argLength: 3, reg: w31, asm: "VPSHRDVQ", commutative: false, typ: "Vec256", resultInArg0: true},
 		{name: "VPSHRDVQMasked256", argLength: 4, reg: w3kw, asm: "VPSHRDVQ", commutative: false, typ: "Vec256", resultInArg0: true},
-		{name: "VPSRLVQMasked256", argLength: 3, reg: w2kw, asm: "VPSRLVQ", commutative: false, typ: "Vec256", resultInArg0: false},
-		{name: "VPSRAVQ256", argLength: 2, reg: w21, asm: "VPSRAVQ", commutative: false, typ: "Vec256", resultInArg0: false},
 		{name: "VPSRAVQMasked256", argLength: 3, reg: w2kw, asm: "VPSRAVQ", commutative: false, typ: "Vec256", resultInArg0: false},
 		{name: "VPSUBQ256", argLength: 2, reg: v21, asm: "VPSUBQ", commutative: false, typ: "Vec256", resultInArg0: false},
 		{name: "VPSUBQMasked256", argLength: 3, reg: w2kw, asm: "VPSUBQ", commutative: false, typ: "Vec256", resultInArg0: false},
@ -537,19 +529,15 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
 		{name: "VPRORVQMasked512", argLength: 3, reg: w2kw, asm: "VPRORVQ", commutative: false, typ: "Vec512", resultInArg0: false},
 		{name: "VPSLLQ512", argLength: 2, reg: wfpw, asm: "VPSLLQ", commutative: false, typ: "Vec512", resultInArg0: false},
 		{name: "VPSLLQMasked512", argLength: 3, reg: wfpkw, asm: "VPSLLQ", commutative: false, typ: "Vec512", resultInArg0: false},
-		{name: "VPSRLQ512", argLength: 2, reg: wfpw, asm: "VPSRLQ", commutative: false, typ: "Vec512", resultInArg0: false},
-		{name: "VPSRLQMasked512", argLength: 3, reg: wfpkw, asm: "VPSRLQ", commutative: false, typ: "Vec512", resultInArg0: false},
 		{name: "VPSRAQ512", argLength: 2, reg: wfpw, asm: "VPSRAQ", commutative: false, typ: "Vec512", resultInArg0: false},
 		{name: "VPSRAQMasked512", argLength: 3, reg: wfpkw, asm: "VPSRAQ", commutative: false, typ: "Vec512", resultInArg0: false},
 		{name: "VPSLLVQ512", argLength: 2, reg: w21, asm: "VPSLLVQ", commutative: false, typ: "Vec512", resultInArg0: false},
 		{name: "VPSHLDVQ512", argLength: 3, reg: w31, asm: "VPSHLDVQ", commutative: false, typ: "Vec512", resultInArg0: true},
 		{name: "VPSHLDVQMasked512", argLength: 4, reg: w3kw, asm: "VPSHLDVQ", commutative: false, typ: "Vec512", resultInArg0: true},
 		{name: "VPSLLVQMasked512", argLength: 3, reg: w2kw, asm: "VPSLLVQ", commutative: false, typ: "Vec512", resultInArg0: false},
-		{name: "VPSRLVQ512", argLength: 2, reg: w21, asm: "VPSRLVQ", commutative: false, typ: "Vec512", resultInArg0: false},
+		{name: "VPSRAVQ512", argLength: 2, reg: w21, asm: "VPSRAVQ", commutative: false, typ: "Vec512", resultInArg0: false},
 		{name: "VPSHRDVQ512", argLength: 3, reg: w31, asm: "VPSHRDVQ", commutative: false, typ: "Vec512", resultInArg0: true},
 		{name: "VPSHRDVQMasked512", argLength: 4, reg: w3kw, asm: "VPSHRDVQ", commutative: false, typ: "Vec512", resultInArg0: true},
-		{name: "VPSRLVQMasked512", argLength: 3, reg: w2kw, asm: "VPSRLVQ", commutative: false, typ: "Vec512", resultInArg0: false},
-		{name: "VPSRAVQ512", argLength: 2, reg: w21, asm: "VPSRAVQ", commutative: false, typ: "Vec512", resultInArg0: false},
 		{name: "VPSRAVQMasked512", argLength: 3, reg: w2kw, asm: "VPSRAVQ", commutative: false, typ: "Vec512", resultInArg0: false},
 		{name: "VPSUBQ512", argLength: 2, reg: w21, asm: "VPSUBQ", commutative: false, typ: "Vec512", resultInArg0: false},
 		{name: "VPSUBQMasked512", argLength: 3, reg: w2kw, asm: "VPSUBQ", commutative: false, typ: "Vec512", resultInArg0: false},
@ -625,6 +613,10 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
 		{name: "VPMINUWMasked256", argLength: 3, reg: w2kw, asm: "VPMINUW", commutative: true, typ: "Vec256", resultInArg0: false},
 		{name: "VPMULHUW256", argLength: 2, reg: v21, asm: "VPMULHUW", commutative: true, typ: "Vec256", resultInArg0: false},
 		{name: "VPMULHUWMasked256", argLength: 3, reg: w2kw, asm: "VPMULHUW", commutative: true, typ: "Vec256", resultInArg0: false},
+		{name: "VPSRLW256", argLength: 2, reg: vfpv, asm: "VPSRLW", commutative: false, typ: "Vec256", resultInArg0: false},
+		{name: "VPSRLWMasked256", argLength: 3, reg: wfpkw, asm: "VPSRLW", commutative: false, typ: "Vec256", resultInArg0: false},
+		{name: "VPSRLVW256", argLength: 2, reg: w21, asm: "VPSRLVW", commutative: false, typ: "Vec256", resultInArg0: false},
+		{name: "VPSRLVWMasked256", argLength: 3, reg: w2kw, asm: "VPSRLVW", commutative: false, typ: "Vec256", resultInArg0: false},
 		{name: "VPAVGW512", argLength: 2, reg: w21, asm: "VPAVGW", commutative: true, typ: "Vec512", resultInArg0: false},
 		{name: "VPAVGWMasked512", argLength: 3, reg: w2kw, asm: "VPAVGW", commutative: true, typ: "Vec512", resultInArg0: false},
 		{name: "VPMAXUW512", argLength: 2, reg: w21, asm: "VPMAXUW", commutative: true, typ: "Vec512", resultInArg0: false},
@ -633,6 +625,10 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
 		{name: "VPMINUWMasked512", argLength: 3, reg: w2kw, asm: "VPMINUW", commutative: true, typ: "Vec512", resultInArg0: false},
 		{name: "VPMULHUW512", argLength: 2, reg: w21, asm: "VPMULHUW", commutative: true, typ: "Vec512", resultInArg0: false},
 		{name: "VPMULHUWMasked512", argLength: 3, reg: w2kw, asm: "VPMULHUW", commutative: true, typ: "Vec512", resultInArg0: false},
+		{name: "VPSRLW512", argLength: 2, reg: wfpw, asm: "VPSRLW", commutative: false, typ: "Vec512", resultInArg0: false},
+		{name: "VPSRLWMasked512", argLength: 3, reg: wfpkw, asm: "VPSRLW", commutative: false, typ: "Vec512", resultInArg0: false},
+		{name: "VPSRLVW512", argLength: 2, reg: w21, asm: "VPSRLVW", commutative: false, typ: "Vec512", resultInArg0: false},
+		{name: "VPSRLVWMasked512", argLength: 3, reg: w2kw, asm: "VPSRLVW", commutative: false, typ: "Vec512", resultInArg0: false},
 		{name: "VPAVGW128", argLength: 2, reg: v21, asm: "VPAVGW", commutative: true, typ: "Vec128", resultInArg0: false},
 		{name: "VPAVGWMasked128", argLength: 3, reg: w2kw, asm: "VPAVGW", commutative: true, typ: "Vec128", resultInArg0: false},
 		{name: "VPMAXUW128", argLength: 2, reg: v21, asm: "VPMAXUW", commutative: true, typ: "Vec128", resultInArg0: false},
@ -641,36 +637,64 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
 		{name: "VPMINUWMasked128", argLength: 3, reg: w2kw, asm: "VPMINUW", commutative: true, typ: "Vec128", resultInArg0: false},
 		{name: "VPMULHUW128", argLength: 2, reg: v21, asm: "VPMULHUW", commutative: true, typ: "Vec128", resultInArg0: false},
 		{name: "VPMULHUWMasked128", argLength: 3, reg: w2kw, asm: "VPMULHUW", commutative: true, typ: "Vec128", resultInArg0: false},
+		{name: "VPSRLW128", argLength: 2, reg: vfpv, asm: "VPSRLW", commutative: false, typ: "Vec128", resultInArg0: false},
+		{name: "VPSRLWMasked128", argLength: 3, reg: wfpkw, asm: "VPSRLW", commutative: false, typ: "Vec128", resultInArg0: false},
+		{name: "VPSRLVW128", argLength: 2, reg: w21, asm: "VPSRLVW", commutative: false, typ: "Vec128", resultInArg0: false},
+		{name: "VPSRLVWMasked128", argLength: 3, reg: w2kw, asm: "VPSRLVW", commutative: false, typ: "Vec128", resultInArg0: false},
 		{name: "VPMAXUD512", argLength: 2, reg: w21, asm: "VPMAXUD", commutative: true, typ: "Vec512", resultInArg0: false},
 		{name: "VPMAXUDMasked512", argLength: 3, reg: w2kw, asm: "VPMAXUD", commutative: true, typ: "Vec512", resultInArg0: false},
 		{name: "VPMINUD512", argLength: 2, reg: w21, asm: "VPMINUD", commutative: true, typ: "Vec512", resultInArg0: false},
 		{name: "VPMINUDMasked512", argLength: 3, reg: w2kw, asm: "VPMINUD", commutative: true, typ: "Vec512", resultInArg0: false},
+		{name: "VPSRLD512", argLength: 2, reg: wfpw, asm: "VPSRLD", commutative: false, typ: "Vec512", resultInArg0: false},
+		{name: "VPSRLDMasked512", argLength: 3, reg: wfpkw, asm: "VPSRLD", commutative: false, typ: "Vec512", resultInArg0: false},
+		{name: "VPSRLVD512", argLength: 2, reg: w21, asm: "VPSRLVD", commutative: false, typ: "Vec512", resultInArg0: false},
+		{name: "VPSRLVDMasked512", argLength: 3, reg: w2kw, asm: "VPSRLVD", commutative: false, typ: "Vec512", resultInArg0: false},
 		{name: "VPMAXUD128", argLength: 2, reg: v21, asm: "VPMAXUD", commutative: true, typ: "Vec128", resultInArg0: false},
 		{name: "VPMAXUDMasked128", argLength: 3, reg: w2kw, asm: "VPMAXUD", commutative: true, typ: "Vec128", resultInArg0: false},
 		{name: "VPMINUD128", argLength: 2, reg: v21, asm: "VPMINUD", commutative: true, typ: "Vec128", resultInArg0: false},
 		{name: "VPMINUDMasked128", argLength: 3, reg: w2kw, asm: "VPMINUD", commutative: true, typ: "Vec128", resultInArg0: false},
 		{name: "VPMULUDQ128", argLength: 2, reg: v21, asm: "VPMULUDQ", commutative: true, typ: "Vec128", resultInArg0: false},
+		{name: "VPSRLD128", argLength: 2, reg: vfpv, asm: "VPSRLD", commutative: false, typ: "Vec128", resultInArg0: false},
+		{name: "VPSRLDMasked128", argLength: 3, reg: wfpkw, asm: "VPSRLD", commutative: false, typ: "Vec128", resultInArg0: false},
+		{name: "VPSRLVD128", argLength: 2, reg: v21, asm: "VPSRLVD", commutative: false, typ: "Vec128", resultInArg0: false},
+		{name: "VPSRLVDMasked128", argLength: 3, reg: w2kw, asm: "VPSRLVD", commutative: false, typ: "Vec128", resultInArg0: false},
 		{name: "VPMAXUD256", argLength: 2, reg: v21, asm: "VPMAXUD", commutative: true, typ: "Vec256", resultInArg0: false},
 		{name: "VPMAXUDMasked256", argLength: 3, reg: w2kw, asm: "VPMAXUD", commutative: true, typ: "Vec256", resultInArg0: false},
 		{name: "VPMINUD256", argLength: 2, reg: v21, asm: "VPMINUD", commutative: true, typ: "Vec256", resultInArg0: false},
 		{name: "VPMINUDMasked256", argLength: 3, reg: w2kw, asm: "VPMINUD", commutative: true, typ: "Vec256", resultInArg0: false},
 		{name: "VPMULUDQ256", argLength: 2, reg: v21, asm: "VPMULUDQ", commutative: true, typ: "Vec256", resultInArg0: false},
+		{name: "VPSRLD256", argLength: 2, reg: vfpv, asm: "VPSRLD", commutative: false, typ: "Vec256", resultInArg0: false},
+		{name: "VPSRLDMasked256", argLength: 3, reg: wfpkw, asm: "VPSRLD", commutative: false, typ: "Vec256", resultInArg0: false},
+		{name: "VPSRLVD256", argLength: 2, reg: v21, asm: "VPSRLVD", commutative: false, typ: "Vec256", resultInArg0: false},
+		{name: "VPSRLVDMasked256", argLength: 3, reg: w2kw, asm: "VPSRLVD", commutative: false, typ: "Vec256", resultInArg0: false},
 		{name: "VPMAXUQ128", argLength: 2, reg: w21, asm: "VPMAXUQ", commutative: true, typ: "Vec128", resultInArg0: false},
 		{name: "VPMAXUQMasked128", argLength: 3, reg: w2kw, asm: "VPMAXUQ", commutative: true, typ: "Vec128", resultInArg0: false},
 		{name: "VPMINUQ128", argLength: 2, reg: w21, asm: "VPMINUQ", commutative: true, typ: "Vec128", resultInArg0: false},
 		{name: "VPMINUQMasked128", argLength: 3, reg: w2kw, asm: "VPMINUQ", commutative: true, typ: "Vec128", resultInArg0: false},
 		{name: "VPMULUDQMasked128", argLength: 3, reg: w2kw, asm: "VPMULUDQ", commutative: true, typ: "Vec128", resultInArg0: false},
+		{name: "VPSRLQ128", argLength: 2, reg: vfpv, asm: "VPSRLQ", commutative: false, typ: "Vec128", resultInArg0: false},
+		{name: "VPSRLQMasked128", argLength: 3, reg: wfpkw, asm: "VPSRLQ", commutative: false, typ: "Vec128", resultInArg0: false},
+		{name: "VPSRLVQ128", argLength: 2, reg: v21, asm: "VPSRLVQ", commutative: false, typ: "Vec128", resultInArg0: false},
+		{name: "VPSRLVQMasked128", argLength: 3, reg: w2kw, asm: "VPSRLVQ", commutative: false, typ: "Vec128", resultInArg0: false},
 		{name: "VPMAXUQ256", argLength: 2, reg: w21, asm: "VPMAXUQ", commutative: true, typ: "Vec256", resultInArg0: false},
 		{name: "VPMAXUQMasked256", argLength: 3, reg: w2kw, asm: "VPMAXUQ", commutative: true, typ: "Vec256", resultInArg0: false},
 		{name: "VPMINUQ256", argLength: 2, reg: w21, asm: "VPMINUQ", commutative: true, typ: "Vec256", resultInArg0: false},
 		{name: "VPMINUQMasked256", argLength: 3, reg: w2kw, asm: "VPMINUQ", commutative: true, typ: "Vec256", resultInArg0: false},
 		{name: "VPMULUDQMasked256", argLength: 3, reg: w2kw, asm: "VPMULUDQ", commutative: true, typ: "Vec256", resultInArg0: false},
+		{name: "VPSRLQ256", argLength: 2, reg: vfpv, asm: "VPSRLQ", commutative: false, typ: "Vec256", resultInArg0: false},
+		{name: "VPSRLQMasked256", argLength: 3, reg: wfpkw, asm: "VPSRLQ", commutative: false, typ: "Vec256", resultInArg0: false},
+		{name: "VPSRLVQ256", argLength: 2, reg: v21, asm: "VPSRLVQ", commutative: false, typ: "Vec256", resultInArg0: false},
+		{name: "VPSRLVQMasked256", argLength: 3, reg: w2kw, asm: "VPSRLVQ", commutative: false, typ: "Vec256", resultInArg0: false},
 		{name: "VPMAXUQ512", argLength: 2, reg: w21, asm: "VPMAXUQ", commutative: true, typ: "Vec512", resultInArg0: false},
 		{name: "VPMAXUQMasked512", argLength: 3, reg: w2kw, asm: "VPMAXUQ", commutative: true, typ: "Vec512", resultInArg0: false},
 		{name: "VPMINUQ512", argLength: 2, reg: w21, asm: "VPMINUQ", commutative: true, typ: "Vec512", resultInArg0: false},
 		{name: "VPMINUQMasked512", argLength: 3, reg: w2kw, asm: "VPMINUQ", commutative: true, typ: "Vec512", resultInArg0: false},
 		{name: "VPMULUDQ512", argLength: 2, reg: w21, asm: "VPMULUDQ", commutative: true, typ: "Vec512", resultInArg0: false},
 		{name: "VPMULUDQMasked512", argLength: 3, reg: w2kw, asm: "VPMULUDQ", commutative: true, typ: "Vec512", resultInArg0: false},
+		{name: "VPSRLQ512", argLength: 2, reg: wfpw, asm: "VPSRLQ", commutative: false, typ: "Vec512", resultInArg0: false},
+		{name: "VPSRLQMasked512", argLength: 3, reg: wfpkw, asm: "VPSRLQ", commutative: false, typ: "Vec512", resultInArg0: false},
+		{name: "VPSRLVQ512", argLength: 2, reg: w21, asm: "VPSRLVQ", commutative: false, typ: "Vec512", resultInArg0: false},
+		{name: "VPSRLVQMasked512", argLength: 3, reg: w2kw, asm: "VPSRLVQ", commutative: false, typ: "Vec512", resultInArg0: false},
 		{name: "VPAVGB128", argLength: 2, reg: v21, asm: "VPAVGB", commutative: true, typ: "Vec128", resultInArg0: false},
 		{name: "VPAVGBMasked128", argLength: 3, reg: w2kw, asm: "VPAVGB", commutative: true, typ: "Vec128", resultInArg0: false},
 		{name: "VGF2P8MULB128", argLength: 2, reg: w21, asm: "VGF2P8MULB", commutative: false, typ: "Vec128", resultInArg0: false},
--- a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
+++ b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
@ -312,8 +312,9 @@ func simdGenericOps() []opData {
 		{name: "SaturatedSubInt16x16", argLength: 2, commutative: false},
 		{name: "SaturatedSubMaskedInt16x16", argLength: 3, commutative: false},
 		{name: "ShiftAllLeftInt16x16", argLength: 2, commutative: false},
+		{name: "ShiftAllLeftMaskedInt16x16", argLength: 3, commutative: false},
 		{name: "ShiftAllRightInt16x16", argLength: 2, commutative: false},
-		{name: "ShiftAllRightSignExtendedInt16x16", argLength: 2, commutative: false},
+		{name: "ShiftAllRightMaskedInt16x16", argLength: 3, commutative: false},
 		{name: "ShiftLeftInt16x16", argLength: 2, commutative: false},
 		{name: "ShiftLeftAndFillUpperFromInt16x16", argLength: 3, commutative: false},
 		{name: "ShiftLeftAndFillUpperFromMaskedInt16x16", argLength: 4, commutative: false},
@ -322,8 +323,6 @@ func simdGenericOps() []opData {
 		{name: "ShiftRightAndFillUpperFromInt16x16", argLength: 3, commutative: false},
 		{name: "ShiftRightAndFillUpperFromMaskedInt16x16", argLength: 4, commutative: false},
 		{name: "ShiftRightMaskedInt16x16", argLength: 3, commutative: false},
-		{name: "ShiftRightSignExtendedInt16x16", argLength: 2, commutative: false},
-		{name: "ShiftRightSignExtendedMaskedInt16x16", argLength: 3, commutative: false},
 		{name: "SignInt16x16", argLength: 2, commutative: false},
 		{name: "SubInt16x16", argLength: 2, commutative: false},
 		{name: "SubMaskedInt16x16", argLength: 3, commutative: false},
@ -360,6 +359,10 @@ func simdGenericOps() []opData {
 		{name: "SaturatedAddMaskedInt16x32", argLength: 3, commutative: true},
 		{name: "SaturatedSubInt16x32", argLength: 2, commutative: false},
 		{name: "SaturatedSubMaskedInt16x32", argLength: 3, commutative: false},
+		{name: "ShiftAllLeftInt16x32", argLength: 2, commutative: false},
+		{name: "ShiftAllLeftMaskedInt16x32", argLength: 3, commutative: false},
+		{name: "ShiftAllRightInt16x32", argLength: 2, commutative: false},
+		{name: "ShiftAllRightMaskedInt16x32", argLength: 3, commutative: false},
 		{name: "ShiftLeftInt16x32", argLength: 2, commutative: false},
 		{name: "ShiftLeftAndFillUpperFromInt16x32", argLength: 3, commutative: false},
 		{name: "ShiftLeftAndFillUpperFromMaskedInt16x32", argLength: 4, commutative: false},
@ -368,8 +371,6 @@ func simdGenericOps() []opData {
 		{name: "ShiftRightAndFillUpperFromInt16x32", argLength: 3, commutative: false},
 		{name: "ShiftRightAndFillUpperFromMaskedInt16x32", argLength: 4, commutative: false},
 		{name: "ShiftRightMaskedInt16x32", argLength: 3, commutative: false},
-		{name: "ShiftRightSignExtendedInt16x32", argLength: 2, commutative: false},
-		{name: "ShiftRightSignExtendedMaskedInt16x32", argLength: 3, commutative: false},
 		{name: "SubInt16x32", argLength: 2, commutative: false},
 		{name: "SubMaskedInt16x32", argLength: 3, commutative: false},
 		{name: "AbsoluteInt16x8", argLength: 1, commutative: false},
@ -412,8 +413,9 @@ func simdGenericOps() []opData {
 		{name: "SaturatedSubInt16x8", argLength: 2, commutative: false},
 		{name: "SaturatedSubMaskedInt16x8", argLength: 3, commutative: false},
 		{name: "ShiftAllLeftInt16x8", argLength: 2, commutative: false},
+		{name: "ShiftAllLeftMaskedInt16x8", argLength: 3, commutative: false},
 		{name: "ShiftAllRightInt16x8", argLength: 2, commutative: false},
-		{name: "ShiftAllRightSignExtendedInt16x8", argLength: 2, commutative: false},
+		{name: "ShiftAllRightMaskedInt16x8", argLength: 3, commutative: false},
 		{name: "ShiftLeftInt16x8", argLength: 2, commutative: false},
 		{name: "ShiftLeftAndFillUpperFromInt16x8", argLength: 3, commutative: false},
 		{name: "ShiftLeftAndFillUpperFromMaskedInt16x8", argLength: 4, commutative: false},
@ -422,8 +424,6 @@ func simdGenericOps() []opData {
 		{name: "ShiftRightAndFillUpperFromInt16x8", argLength: 3, commutative: false},
 		{name: "ShiftRightAndFillUpperFromMaskedInt16x8", argLength: 4, commutative: false},
 		{name: "ShiftRightMaskedInt16x8", argLength: 3, commutative: false},
-		{name: "ShiftRightSignExtendedInt16x8", argLength: 2, commutative: false},
-		{name: "ShiftRightSignExtendedMaskedInt16x8", argLength: 3, commutative: false},
 		{name: "SignInt16x8", argLength: 2, commutative: false},
 		{name: "SubInt16x8", argLength: 2, commutative: false},
 		{name: "SubMaskedInt16x8", argLength: 3, commutative: false},
@ -468,6 +468,10 @@ func simdGenericOps() []opData {
 		{name: "SaturatedPairDotProdAccumulateMaskedInt32x16", argLength: 4, commutative: false},
 		{name: "SaturatedUnsignedSignedQuadDotProdAccumulateInt32x16", argLength: 3, commutative: false},
 		{name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x16", argLength: 4, commutative: false},
+		{name: "ShiftAllLeftInt32x16", argLength: 2, commutative: false},
+		{name: "ShiftAllLeftMaskedInt32x16", argLength: 3, commutative: false},
+		{name: "ShiftAllRightInt32x16", argLength: 2, commutative: false},
+		{name: "ShiftAllRightMaskedInt32x16", argLength: 3, commutative: false},
 		{name: "ShiftLeftInt32x16", argLength: 2, commutative: false},
 		{name: "ShiftLeftAndFillUpperFromInt32x16", argLength: 3, commutative: false},
 		{name: "ShiftLeftAndFillUpperFromMaskedInt32x16", argLength: 4, commutative: false},
@ -476,8 +480,6 @@ func simdGenericOps() []opData {
 		{name: "ShiftRightAndFillUpperFromInt32x16", argLength: 3, commutative: false},
 		{name: "ShiftRightAndFillUpperFromMaskedInt32x16", argLength: 4, commutative: false},
 		{name: "ShiftRightMaskedInt32x16", argLength: 3, commutative: false},
-		{name: "ShiftRightSignExtendedInt32x16", argLength: 2, commutative: false},
-		{name: "ShiftRightSignExtendedMaskedInt32x16", argLength: 3, commutative: false},
 		{name: "SubInt32x16", argLength: 2, commutative: false},
 		{name: "SubMaskedInt32x16", argLength: 3, commutative: false},
 		{name: "UnsignedSignedQuadDotProdAccumulateInt32x16", argLength: 3, commutative: false},
@ -528,8 +530,9 @@ func simdGenericOps() []opData {
 		{name: "SaturatedUnsignedSignedQuadDotProdAccumulateInt32x4", argLength: 3, commutative: false},
 		{name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4", argLength: 4, commutative: false},
 		{name: "ShiftAllLeftInt32x4", argLength: 2, commutative: false},
+		{name: "ShiftAllLeftMaskedInt32x4", argLength: 3, commutative: false},
 		{name: "ShiftAllRightInt32x4", argLength: 2, commutative: false},
-		{name: "ShiftAllRightSignExtendedInt32x4", argLength: 2, commutative: false},
+		{name: "ShiftAllRightMaskedInt32x4", argLength: 3, commutative: false},
 		{name: "ShiftLeftInt32x4", argLength: 2, commutative: false},
 		{name: "ShiftLeftAndFillUpperFromInt32x4", argLength: 3, commutative: false},
 		{name: "ShiftLeftAndFillUpperFromMaskedInt32x4", argLength: 4, commutative: false},
@ -538,8 +541,6 @@ func simdGenericOps() []opData {
 		{name: "ShiftRightAndFillUpperFromInt32x4", argLength: 3, commutative: false},
 		{name: "ShiftRightAndFillUpperFromMaskedInt32x4", argLength: 4, commutative: false},
 		{name: "ShiftRightMaskedInt32x4", argLength: 3, commutative: false},
-		{name: "ShiftRightSignExtendedInt32x4", argLength: 2, commutative: false},
-		{name: "ShiftRightSignExtendedMaskedInt32x4", argLength: 3, commutative: false},
 		{name: "SignInt32x4", argLength: 2, commutative: false},
 		{name: "SubInt32x4", argLength: 2, commutative: false},
 		{name: "SubMaskedInt32x4", argLength: 3, commutative: false},
@ -591,8 +592,9 @@ func simdGenericOps() []opData {
 		{name: "SaturatedUnsignedSignedQuadDotProdAccumulateInt32x8", argLength: 3, commutative: false},
 		{name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8", argLength: 4, commutative: false},
 		{name: "ShiftAllLeftInt32x8", argLength: 2, commutative: false},
+		{name: "ShiftAllLeftMaskedInt32x8", argLength: 3, commutative: false},
 		{name: "ShiftAllRightInt32x8", argLength: 2, commutative: false},
-		{name: "ShiftAllRightSignExtendedInt32x8", argLength: 2, commutative: false},
+		{name: "ShiftAllRightMaskedInt32x8", argLength: 3, commutative: false},
 		{name: "ShiftLeftInt32x8", argLength: 2, commutative: false},
 		{name: "ShiftLeftAndFillUpperFromInt32x8", argLength: 3, commutative: false},
 		{name: "ShiftLeftAndFillUpperFromMaskedInt32x8", argLength: 4, commutative: false},
@ -601,8 +603,6 @@ func simdGenericOps() []opData {
 		{name: "ShiftRightAndFillUpperFromInt32x8", argLength: 3, commutative: false},
 		{name: "ShiftRightAndFillUpperFromMaskedInt32x8", argLength: 4, commutative: false},
 		{name: "ShiftRightMaskedInt32x8", argLength: 3, commutative: false},
-		{name: "ShiftRightSignExtendedInt32x8", argLength: 2, commutative: false},
-		{name: "ShiftRightSignExtendedMaskedInt32x8", argLength: 3, commutative: false},
 		{name: "SignInt32x8", argLength: 2, commutative: false},
 		{name: "SubInt32x8", argLength: 2, commutative: false},
 		{name: "SubMaskedInt32x8", argLength: 3, commutative: false},
@ -650,8 +650,6 @@ func simdGenericOps() []opData {
 		{name: "ShiftAllLeftMaskedInt64x2", argLength: 3, commutative: false},
 		{name: "ShiftAllRightInt64x2", argLength: 2, commutative: false},
 		{name: "ShiftAllRightMaskedInt64x2", argLength: 3, commutative: false},
-		{name: "ShiftAllRightSignExtendedInt64x2", argLength: 2, commutative: false},
-		{name: "ShiftAllRightSignExtendedMaskedInt64x2", argLength: 3, commutative: false},
 		{name: "ShiftLeftInt64x2", argLength: 2, commutative: false},
 		{name: "ShiftLeftAndFillUpperFromInt64x2", argLength: 3, commutative: false},
 		{name: "ShiftLeftAndFillUpperFromMaskedInt64x2", argLength: 4, commutative: false},
@ -660,8 +658,6 @@ func simdGenericOps() []opData {
 		{name: "ShiftRightAndFillUpperFromInt64x2", argLength: 3, commutative: false},
 		{name: "ShiftRightAndFillUpperFromMaskedInt64x2", argLength: 4, commutative: false},
 		{name: "ShiftRightMaskedInt64x2", argLength: 3, commutative: false},
-		{name: "ShiftRightSignExtendedInt64x2", argLength: 2, commutative: false},
-		{name: "ShiftRightSignExtendedMaskedInt64x2", argLength: 3, commutative: false},
 		{name: "SubInt64x2", argLength: 2, commutative: false},
 		{name: "SubMaskedInt64x2", argLength: 3, commutative: false},
 		{name: "XorInt64x2", argLength: 2, commutative: true},
@ -706,8 +702,6 @@ func simdGenericOps() []opData {
 		{name: "ShiftAllLeftMaskedInt64x4", argLength: 3, commutative: false},
 		{name: "ShiftAllRightInt64x4", argLength: 2, commutative: false},
 		{name: "ShiftAllRightMaskedInt64x4", argLength: 3, commutative: false},
-		{name: "ShiftAllRightSignExtendedInt64x4", argLength: 2, commutative: false},
-		{name: "ShiftAllRightSignExtendedMaskedInt64x4", argLength: 3, commutative: false},
 		{name: "ShiftLeftInt64x4", argLength: 2, commutative: false},
 		{name: "ShiftLeftAndFillUpperFromInt64x4", argLength: 3, commutative: false},
 		{name: "ShiftLeftAndFillUpperFromMaskedInt64x4", argLength: 4, commutative: false},
@ -716,8 +710,6 @@ func simdGenericOps() []opData {
 		{name: "ShiftRightAndFillUpperFromInt64x4", argLength: 3, commutative: false},
 		{name: "ShiftRightAndFillUpperFromMaskedInt64x4", argLength: 4, commutative: false},
 		{name: "ShiftRightMaskedInt64x4", argLength: 3, commutative: false},
-		{name: "ShiftRightSignExtendedInt64x4", argLength: 2, commutative: false},
-		{name: "ShiftRightSignExtendedMaskedInt64x4", argLength: 3, commutative: false},
 		{name: "SubInt64x4", argLength: 2, commutative: false},
 		{name: "SubMaskedInt64x4", argLength: 3, commutative: false},
 		{name: "XorInt64x4", argLength: 2, commutative: true},
@ -762,8 +754,6 @@ func simdGenericOps() []opData {
 		{name: "ShiftAllLeftMaskedInt64x8", argLength: 3, commutative: false},
 		{name: "ShiftAllRightInt64x8", argLength: 2, commutative: false},
 		{name: "ShiftAllRightMaskedInt64x8", argLength: 3, commutative: false},
-		{name: "ShiftAllRightSignExtendedInt64x8", argLength: 2, commutative: false},
-		{name: "ShiftAllRightSignExtendedMaskedInt64x8", argLength: 3, commutative: false},
 		{name: "ShiftLeftInt64x8", argLength: 2, commutative: false},
 		{name: "ShiftLeftAndFillUpperFromInt64x8", argLength: 3, commutative: false},
 		{name: "ShiftLeftAndFillUpperFromMaskedInt64x8", argLength: 4, commutative: false},
@ -772,8 +762,6 @@ func simdGenericOps() []opData {
 		{name: "ShiftRightAndFillUpperFromInt64x8", argLength: 3, commutative: false},
 		{name: "ShiftRightAndFillUpperFromMaskedInt64x8", argLength: 4, commutative: false},
 		{name: "ShiftRightMaskedInt64x8", argLength: 3, commutative: false},
-		{name: "ShiftRightSignExtendedInt64x8", argLength: 2, commutative: false},
-		{name: "ShiftRightSignExtendedMaskedInt64x8", argLength: 3, commutative: false},
 		{name: "SubInt64x8", argLength: 2, commutative: false},
 		{name: "SubMaskedInt64x8", argLength: 3, commutative: false},
 		{name: "XorInt64x8", argLength: 2, commutative: true},
@ -906,7 +894,9 @@ func simdGenericOps() []opData {
 		{name: "SaturatedSubUint16x16", argLength: 2, commutative: false},
 		{name: "SaturatedSubMaskedUint16x16", argLength: 3, commutative: false},
 		{name: "ShiftAllLeftUint16x16", argLength: 2, commutative: false},
+		{name: "ShiftAllLeftMaskedUint16x16", argLength: 3, commutative: false},
 		{name: "ShiftAllRightUint16x16", argLength: 2, commutative: false},
+		{name: "ShiftAllRightMaskedUint16x16", argLength: 3, commutative: false},
 		{name: "ShiftLeftUint16x16", argLength: 2, commutative: false},
 		{name: "ShiftLeftAndFillUpperFromUint16x16", argLength: 3, commutative: false},
 		{name: "ShiftLeftAndFillUpperFromMaskedUint16x16", argLength: 4, commutative: false},
@ -915,8 +905,6 @@ func simdGenericOps() []opData {
 		{name: "ShiftRightAndFillUpperFromUint16x16", argLength: 3, commutative: false},
 		{name: "ShiftRightAndFillUpperFromMaskedUint16x16", argLength: 4, commutative: false},
 		{name: "ShiftRightMaskedUint16x16", argLength: 3, commutative: false},
-		{name: "ShiftRightSignExtendedUint16x16", argLength: 2, commutative: false},
-		{name: "ShiftRightSignExtendedMaskedUint16x16", argLength: 3, commutative: false},
 		{name: "SubUint16x16", argLength: 2, commutative: false},
 		{name: "SubMaskedUint16x16", argLength: 3, commutative: false},
 		{name: "XorUint16x16", argLength: 2, commutative: true},
@ -948,6 +936,10 @@ func simdGenericOps() []opData {
 		{name: "SaturatedAddMaskedUint16x32", argLength: 3, commutative: true},
 		{name: "SaturatedSubUint16x32", argLength: 2, commutative: false},
 		{name: "SaturatedSubMaskedUint16x32", argLength: 3, commutative: false},
+		{name: "ShiftAllLeftUint16x32", argLength: 2, commutative: false},
+		{name: "ShiftAllLeftMaskedUint16x32", argLength: 3, commutative: false},
+		{name: "ShiftAllRightUint16x32", argLength: 2, commutative: false},
+		{name: "ShiftAllRightMaskedUint16x32", argLength: 3, commutative: false},
 		{name: "ShiftLeftUint16x32", argLength: 2, commutative: false},
 		{name: "ShiftLeftAndFillUpperFromUint16x32", argLength: 3, commutative: false},
 		{name: "ShiftLeftAndFillUpperFromMaskedUint16x32", argLength: 4, commutative: false},
@ -956,8 +948,6 @@ func simdGenericOps() []opData {
 		{name: "ShiftRightAndFillUpperFromUint16x32", argLength: 3, commutative: false},
 		{name: "ShiftRightAndFillUpperFromMaskedUint16x32", argLength: 4, commutative: false},
 		{name: "ShiftRightMaskedUint16x32", argLength: 3, commutative: false},
-		{name: "ShiftRightSignExtendedUint16x32", argLength: 2, commutative: false},
-		{name: "ShiftRightSignExtendedMaskedUint16x32", argLength: 3, commutative: false},
 		{name: "SubUint16x32", argLength: 2, commutative: false},
 		{name: "SubMaskedUint16x32", argLength: 3, commutative: false},
 		{name: "AddUint16x8", argLength: 2, commutative: true},
@ -994,7 +984,9 @@ func simdGenericOps() []opData {
 		{name: "SaturatedSubUint16x8", argLength: 2, commutative: false},
 		{name: "SaturatedSubMaskedUint16x8", argLength: 3, commutative: false},
 		{name: "ShiftAllLeftUint16x8", argLength: 2, commutative: false},
+		{name: "ShiftAllLeftMaskedUint16x8", argLength: 3, commutative: false},
 		{name: "ShiftAllRightUint16x8", argLength: 2, commutative: false},
+		{name: "ShiftAllRightMaskedUint16x8", argLength: 3, commutative: false},
 		{name: "ShiftLeftUint16x8", argLength: 2, commutative: false},
 		{name: "ShiftLeftAndFillUpperFromUint16x8", argLength: 3, commutative: false},
 		{name: "ShiftLeftAndFillUpperFromMaskedUint16x8", argLength: 4, commutative: false},
@ -1003,8 +995,6 @@ func simdGenericOps() []opData {
 		{name: "ShiftRightAndFillUpperFromUint16x8", argLength: 3, commutative: false},
 		{name: "ShiftRightAndFillUpperFromMaskedUint16x8", argLength: 4, commutative: false},
 		{name: "ShiftRightMaskedUint16x8", argLength: 3, commutative: false},
-		{name: "ShiftRightSignExtendedUint16x8", argLength: 2, commutative: false},
-		{name: "ShiftRightSignExtendedMaskedUint16x8", argLength: 3, commutative: false},
 		{name: "SubUint16x8", argLength: 2, commutative: false},
 		{name: "SubMaskedUint16x8", argLength: 3, commutative: false},
 		{name: "XorUint16x8", argLength: 2, commutative: true},
@ -1040,6 +1030,10 @@ func simdGenericOps() []opData {
 		{name: "RotateRightMaskedUint32x16", argLength: 3, commutative: false},
 		{name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x16", argLength: 3, commutative: false},
 		{name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x16", argLength: 4, commutative: false},
+		{name: "ShiftAllLeftUint32x16", argLength: 2, commutative: false},
+		{name: "ShiftAllLeftMaskedUint32x16", argLength: 3, commutative: false},
+		{name: "ShiftAllRightUint32x16", argLength: 2, commutative: false},
+		{name: "ShiftAllRightMaskedUint32x16", argLength: 3, commutative: false},
 		{name: "ShiftLeftUint32x16", argLength: 2, commutative: false},
 		{name: "ShiftLeftAndFillUpperFromUint32x16", argLength: 3, commutative: false},
 		{name: "ShiftLeftAndFillUpperFromMaskedUint32x16", argLength: 4, commutative: false},
@ -1048,8 +1042,6 @@ func simdGenericOps() []opData {
 		{name: "ShiftRightAndFillUpperFromUint32x16", argLength: 3, commutative: false},
 		{name: "ShiftRightAndFillUpperFromMaskedUint32x16", argLength: 4, commutative: false},
 		{name: "ShiftRightMaskedUint32x16", argLength: 3, commutative: false},
-		{name: "ShiftRightSignExtendedUint32x16", argLength: 2, commutative: false},
-		{name: "ShiftRightSignExtendedMaskedUint32x16", argLength: 3, commutative: false},
 		{name: "SubUint32x16", argLength: 2, commutative: false},
 		{name: "SubMaskedUint32x16", argLength: 3, commutative: false},
 		{name: "UnsignedSignedQuadDotProdAccumulateUint32x16", argLength: 3, commutative: false},
@ -1092,7 +1084,9 @@ func simdGenericOps() []opData {
 		{name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x4", argLength: 3, commutative: false},
 		{name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x4", argLength: 4, commutative: false},
 		{name: "ShiftAllLeftUint32x4", argLength: 2, commutative: false},
+		{name: "ShiftAllLeftMaskedUint32x4", argLength: 3, commutative: false},
 		{name: "ShiftAllRightUint32x4", argLength: 2, commutative: false},
+		{name: "ShiftAllRightMaskedUint32x4", argLength: 3, commutative: false},
 		{name: "ShiftLeftUint32x4", argLength: 2, commutative: false},
 		{name: "ShiftLeftAndFillUpperFromUint32x4", argLength: 3, commutative: false},
 		{name: "ShiftLeftAndFillUpperFromMaskedUint32x4", argLength: 4, commutative: false},
@ -1101,8 +1095,6 @@ func simdGenericOps() []opData {
 		{name: "ShiftRightAndFillUpperFromUint32x4", argLength: 3, commutative: false},
 		{name: "ShiftRightAndFillUpperFromMaskedUint32x4", argLength: 4, commutative: false},
 		{name: "ShiftRightMaskedUint32x4", argLength: 3, commutative: false},
-		{name: "ShiftRightSignExtendedUint32x4", argLength: 2, commutative: false},
-		{name: "ShiftRightSignExtendedMaskedUint32x4", argLength: 3, commutative: false},
 		{name: "SubUint32x4", argLength: 2, commutative: false},
 		{name: "SubMaskedUint32x4", argLength: 3, commutative: false},
 		{name: "UnsignedSignedQuadDotProdAccumulateUint32x4", argLength: 3, commutative: false},
@ -1145,7 +1137,9 @@ func simdGenericOps() []opData {
 		{name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x8", argLength: 3, commutative: false},
 		{name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x8", argLength: 4, commutative: false},
 		{name: "ShiftAllLeftUint32x8", argLength: 2, commutative: false},
+		{name: "ShiftAllLeftMaskedUint32x8", argLength: 3, commutative: false},
 		{name: "ShiftAllRightUint32x8", argLength: 2, commutative: false},
+		{name: "ShiftAllRightMaskedUint32x8", argLength: 3, commutative: false},
 		{name: "ShiftLeftUint32x8", argLength: 2, commutative: false},
 		{name: "ShiftLeftAndFillUpperFromUint32x8", argLength: 3, commutative: false},
 		{name: "ShiftLeftAndFillUpperFromMaskedUint32x8", argLength: 4, commutative: false},
@ -1154,8 +1148,6 @@ func simdGenericOps() []opData {
 		{name: "ShiftRightAndFillUpperFromUint32x8", argLength: 3, commutative: false},
 		{name: "ShiftRightAndFillUpperFromMaskedUint32x8", argLength: 4, commutative: false},
 		{name: "ShiftRightMaskedUint32x8", argLength: 3, commutative: false},
-		{name: "ShiftRightSignExtendedUint32x8", argLength: 2, commutative: false},
-		{name: "ShiftRightSignExtendedMaskedUint32x8", argLength: 3, commutative: false},
 		{name: "SubUint32x8", argLength: 2, commutative: false},
 		{name: "SubMaskedUint32x8", argLength: 3, commutative: false},
 		{name: "UnsignedSignedQuadDotProdAccumulateUint32x8", argLength: 3, commutative: false},
@ -1206,8 +1198,6 @@ func simdGenericOps() []opData {
 		{name: "ShiftRightAndFillUpperFromUint64x2", argLength: 3, commutative: false},
 		{name: "ShiftRightAndFillUpperFromMaskedUint64x2", argLength: 4, commutative: false},
 		{name: "ShiftRightMaskedUint64x2", argLength: 3, commutative: false},
-		{name: "ShiftRightSignExtendedUint64x2", argLength: 2, commutative: false},
-		{name: "ShiftRightSignExtendedMaskedUint64x2", argLength: 3, commutative: false},
 		{name: "SubUint64x2", argLength: 2, commutative: false},
 		{name: "SubMaskedUint64x2", argLength: 3, commutative: false},
 		{name: "XorUint64x2", argLength: 2, commutative: true},
@ -1256,8 +1246,6 @@ func simdGenericOps() []opData {
 		{name: "ShiftRightAndFillUpperFromUint64x4", argLength: 3, commutative: false},
 		{name: "ShiftRightAndFillUpperFromMaskedUint64x4", argLength: 4, commutative: false},
 		{name: "ShiftRightMaskedUint64x4", argLength: 3, commutative: false},
-		{name: "ShiftRightSignExtendedUint64x4", argLength: 2, commutative: false},
-		{name: "ShiftRightSignExtendedMaskedUint64x4", argLength: 3, commutative: false},
 		{name: "SubUint64x4", argLength: 2, commutative: false},
 		{name: "SubMaskedUint64x4", argLength: 3, commutative: false},
 		{name: "XorUint64x4", argLength: 2, commutative: true},
@ -1306,8 +1294,6 @@ func simdGenericOps() []opData {
 		{name: "ShiftRightAndFillUpperFromUint64x8", argLength: 3, commutative: false},
 		{name: "ShiftRightAndFillUpperFromMaskedUint64x8", argLength: 4, commutative: false},
 		{name: "ShiftRightMaskedUint64x8", argLength: 3, commutative: false},
-		{name: "ShiftRightSignExtendedUint64x8", argLength: 2, commutative: false},
-		{name: "ShiftRightSignExtendedMaskedUint64x8", argLength: 3, commutative: false},
 		{name: "SubUint64x8", argLength: 2, commutative: false},
 		{name: "SubMaskedUint64x8", argLength: 3, commutative: false},
 		{name: "XorUint64x8", argLength: 2, commutative: true},
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
--- a/src/cmd/compile/internal/ssagen/simdintrinsics.go
+++ b/src/cmd/compile/internal/ssagen/simdintrinsics.go
@ -1250,15 +1250,19 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
 	addF(simdPackage, "Uint64x2.SetElem", opLen2Imm8(ssa.OpSetElemUint64x2, types.TypeVec128, 0), sys.AMD64)
 	addF(simdPackage, "Int16x8.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftInt16x8, types.TypeVec128), sys.AMD64)
 	addF(simdPackage, "Int16x16.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftInt16x16, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int16x32.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftInt16x32, types.TypeVec512), sys.AMD64)
 	addF(simdPackage, "Int32x4.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftInt32x4, types.TypeVec128), sys.AMD64)
 	addF(simdPackage, "Int32x8.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftInt32x8, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int32x16.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftInt32x16, types.TypeVec512), sys.AMD64)
 	addF(simdPackage, "Int64x2.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftInt64x2, types.TypeVec128), sys.AMD64)
 	addF(simdPackage, "Int64x4.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftInt64x4, types.TypeVec256), sys.AMD64)
 	addF(simdPackage, "Int64x8.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftInt64x8, types.TypeVec512), sys.AMD64)
 	addF(simdPackage, "Uint16x8.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftUint16x8, types.TypeVec128), sys.AMD64)
 	addF(simdPackage, "Uint16x16.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftUint16x16, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Uint16x32.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftUint16x32, types.TypeVec512), sys.AMD64)
 	addF(simdPackage, "Uint32x4.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftUint32x4, types.TypeVec128), sys.AMD64)
 	addF(simdPackage, "Uint32x8.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftUint32x8, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Uint32x16.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftUint32x16, types.TypeVec512), sys.AMD64)
 	addF(simdPackage, "Uint64x2.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftUint64x2, types.TypeVec128), sys.AMD64)
 	addF(simdPackage, "Uint64x4.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftUint64x4, types.TypeVec256), sys.AMD64)
 	addF(simdPackage, "Uint64x8.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftUint64x8, types.TypeVec512), sys.AMD64)
@ -1298,23 +1302,39 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
 	addF(simdPackage, "Uint64x2.ShiftAllLeftAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllLeftAndFillUpperFromMaskedUint64x2, types.TypeVec128, 0), sys.AMD64)
 	addF(simdPackage, "Uint64x4.ShiftAllLeftAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllLeftAndFillUpperFromMaskedUint64x4, types.TypeVec256, 0), sys.AMD64)
 	addF(simdPackage, "Uint64x8.ShiftAllLeftAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllLeftAndFillUpperFromMaskedUint64x8, types.TypeVec512, 0), sys.AMD64)
+	addF(simdPackage, "Int16x8.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt16x8, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Int16x16.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt16x16, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int16x32.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt16x32, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Int32x4.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt32x4, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Int32x8.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt32x8, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int32x16.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt32x16, types.TypeVec512), sys.AMD64)
 	addF(simdPackage, "Int64x2.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt64x2, types.TypeVec128), sys.AMD64)
 	addF(simdPackage, "Int64x4.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt64x4, types.TypeVec256), sys.AMD64)
 	addF(simdPackage, "Int64x8.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt64x8, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Uint16x8.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint16x8, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Uint16x16.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint16x16, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Uint16x32.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint16x32, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Uint32x4.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint32x4, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Uint32x8.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint32x8, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Uint32x16.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint32x16, types.TypeVec512), sys.AMD64)
 	addF(simdPackage, "Uint64x2.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint64x2, types.TypeVec128), sys.AMD64)
 	addF(simdPackage, "Uint64x4.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint64x4, types.TypeVec256), sys.AMD64)
 	addF(simdPackage, "Uint64x8.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint64x8, types.TypeVec512), sys.AMD64)
 	addF(simdPackage, "Int16x8.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt16x8, types.TypeVec128), sys.AMD64)
 	addF(simdPackage, "Int16x16.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt16x16, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int16x32.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt16x32, types.TypeVec512), sys.AMD64)
 	addF(simdPackage, "Int32x4.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt32x4, types.TypeVec128), sys.AMD64)
 	addF(simdPackage, "Int32x8.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt32x8, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int32x16.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt32x16, types.TypeVec512), sys.AMD64)
 	addF(simdPackage, "Int64x2.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt64x2, types.TypeVec128), sys.AMD64)
 	addF(simdPackage, "Int64x4.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt64x4, types.TypeVec256), sys.AMD64)
 	addF(simdPackage, "Int64x8.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt64x8, types.TypeVec512), sys.AMD64)
 	addF(simdPackage, "Uint16x8.ShiftAllRight", opLen2(ssa.OpShiftAllRightUint16x8, types.TypeVec128), sys.AMD64)
 	addF(simdPackage, "Uint16x16.ShiftAllRight", opLen2(ssa.OpShiftAllRightUint16x16, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Uint16x32.ShiftAllRight", opLen2(ssa.OpShiftAllRightUint16x32, types.TypeVec512), sys.AMD64)
 	addF(simdPackage, "Uint32x4.ShiftAllRight", opLen2(ssa.OpShiftAllRightUint32x4, types.TypeVec128), sys.AMD64)
 	addF(simdPackage, "Uint32x8.ShiftAllRight", opLen2(ssa.OpShiftAllRightUint32x8, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Uint32x16.ShiftAllRight", opLen2(ssa.OpShiftAllRightUint32x16, types.TypeVec512), sys.AMD64)
 	addF(simdPackage, "Uint64x2.ShiftAllRight", opLen2(ssa.OpShiftAllRightUint64x2, types.TypeVec128), sys.AMD64)
 	addF(simdPackage, "Uint64x4.ShiftAllRight", opLen2(ssa.OpShiftAllRightUint64x4, types.TypeVec256), sys.AMD64)
 	addF(simdPackage, "Uint64x8.ShiftAllRight", opLen2(ssa.OpShiftAllRightUint64x8, types.TypeVec512), sys.AMD64)
@ -1354,22 +1374,24 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
 	addF(simdPackage, "Uint64x2.ShiftAllRightAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllRightAndFillUpperFromMaskedUint64x2, types.TypeVec128, 0), sys.AMD64)
 	addF(simdPackage, "Uint64x4.ShiftAllRightAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllRightAndFillUpperFromMaskedUint64x4, types.TypeVec256, 0), sys.AMD64)
 	addF(simdPackage, "Uint64x8.ShiftAllRightAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllRightAndFillUpperFromMaskedUint64x8, types.TypeVec512, 0), sys.AMD64)
+	addF(simdPackage, "Int16x8.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt16x8, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Int16x16.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt16x16, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int16x32.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt16x32, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Int32x4.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt32x4, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Int32x8.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt32x8, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int32x16.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt32x16, types.TypeVec512), sys.AMD64)
 	addF(simdPackage, "Int64x2.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt64x2, types.TypeVec128), sys.AMD64)
 	addF(simdPackage, "Int64x4.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt64x4, types.TypeVec256), sys.AMD64)
 	addF(simdPackage, "Int64x8.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt64x8, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Uint16x8.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint16x8, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Uint16x16.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint16x16, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Uint16x32.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint16x32, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Uint32x4.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint32x4, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Uint32x8.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint32x8, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Uint32x16.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint32x16, types.TypeVec512), sys.AMD64)
 	addF(simdPackage, "Uint64x2.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint64x2, types.TypeVec128), sys.AMD64)
 	addF(simdPackage, "Uint64x4.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint64x4, types.TypeVec256), sys.AMD64)
 	addF(simdPackage, "Uint64x8.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint64x8, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Int16x8.ShiftAllRightSignExtended", opLen2(ssa.OpShiftAllRightSignExtendedInt16x8, types.TypeVec128), sys.AMD64)
-	addF(simdPackage, "Int16x16.ShiftAllRightSignExtended", opLen2(ssa.OpShiftAllRightSignExtendedInt16x16, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Int32x4.ShiftAllRightSignExtended", opLen2(ssa.OpShiftAllRightSignExtendedInt32x4, types.TypeVec128), sys.AMD64)
-	addF(simdPackage, "Int32x8.ShiftAllRightSignExtended", opLen2(ssa.OpShiftAllRightSignExtendedInt32x8, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Int64x2.ShiftAllRightSignExtended", opLen2(ssa.OpShiftAllRightSignExtendedInt64x2, types.TypeVec128), sys.AMD64)
-	addF(simdPackage, "Int64x4.ShiftAllRightSignExtended", opLen2(ssa.OpShiftAllRightSignExtendedInt64x4, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Int64x8.ShiftAllRightSignExtended", opLen2(ssa.OpShiftAllRightSignExtendedInt64x8, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Int64x2.ShiftAllRightSignExtendedMasked", opLen3(ssa.OpShiftAllRightSignExtendedMaskedInt64x2, types.TypeVec128), sys.AMD64)
-	addF(simdPackage, "Int64x4.ShiftAllRightSignExtendedMasked", opLen3(ssa.OpShiftAllRightSignExtendedMaskedInt64x4, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Int64x8.ShiftAllRightSignExtendedMasked", opLen3(ssa.OpShiftAllRightSignExtendedMaskedInt64x8, types.TypeVec512), sys.AMD64)
 	addF(simdPackage, "Int16x8.ShiftLeft", opLen2(ssa.OpShiftLeftInt16x8, types.TypeVec128), sys.AMD64)
 	addF(simdPackage, "Int16x16.ShiftLeft", opLen2(ssa.OpShiftLeftInt16x16, types.TypeVec256), sys.AMD64)
 	addF(simdPackage, "Int16x32.ShiftLeft", opLen2(ssa.OpShiftLeftInt16x32, types.TypeVec512), sys.AMD64)
@ -1514,42 +1536,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
 	addF(simdPackage, "Uint64x2.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint64x2, types.TypeVec128), sys.AMD64)
 	addF(simdPackage, "Uint64x4.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint64x4, types.TypeVec256), sys.AMD64)
 	addF(simdPackage, "Uint64x8.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint64x8, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Int16x8.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedInt16x8, types.TypeVec128), sys.AMD64)
-	addF(simdPackage, "Int16x16.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedInt16x16, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Int16x32.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedInt16x32, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Int32x4.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedInt32x4, types.TypeVec128), sys.AMD64)
-	addF(simdPackage, "Int32x8.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedInt32x8, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Int32x16.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedInt32x16, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Int64x2.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedInt64x2, types.TypeVec128), sys.AMD64)
-	addF(simdPackage, "Int64x4.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedInt64x4, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Int64x8.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedInt64x8, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Uint16x8.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedUint16x8, types.TypeVec128), sys.AMD64)
-	addF(simdPackage, "Uint16x16.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedUint16x16, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Uint16x32.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedUint16x32, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Uint32x4.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedUint32x4, types.TypeVec128), sys.AMD64)
-	addF(simdPackage, "Uint32x8.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedUint32x8, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Uint32x16.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedUint32x16, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Uint64x2.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedUint64x2, types.TypeVec128), sys.AMD64)
-	addF(simdPackage, "Uint64x4.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedUint64x4, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Uint64x8.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedUint64x8, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Int16x8.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedInt16x8, types.TypeVec128), sys.AMD64)
-	addF(simdPackage, "Int16x16.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedInt16x16, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Int16x32.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedInt16x32, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Int32x4.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedInt32x4, types.TypeVec128), sys.AMD64)
-	addF(simdPackage, "Int32x8.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedInt32x8, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Int32x16.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedInt32x16, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Int64x2.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedInt64x2, types.TypeVec128), sys.AMD64)
-	addF(simdPackage, "Int64x4.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedInt64x4, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Int64x8.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedInt64x8, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Uint16x8.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedUint16x8, types.TypeVec128), sys.AMD64)
-	addF(simdPackage, "Uint16x16.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedUint16x16, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Uint16x32.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedUint16x32, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Uint32x4.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedUint32x4, types.TypeVec128), sys.AMD64)
-	addF(simdPackage, "Uint32x8.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedUint32x8, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Uint32x16.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedUint32x16, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Uint64x2.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedUint64x2, types.TypeVec128), sys.AMD64)
-	addF(simdPackage, "Uint64x4.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedUint64x4, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Uint64x8.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedUint64x8, types.TypeVec512), sys.AMD64)
 	addF(simdPackage, "Int8x16.Sign", opLen2(ssa.OpSignInt8x16, types.TypeVec128), sys.AMD64)
 	addF(simdPackage, "Int8x32.Sign", opLen2(ssa.OpSignInt8x32, types.TypeVec256), sys.AMD64)
 	addF(simdPackage, "Int16x8.Sign", opLen2(ssa.OpSignInt16x8, types.TypeVec128), sys.AMD64)
--- a/src/simd/ops_amd64.go
+++ b/src/simd/ops_amd64.go
@ -6883,6 +6883,11 @@ func (x Int16x8) ShiftAllLeft(y uint64) Int16x8
 // Asm: VPSLLW, CPU Feature: AVX2
 func (x Int16x16) ShiftAllLeft(y uint64) Int16x16

+// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
+//
+// Asm: VPSLLW, CPU Feature: AVX512EVEX
+func (x Int16x32) ShiftAllLeft(y uint64) Int16x32
+
 // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
 //
 // Asm: VPSLLD, CPU Feature: AVX
@ -6893,6 +6898,11 @@ func (x Int32x4) ShiftAllLeft(y uint64) Int32x4
 // Asm: VPSLLD, CPU Feature: AVX2
 func (x Int32x8) ShiftAllLeft(y uint64) Int32x8

+// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
+//
+// Asm: VPSLLD, CPU Feature: AVX512EVEX
+func (x Int32x16) ShiftAllLeft(y uint64) Int32x16
+
 // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
 //
 // Asm: VPSLLQ, CPU Feature: AVX
@ -6918,6 +6928,11 @@ func (x Uint16x8) ShiftAllLeft(y uint64) Uint16x8
 // Asm: VPSLLW, CPU Feature: AVX2
 func (x Uint16x16) ShiftAllLeft(y uint64) Uint16x16

+// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
+//
+// Asm: VPSLLW, CPU Feature: AVX512EVEX
+func (x Uint16x32) ShiftAllLeft(y uint64) Uint16x32
+
 // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
 //
 // Asm: VPSLLD, CPU Feature: AVX
@ -6928,6 +6943,11 @@ func (x Uint32x4) ShiftAllLeft(y uint64) Uint32x4
 // Asm: VPSLLD, CPU Feature: AVX2
 func (x Uint32x8) ShiftAllLeft(y uint64) Uint32x8

+// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
+//
+// Asm: VPSLLD, CPU Feature: AVX512EVEX
+func (x Uint32x16) ShiftAllLeft(y uint64) Uint32x16
+
 // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
 //
 // Asm: VPSLLQ, CPU Feature: AVX
@ -7237,6 +7257,36 @@ func (x Uint64x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint64x8, z

 /* ShiftAllLeftMasked */

+// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
+//
+// Asm: VPSLLW, CPU Feature: AVX512EVEX
+func (x Int16x8) ShiftAllLeftMasked(y uint64, z Mask16x8) Int16x8
+
+// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
+//
+// Asm: VPSLLW, CPU Feature: AVX512EVEX
+func (x Int16x16) ShiftAllLeftMasked(y uint64, z Mask16x16) Int16x16
+
+// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
+//
+// Asm: VPSLLW, CPU Feature: AVX512EVEX
+func (x Int16x32) ShiftAllLeftMasked(y uint64, z Mask16x32) Int16x32
+
+// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
+//
+// Asm: VPSLLD, CPU Feature: AVX512EVEX
+func (x Int32x4) ShiftAllLeftMasked(y uint64, z Mask32x4) Int32x4
+
+// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
+//
+// Asm: VPSLLD, CPU Feature: AVX512EVEX
+func (x Int32x8) ShiftAllLeftMasked(y uint64, z Mask32x8) Int32x8
+
+// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
+//
+// Asm: VPSLLD, CPU Feature: AVX512EVEX
+func (x Int32x16) ShiftAllLeftMasked(y uint64, z Mask32x16) Int32x16
+
 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
 //
 // Asm: VPSLLQ, CPU Feature: AVX512EVEX
@ -7252,6 +7302,36 @@ func (x Int64x4) ShiftAllLeftMasked(y uint64, z Mask64x4) Int64x4
 // Asm: VPSLLQ, CPU Feature: AVX512EVEX
 func (x Int64x8) ShiftAllLeftMasked(y uint64, z Mask64x8) Int64x8

+// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
+//
+// Asm: VPSLLW, CPU Feature: AVX512EVEX
+func (x Uint16x8) ShiftAllLeftMasked(y uint64, z Mask16x8) Uint16x8
+
+// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
+//
+// Asm: VPSLLW, CPU Feature: AVX512EVEX
+func (x Uint16x16) ShiftAllLeftMasked(y uint64, z Mask16x16) Uint16x16
+
+// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
+//
+// Asm: VPSLLW, CPU Feature: AVX512EVEX
+func (x Uint16x32) ShiftAllLeftMasked(y uint64, z Mask16x32) Uint16x32
+
+// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
+//
+// Asm: VPSLLD, CPU Feature: AVX512EVEX
+func (x Uint32x4) ShiftAllLeftMasked(y uint64, z Mask32x4) Uint32x4
+
+// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
+//
+// Asm: VPSLLD, CPU Feature: AVX512EVEX
+func (x Uint32x8) ShiftAllLeftMasked(y uint64, z Mask32x8) Uint32x8
+
+// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
+//
+// Asm: VPSLLD, CPU Feature: AVX512EVEX
+func (x Uint32x16) ShiftAllLeftMasked(y uint64, z Mask32x16) Uint32x16
+
 // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
 //
 // Asm: VPSLLQ, CPU Feature: AVX512EVEX
@ -7269,39 +7349,49 @@ func (x Uint64x8) ShiftAllLeftMasked(y uint64, z Mask64x8) Uint64x8

 /* ShiftAllRight */

-// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
+// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRLW, CPU Feature: AVX
+// Asm: VPSRAW, CPU Feature: AVX
 func (x Int16x8) ShiftAllRight(y uint64) Int16x8

-// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
+// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRLW, CPU Feature: AVX2
+// Asm: VPSRAW, CPU Feature: AVX2
 func (x Int16x16) ShiftAllRight(y uint64) Int16x16

-// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
+// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRLD, CPU Feature: AVX
+// Asm: VPSRAW, CPU Feature: AVX512EVEX
+func (x Int16x32) ShiftAllRight(y uint64) Int16x32
+
+// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
+//
+// Asm: VPSRAD, CPU Feature: AVX
 func (x Int32x4) ShiftAllRight(y uint64) Int32x4

-// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
+// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRLD, CPU Feature: AVX2
+// Asm: VPSRAD, CPU Feature: AVX2
 func (x Int32x8) ShiftAllRight(y uint64) Int32x8

-// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
+// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRLQ, CPU Feature: AVX
+// Asm: VPSRAD, CPU Feature: AVX512EVEX
+func (x Int32x16) ShiftAllRight(y uint64) Int32x16
+
+// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
+//
+// Asm: VPSRAQ, CPU Feature: AVX512EVEX
 func (x Int64x2) ShiftAllRight(y uint64) Int64x2

-// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
+// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRLQ, CPU Feature: AVX2
+// Asm: VPSRAQ, CPU Feature: AVX512EVEX
 func (x Int64x4) ShiftAllRight(y uint64) Int64x4

-// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
+// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRLQ, CPU Feature: AVX512EVEX
+// Asm: VPSRAQ, CPU Feature: AVX512EVEX
 func (x Int64x8) ShiftAllRight(y uint64) Int64x8

 // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
@ -7314,6 +7404,11 @@ func (x Uint16x8) ShiftAllRight(y uint64) Uint16x8
 // Asm: VPSRLW, CPU Feature: AVX2
 func (x Uint16x16) ShiftAllRight(y uint64) Uint16x16

+// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
+//
+// Asm: VPSRLW, CPU Feature: AVX512EVEX
+func (x Uint16x32) ShiftAllRight(y uint64) Uint16x32
+
 // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
 //
 // Asm: VPSRLD, CPU Feature: AVX
@ -7324,6 +7419,11 @@ func (x Uint32x4) ShiftAllRight(y uint64) Uint32x4
 // Asm: VPSRLD, CPU Feature: AVX2
 func (x Uint32x8) ShiftAllRight(y uint64) Uint32x8

+// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
+//
+// Asm: VPSRLD, CPU Feature: AVX512EVEX
+func (x Uint32x16) ShiftAllRight(y uint64) Uint32x16
+
 // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
 //
 // Asm: VPSRLQ, CPU Feature: AVX
@ -7633,20 +7733,80 @@ func (x Uint64x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint64x8, z

 /* ShiftAllRightMasked */

-// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
+// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRLQ, CPU Feature: AVX512EVEX
+// Asm: VPSRAW, CPU Feature: AVX512EVEX
+func (x Int16x8) ShiftAllRightMasked(y uint64, z Mask16x8) Int16x8
+
+// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
+//
+// Asm: VPSRAW, CPU Feature: AVX512EVEX
+func (x Int16x16) ShiftAllRightMasked(y uint64, z Mask16x16) Int16x16
+
+// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
+//
+// Asm: VPSRAW, CPU Feature: AVX512EVEX
+func (x Int16x32) ShiftAllRightMasked(y uint64, z Mask16x32) Int16x32
+
+// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
+//
+// Asm: VPSRAD, CPU Feature: AVX512EVEX
+func (x Int32x4) ShiftAllRightMasked(y uint64, z Mask32x4) Int32x4
+
+// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
+//
+// Asm: VPSRAD, CPU Feature: AVX512EVEX
+func (x Int32x8) ShiftAllRightMasked(y uint64, z Mask32x8) Int32x8
+
+// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
+//
+// Asm: VPSRAD, CPU Feature: AVX512EVEX
+func (x Int32x16) ShiftAllRightMasked(y uint64, z Mask32x16) Int32x16
+
+// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
+//
+// Asm: VPSRAQ, CPU Feature: AVX512EVEX
 func (x Int64x2) ShiftAllRightMasked(y uint64, z Mask64x2) Int64x2

-// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
+// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRLQ, CPU Feature: AVX512EVEX
+// Asm: VPSRAQ, CPU Feature: AVX512EVEX
 func (x Int64x4) ShiftAllRightMasked(y uint64, z Mask64x4) Int64x4

+// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
+//
+// Asm: VPSRAQ, CPU Feature: AVX512EVEX
+func (x Int64x8) ShiftAllRightMasked(y uint64, z Mask64x8) Int64x8
+
 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
 //
-// Asm: VPSRLQ, CPU Feature: AVX512EVEX
-func (x Int64x8) ShiftAllRightMasked(y uint64, z Mask64x8) Int64x8
+// Asm: VPSRLW, CPU Feature: AVX512EVEX
+func (x Uint16x8) ShiftAllRightMasked(y uint64, z Mask16x8) Uint16x8
+
+// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
+//
+// Asm: VPSRLW, CPU Feature: AVX512EVEX
+func (x Uint16x16) ShiftAllRightMasked(y uint64, z Mask16x16) Uint16x16
+
+// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
+//
+// Asm: VPSRLW, CPU Feature: AVX512EVEX
+func (x Uint16x32) ShiftAllRightMasked(y uint64, z Mask16x32) Uint16x32
+
+// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
+//
+// Asm: VPSRLD, CPU Feature: AVX512EVEX
+func (x Uint32x4) ShiftAllRightMasked(y uint64, z Mask32x4) Uint32x4
+
+// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
+//
+// Asm: VPSRLD, CPU Feature: AVX512EVEX
+func (x Uint32x8) ShiftAllRightMasked(y uint64, z Mask32x8) Uint32x8
+
+// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
+//
+// Asm: VPSRLD, CPU Feature: AVX512EVEX
+func (x Uint32x16) ShiftAllRightMasked(y uint64, z Mask32x16) Uint32x16

 // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
 //
@ -7663,60 +7823,6 @@ func (x Uint64x4) ShiftAllRightMasked(y uint64, z Mask64x4) Uint64x4
 // Asm: VPSRLQ, CPU Feature: AVX512EVEX
 func (x Uint64x8) ShiftAllRightMasked(y uint64, z Mask64x8) Uint64x8

-/* ShiftAllRightSignExtended */
-
-// ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAW, CPU Feature: AVX
-func (x Int16x8) ShiftAllRightSignExtended(y uint64) Int16x8
-
-// ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAW, CPU Feature: AVX2
-func (x Int16x16) ShiftAllRightSignExtended(y uint64) Int16x16
-
-// ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAD, CPU Feature: AVX
-func (x Int32x4) ShiftAllRightSignExtended(y uint64) Int32x4
-
-// ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAD, CPU Feature: AVX2
-func (x Int32x8) ShiftAllRightSignExtended(y uint64) Int32x8
-
-// ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAQ, CPU Feature: AVX512EVEX
-func (x Int64x2) ShiftAllRightSignExtended(y uint64) Int64x2
-
-// ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAQ, CPU Feature: AVX512EVEX
-func (x Int64x4) ShiftAllRightSignExtended(y uint64) Int64x4
-
-// ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAQ, CPU Feature: AVX512EVEX
-func (x Int64x8) ShiftAllRightSignExtended(y uint64) Int64x8
-
-/* ShiftAllRightSignExtendedMasked */
-
-// ShiftAllRightSignExtendedMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAQ, CPU Feature: AVX512EVEX
-func (x Int64x2) ShiftAllRightSignExtendedMasked(y uint64, z Mask64x2) Int64x2
-
-// ShiftAllRightSignExtendedMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAQ, CPU Feature: AVX512EVEX
-func (x Int64x4) ShiftAllRightSignExtendedMasked(y uint64, z Mask64x4) Int64x4
-
-// ShiftAllRightSignExtendedMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAQ, CPU Feature: AVX512EVEX
-func (x Int64x8) ShiftAllRightSignExtendedMasked(y uint64, z Mask64x8) Int64x8
-
 /* ShiftLeft */

 // ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
@ -8123,49 +8229,49 @@ func (x Uint64x8) ShiftLeftMasked(y Uint64x8, z Mask64x8) Uint64x8

 /* ShiftRight */

-// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRLVW, CPU Feature: AVX512EVEX
+// Asm: VPSRAVW, CPU Feature: AVX512EVEX
 func (x Int16x8) ShiftRight(y Int16x8) Int16x8

-// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRLVW, CPU Feature: AVX512EVEX
+// Asm: VPSRAVW, CPU Feature: AVX512EVEX
 func (x Int16x16) ShiftRight(y Int16x16) Int16x16

-// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRLVW, CPU Feature: AVX512EVEX
+// Asm: VPSRAVW, CPU Feature: AVX512EVEX
 func (x Int16x32) ShiftRight(y Int16x32) Int16x32

-// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRLVD, CPU Feature: AVX2
+// Asm: VPSRAVD, CPU Feature: AVX2
 func (x Int32x4) ShiftRight(y Int32x4) Int32x4

-// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRLVD, CPU Feature: AVX2
+// Asm: VPSRAVD, CPU Feature: AVX2
 func (x Int32x8) ShiftRight(y Int32x8) Int32x8

-// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRLVD, CPU Feature: AVX512EVEX
+// Asm: VPSRAVD, CPU Feature: AVX512EVEX
 func (x Int32x16) ShiftRight(y Int32x16) Int32x16

-// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRLVQ, CPU Feature: AVX2
+// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
 func (x Int64x2) ShiftRight(y Int64x2) Int64x2

-// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRLVQ, CPU Feature: AVX2
+// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
 func (x Int64x4) ShiftRight(y Int64x4) Int64x4

-// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRLVQ, CPU Feature: AVX512EVEX
+// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
 func (x Int64x8) ShiftRight(y Int64x8) Int64x8

 // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
@ -8435,49 +8541,49 @@ func (x Uint64x8) ShiftRightAndFillUpperFromMasked(y Uint64x8, z Uint64x8, u Mas

 /* ShiftRightMasked */

-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRLVW, CPU Feature: AVX512EVEX
+// Asm: VPSRAVW, CPU Feature: AVX512EVEX
 func (x Int16x8) ShiftRightMasked(y Int16x8, z Mask16x8) Int16x8

-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRLVW, CPU Feature: AVX512EVEX
+// Asm: VPSRAVW, CPU Feature: AVX512EVEX
 func (x Int16x16) ShiftRightMasked(y Int16x16, z Mask16x16) Int16x16

-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRLVW, CPU Feature: AVX512EVEX
+// Asm: VPSRAVW, CPU Feature: AVX512EVEX
 func (x Int16x32) ShiftRightMasked(y Int16x32, z Mask16x32) Int16x32

-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRLVD, CPU Feature: AVX512EVEX
+// Asm: VPSRAVD, CPU Feature: AVX512EVEX
 func (x Int32x4) ShiftRightMasked(y Int32x4, z Mask32x4) Int32x4

-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRLVD, CPU Feature: AVX512EVEX
+// Asm: VPSRAVD, CPU Feature: AVX512EVEX
 func (x Int32x8) ShiftRightMasked(y Int32x8, z Mask32x8) Int32x8

-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRLVD, CPU Feature: AVX512EVEX
+// Asm: VPSRAVD, CPU Feature: AVX512EVEX
 func (x Int32x16) ShiftRightMasked(y Int32x16, z Mask32x16) Int32x16

-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRLVQ, CPU Feature: AVX512EVEX
+// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
 func (x Int64x2) ShiftRightMasked(y Int64x2, z Mask64x2) Int64x2

-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRLVQ, CPU Feature: AVX512EVEX
+// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
 func (x Int64x4) ShiftRightMasked(y Int64x4, z Mask64x4) Int64x4

-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRLVQ, CPU Feature: AVX512EVEX
+// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
 func (x Int64x8) ShiftRightMasked(y Int64x8, z Mask64x8) Int64x8

 // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
@ -8525,190 +8631,6 @@ func (x Uint64x4) ShiftRightMasked(y Uint64x4, z Mask64x4) Uint64x4
 // Asm: VPSRLVQ, CPU Feature: AVX512EVEX
 func (x Uint64x8) ShiftRightMasked(y Uint64x8, z Mask64x8) Uint64x8

-/* ShiftRightSignExtended */
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVW, CPU Feature: AVX512EVEX
-func (x Int16x8) ShiftRightSignExtended(y Int16x8) Int16x8
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVW, CPU Feature: AVX512EVEX
-func (x Int16x16) ShiftRightSignExtended(y Int16x16) Int16x16
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVW, CPU Feature: AVX512EVEX
-func (x Int16x32) ShiftRightSignExtended(y Int16x32) Int16x32
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVD, CPU Feature: AVX2
-func (x Int32x4) ShiftRightSignExtended(y Int32x4) Int32x4
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVD, CPU Feature: AVX2
-func (x Int32x8) ShiftRightSignExtended(y Int32x8) Int32x8
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVD, CPU Feature: AVX512EVEX
-func (x Int32x16) ShiftRightSignExtended(y Int32x16) Int32x16
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
-func (x Int64x2) ShiftRightSignExtended(y Int64x2) Int64x2
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
-func (x Int64x4) ShiftRightSignExtended(y Int64x4) Int64x4
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
-func (x Int64x8) ShiftRightSignExtended(y Int64x8) Int64x8
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVW, CPU Feature: AVX512EVEX
-func (x Uint16x8) ShiftRightSignExtended(y Uint16x8) Uint16x8
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVW, CPU Feature: AVX512EVEX
-func (x Uint16x16) ShiftRightSignExtended(y Uint16x16) Uint16x16
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVW, CPU Feature: AVX512EVEX
-func (x Uint16x32) ShiftRightSignExtended(y Uint16x32) Uint16x32
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVD, CPU Feature: AVX2
-func (x Uint32x4) ShiftRightSignExtended(y Uint32x4) Uint32x4
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVD, CPU Feature: AVX2
-func (x Uint32x8) ShiftRightSignExtended(y Uint32x8) Uint32x8
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVD, CPU Feature: AVX512EVEX
-func (x Uint32x16) ShiftRightSignExtended(y Uint32x16) Uint32x16
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
-func (x Uint64x2) ShiftRightSignExtended(y Uint64x2) Uint64x2
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
-func (x Uint64x4) ShiftRightSignExtended(y Uint64x4) Uint64x4
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
-func (x Uint64x8) ShiftRightSignExtended(y Uint64x8) Uint64x8
-
-/* ShiftRightSignExtendedMasked */
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVW, CPU Feature: AVX512EVEX
-func (x Int16x8) ShiftRightSignExtendedMasked(y Int16x8, z Mask16x8) Int16x8
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVW, CPU Feature: AVX512EVEX
-func (x Int16x16) ShiftRightSignExtendedMasked(y Int16x16, z Mask16x16) Int16x16
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVW, CPU Feature: AVX512EVEX
-func (x Int16x32) ShiftRightSignExtendedMasked(y Int16x32, z Mask16x32) Int16x32
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVD, CPU Feature: AVX512EVEX
-func (x Int32x4) ShiftRightSignExtendedMasked(y Int32x4, z Mask32x4) Int32x4
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVD, CPU Feature: AVX512EVEX
-func (x Int32x8) ShiftRightSignExtendedMasked(y Int32x8, z Mask32x8) Int32x8
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVD, CPU Feature: AVX512EVEX
-func (x Int32x16) ShiftRightSignExtendedMasked(y Int32x16, z Mask32x16) Int32x16
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
-func (x Int64x2) ShiftRightSignExtendedMasked(y Int64x2, z Mask64x2) Int64x2
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
-func (x Int64x4) ShiftRightSignExtendedMasked(y Int64x4, z Mask64x4) Int64x4
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
-func (x Int64x8) ShiftRightSignExtendedMasked(y Int64x8, z Mask64x8) Int64x8
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVW, CPU Feature: AVX512EVEX
-func (x Uint16x8) ShiftRightSignExtendedMasked(y Uint16x8, z Mask16x8) Uint16x8
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVW, CPU Feature: AVX512EVEX
-func (x Uint16x16) ShiftRightSignExtendedMasked(y Uint16x16, z Mask16x16) Uint16x16
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVW, CPU Feature: AVX512EVEX
-func (x Uint16x32) ShiftRightSignExtendedMasked(y Uint16x32, z Mask16x32) Uint16x32
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVD, CPU Feature: AVX512EVEX
-func (x Uint32x4) ShiftRightSignExtendedMasked(y Uint32x4, z Mask32x4) Uint32x4
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVD, CPU Feature: AVX512EVEX
-func (x Uint32x8) ShiftRightSignExtendedMasked(y Uint32x8, z Mask32x8) Uint32x8
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVD, CPU Feature: AVX512EVEX
-func (x Uint32x16) ShiftRightSignExtendedMasked(y Uint32x16, z Mask32x16) Uint32x16
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
-func (x Uint64x2) ShiftRightSignExtendedMasked(y Uint64x2, z Mask64x2) Uint64x2
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
-func (x Uint64x4) ShiftRightSignExtendedMasked(y Uint64x4, z Mask64x4) Uint64x4
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
-func (x Uint64x8) ShiftRightSignExtendedMasked(y Uint64x8, z Mask64x8) Uint64x8
-
 /* Sign */

 // Sign returns the product of the first operand with -1, 0, or 1,
--- a/src/simd/simd_wrapped_test.go
+++ b/src/simd/simd_wrapped_test.go
@ -2055,8 +2055,6 @@ func testInt16x8Binary(t *testing.T, v0 []int16, v1 []int16, want []int16, which
 		gotv = vec0.ShiftLeft(vec1)
 	case "ShiftRight":
 		gotv = vec0.ShiftRight(vec1)
-	case "ShiftRightSignExtended":
-		gotv = vec0.ShiftRightSignExtended(vec1)
 	case "Sign":
 		gotv = vec0.Sign(vec1)
 	case "Sub":
@ -2101,8 +2099,6 @@ func testInt16x8BinaryMasked(t *testing.T, v0 []int16, v1 []int16, v2 []int16, w
 		gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask16x8())
 	case "ShiftRightMasked":
 		gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask16x8())
-	case "ShiftRightSignExtendedMasked":
-		gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask16x8())
 	case "SubMasked":
 		gotv = vec0.SubMasked(vec1, vec2.AsMask16x8())

@ -2356,8 +2352,6 @@ func testInt16x16Binary(t *testing.T, v0 []int16, v1 []int16, want []int16, whic
 		gotv = vec0.ShiftLeft(vec1)
 	case "ShiftRight":
 		gotv = vec0.ShiftRight(vec1)
-	case "ShiftRightSignExtended":
-		gotv = vec0.ShiftRightSignExtended(vec1)
 	case "Sign":
 		gotv = vec0.Sign(vec1)
 	case "Sub":
@ -2402,8 +2396,6 @@ func testInt16x16BinaryMasked(t *testing.T, v0 []int16, v1 []int16, v2 []int16,
 		gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask16x16())
 	case "ShiftRightMasked":
 		gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask16x16())
-	case "ShiftRightSignExtendedMasked":
-		gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask16x16())
 	case "SubMasked":
 		gotv = vec0.SubMasked(vec1, vec2.AsMask16x16())

@ -2643,8 +2635,6 @@ func testInt16x32Binary(t *testing.T, v0 []int16, v1 []int16, want []int16, whic
 		gotv = vec0.ShiftLeft(vec1)
 	case "ShiftRight":
 		gotv = vec0.ShiftRight(vec1)
-	case "ShiftRightSignExtended":
-		gotv = vec0.ShiftRightSignExtended(vec1)
 	case "Sub":
 		gotv = vec0.Sub(vec1)

@ -2685,8 +2675,6 @@ func testInt16x32BinaryMasked(t *testing.T, v0 []int16, v1 []int16, v2 []int16,
 		gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask16x32())
 	case "ShiftRightMasked":
 		gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask16x32())
-	case "ShiftRightSignExtendedMasked":
-		gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask16x32())
 	case "SubMasked":
 		gotv = vec0.SubMasked(vec1, vec2.AsMask16x32())

@ -2934,8 +2922,6 @@ func testInt32x4Binary(t *testing.T, v0 []int32, v1 []int32, want []int32, which
 		gotv = vec0.ShiftLeft(vec1)
 	case "ShiftRight":
 		gotv = vec0.ShiftRight(vec1)
-	case "ShiftRightSignExtended":
-		gotv = vec0.ShiftRightSignExtended(vec1)
 	case "Sign":
 		gotv = vec0.Sign(vec1)
 	case "Sub":
@ -2984,8 +2970,6 @@ func testInt32x4BinaryMasked(t *testing.T, v0 []int32, v1 []int32, v2 []int32, w
 		gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask32x4())
 	case "ShiftRightMasked":
 		gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask32x4())
-	case "ShiftRightSignExtendedMasked":
-		gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask32x4())
 	case "SubMasked":
 		gotv = vec0.SubMasked(vec1, vec2.AsMask32x4())
 	case "XorMasked":
@ -3311,8 +3295,6 @@ func testInt32x8Binary(t *testing.T, v0 []int32, v1 []int32, want []int32, which
 		gotv = vec0.ShiftLeft(vec1)
 	case "ShiftRight":
 		gotv = vec0.ShiftRight(vec1)
-	case "ShiftRightSignExtended":
-		gotv = vec0.ShiftRightSignExtended(vec1)
 	case "Sign":
 		gotv = vec0.Sign(vec1)
 	case "Sub":
@ -3361,8 +3343,6 @@ func testInt32x8BinaryMasked(t *testing.T, v0 []int32, v1 []int32, v2 []int32, w
 		gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask32x8())
 	case "ShiftRightMasked":
 		gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask32x8())
-	case "ShiftRightSignExtendedMasked":
-		gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask32x8())
 	case "SubMasked":
 		gotv = vec0.SubMasked(vec1, vec2.AsMask32x8())
 	case "XorMasked":
@ -3684,8 +3664,6 @@ func testInt32x16Binary(t *testing.T, v0 []int32, v1 []int32, want []int32, whic
 		gotv = vec0.ShiftLeft(vec1)
 	case "ShiftRight":
 		gotv = vec0.ShiftRight(vec1)
-	case "ShiftRightSignExtended":
-		gotv = vec0.ShiftRightSignExtended(vec1)
 	case "Sub":
 		gotv = vec0.Sub(vec1)
 	case "Xor":
@ -3732,8 +3710,6 @@ func testInt32x16BinaryMasked(t *testing.T, v0 []int32, v1 []int32, v2 []int32,
 		gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask32x16())
 	case "ShiftRightMasked":
 		gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask32x16())
-	case "ShiftRightSignExtendedMasked":
-		gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask32x16())
 	case "SubMasked":
 		gotv = vec0.SubMasked(vec1, vec2.AsMask32x16())
 	case "XorMasked":
@ -4036,8 +4012,6 @@ func testInt64x2Binary(t *testing.T, v0 []int64, v1 []int64, want []int64, which
 		gotv = vec0.ShiftLeft(vec1)
 	case "ShiftRight":
 		gotv = vec0.ShiftRight(vec1)
-	case "ShiftRightSignExtended":
-		gotv = vec0.ShiftRightSignExtended(vec1)
 	case "Sub":
 		gotv = vec0.Sub(vec1)
 	case "Xor":
@ -4086,8 +4060,6 @@ func testInt64x2BinaryMasked(t *testing.T, v0 []int64, v1 []int64, v2 []int64, w
 		gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask64x2())
 	case "ShiftRightMasked":
 		gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask64x2())
-	case "ShiftRightSignExtendedMasked":
-		gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask64x2())
 	case "SubMasked":
 		gotv = vec0.SubMasked(vec1, vec2.AsMask64x2())
 	case "XorMasked":
@ -4292,8 +4264,6 @@ func testInt64x4Binary(t *testing.T, v0 []int64, v1 []int64, want []int64, which
 		gotv = vec0.ShiftLeft(vec1)
 	case "ShiftRight":
 		gotv = vec0.ShiftRight(vec1)
-	case "ShiftRightSignExtended":
-		gotv = vec0.ShiftRightSignExtended(vec1)
 	case "Sub":
 		gotv = vec0.Sub(vec1)
 	case "Xor":
@ -4342,8 +4312,6 @@ func testInt64x4BinaryMasked(t *testing.T, v0 []int64, v1 []int64, v2 []int64, w
 		gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask64x4())
 	case "ShiftRightMasked":
 		gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask64x4())
-	case "ShiftRightSignExtendedMasked":
-		gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask64x4())
 	case "SubMasked":
 		gotv = vec0.SubMasked(vec1, vec2.AsMask64x4())
 	case "XorMasked":
@ -4548,8 +4516,6 @@ func testInt64x8Binary(t *testing.T, v0 []int64, v1 []int64, want []int64, which
 		gotv = vec0.ShiftLeft(vec1)
 	case "ShiftRight":
 		gotv = vec0.ShiftRight(vec1)
-	case "ShiftRightSignExtended":
-		gotv = vec0.ShiftRightSignExtended(vec1)
 	case "Sub":
 		gotv = vec0.Sub(vec1)
 	case "Xor":
@ -4598,8 +4564,6 @@ func testInt64x8BinaryMasked(t *testing.T, v0 []int64, v1 []int64, v2 []int64, w
 		gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask64x8())
 	case "ShiftRightMasked":
 		gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask64x8())
-	case "ShiftRightSignExtendedMasked":
-		gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask64x8())
 	case "SubMasked":
 		gotv = vec0.SubMasked(vec1, vec2.AsMask64x8())
 	case "XorMasked":
@ -5478,8 +5442,6 @@ func testUint16x8Binary(t *testing.T, v0 []uint16, v1 []uint16, want []uint16, w
 		gotv = vec0.ShiftLeft(vec1)
 	case "ShiftRight":
 		gotv = vec0.ShiftRight(vec1)
-	case "ShiftRightSignExtended":
-		gotv = vec0.ShiftRightSignExtended(vec1)
 	case "Sub":
 		gotv = vec0.Sub(vec1)
 	case "Xor":
@ -5522,8 +5484,6 @@ func testUint16x8BinaryMasked(t *testing.T, v0 []uint16, v1 []uint16, v2 []int16
 		gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask16x8())
 	case "ShiftRightMasked":
 		gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask16x8())
-	case "ShiftRightSignExtendedMasked":
-		gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask16x8())
 	case "SubMasked":
 		gotv = vec0.SubMasked(vec1, vec2.AsMask16x8())

@ -5726,8 +5686,6 @@ func testUint16x16Binary(t *testing.T, v0 []uint16, v1 []uint16, want []uint16,
 		gotv = vec0.ShiftLeft(vec1)
 	case "ShiftRight":
 		gotv = vec0.ShiftRight(vec1)
-	case "ShiftRightSignExtended":
-		gotv = vec0.ShiftRightSignExtended(vec1)
 	case "Sub":
 		gotv = vec0.Sub(vec1)
 	case "Xor":
@ -5770,8 +5728,6 @@ func testUint16x16BinaryMasked(t *testing.T, v0 []uint16, v1 []uint16, v2 []int1
 		gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask16x16())
 	case "ShiftRightMasked":
 		gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask16x16())
-	case "ShiftRightSignExtendedMasked":
-		gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask16x16())
 	case "SubMasked":
 		gotv = vec0.SubMasked(vec1, vec2.AsMask16x16())

@ -5964,8 +5920,6 @@ func testUint16x32Binary(t *testing.T, v0 []uint16, v1 []uint16, want []uint16,
 		gotv = vec0.ShiftLeft(vec1)
 	case "ShiftRight":
 		gotv = vec0.ShiftRight(vec1)
-	case "ShiftRightSignExtended":
-		gotv = vec0.ShiftRightSignExtended(vec1)
 	case "Sub":
 		gotv = vec0.Sub(vec1)

@ -6006,8 +5960,6 @@ func testUint16x32BinaryMasked(t *testing.T, v0 []uint16, v1 []uint16, v2 []int1
 		gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask16x32())
 	case "ShiftRightMasked":
 		gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask16x32())
-	case "ShiftRightSignExtendedMasked":
-		gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask16x32())
 	case "SubMasked":
 		gotv = vec0.SubMasked(vec1, vec2.AsMask16x32())

@ -6206,8 +6158,6 @@ func testUint32x4Binary(t *testing.T, v0 []uint32, v1 []uint32, want []uint32, w
 		gotv = vec0.ShiftLeft(vec1)
 	case "ShiftRight":
 		gotv = vec0.ShiftRight(vec1)
-	case "ShiftRightSignExtended":
-		gotv = vec0.ShiftRightSignExtended(vec1)
 	case "Sub":
 		gotv = vec0.Sub(vec1)
 	case "Xor":
@ -6252,8 +6202,6 @@ func testUint32x4BinaryMasked(t *testing.T, v0 []uint32, v1 []uint32, v2 []int32
 		gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask32x4())
 	case "ShiftRightMasked":
 		gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask32x4())
-	case "ShiftRightSignExtendedMasked":
-		gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask32x4())
 	case "SubMasked":
 		gotv = vec0.SubMasked(vec1, vec2.AsMask32x4())
 	case "XorMasked":
@ -6524,8 +6472,6 @@ func testUint32x8Binary(t *testing.T, v0 []uint32, v1 []uint32, want []uint32, w
 		gotv = vec0.ShiftLeft(vec1)
 	case "ShiftRight":
 		gotv = vec0.ShiftRight(vec1)
-	case "ShiftRightSignExtended":
-		gotv = vec0.ShiftRightSignExtended(vec1)
 	case "Sub":
 		gotv = vec0.Sub(vec1)
 	case "Xor":
@ -6570,8 +6516,6 @@ func testUint32x8BinaryMasked(t *testing.T, v0 []uint32, v1 []uint32, v2 []int32
 		gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask32x8())
 	case "ShiftRightMasked":
 		gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask32x8())
-	case "ShiftRightSignExtendedMasked":
-		gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask32x8())
 	case "SubMasked":
 		gotv = vec0.SubMasked(vec1, vec2.AsMask32x8())
 	case "XorMasked":
@ -6838,8 +6782,6 @@ func testUint32x16Binary(t *testing.T, v0 []uint32, v1 []uint32, want []uint32,
 		gotv = vec0.ShiftLeft(vec1)
 	case "ShiftRight":
 		gotv = vec0.ShiftRight(vec1)
-	case "ShiftRightSignExtended":
-		gotv = vec0.ShiftRightSignExtended(vec1)
 	case "Sub":
 		gotv = vec0.Sub(vec1)
 	case "Xor":
@ -6884,8 +6826,6 @@ func testUint32x16BinaryMasked(t *testing.T, v0 []uint32, v1 []uint32, v2 []int3
 		gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask32x16())
 	case "ShiftRightMasked":
 		gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask32x16())
-	case "ShiftRightSignExtendedMasked":
-		gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask32x16())
 	case "SubMasked":
 		gotv = vec0.SubMasked(vec1, vec2.AsMask32x16())
 	case "XorMasked":
@ -7133,8 +7073,6 @@ func testUint64x2Binary(t *testing.T, v0 []uint64, v1 []uint64, want []uint64, w
 		gotv = vec0.ShiftLeft(vec1)
 	case "ShiftRight":
 		gotv = vec0.ShiftRight(vec1)
-	case "ShiftRightSignExtended":
-		gotv = vec0.ShiftRightSignExtended(vec1)
 	case "Sub":
 		gotv = vec0.Sub(vec1)
 	case "Xor":
@ -7181,8 +7119,6 @@ func testUint64x2BinaryMasked(t *testing.T, v0 []uint64, v1 []uint64, v2 []int64
 		gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask64x2())
 	case "ShiftRightMasked":
 		gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask64x2())
-	case "ShiftRightSignExtendedMasked":
-		gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask64x2())
 	case "SubMasked":
 		gotv = vec0.SubMasked(vec1, vec2.AsMask64x2())
 	case "XorMasked":
@ -7381,8 +7317,6 @@ func testUint64x4Binary(t *testing.T, v0 []uint64, v1 []uint64, want []uint64, w
 		gotv = vec0.ShiftLeft(vec1)
 	case "ShiftRight":
 		gotv = vec0.ShiftRight(vec1)
-	case "ShiftRightSignExtended":
-		gotv = vec0.ShiftRightSignExtended(vec1)
 	case "Sub":
 		gotv = vec0.Sub(vec1)
 	case "Xor":
@ -7429,8 +7363,6 @@ func testUint64x4BinaryMasked(t *testing.T, v0 []uint64, v1 []uint64, v2 []int64
 		gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask64x4())
 	case "ShiftRightMasked":
 		gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask64x4())
-	case "ShiftRightSignExtendedMasked":
-		gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask64x4())
 	case "SubMasked":
 		gotv = vec0.SubMasked(vec1, vec2.AsMask64x4())
 	case "XorMasked":
@ -7629,8 +7561,6 @@ func testUint64x8Binary(t *testing.T, v0 []uint64, v1 []uint64, want []uint64, w
 		gotv = vec0.ShiftLeft(vec1)
 	case "ShiftRight":
 		gotv = vec0.ShiftRight(vec1)
-	case "ShiftRightSignExtended":
-		gotv = vec0.ShiftRightSignExtended(vec1)
 	case "Sub":
 		gotv = vec0.Sub(vec1)
 	case "Xor":
@ -7677,8 +7607,6 @@ func testUint64x8BinaryMasked(t *testing.T, v0 []uint64, v1 []uint64, v2 []int64
 		gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask64x8())
 	case "ShiftRightMasked":
 		gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask64x8())
-	case "ShiftRightSignExtendedMasked":
-		gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask64x8())
 	case "SubMasked":
 		gotv = vec0.SubMasked(vec1, vec2.AsMask64x8())
 	case "XorMasked":
@ -7884,7 +7812,5 @@ func testUint64x8UnaryMasked(t *testing.T, v0 []uint64, v1 []int64, want []uint6
 // ShiftAllRightAndFillUpperFrom
 // ShiftAllRightAndFillUpperFromMasked
 // ShiftAllRightMasked
-// ShiftAllRightSignExtended
-// ShiftAllRightSignExtendedMasked
 // TruncWithPrecision
 // TruncWithPrecisionMasked