From 0f660d675f6c0ec4759e66328209ceaa7ccfa7eb Mon Sep 17 00:00:00 2001
From: Junyang Shao
Date: Mon, 18 Aug 2025 21:13:00 +0000
Subject: [PATCH] [dev.simd] simd: make OpMasked machine ops only

Right now the `Op(...).Masked` idiom still lacks many pieces, which
leaves that part of the API incomplete. To keep the API surface
smaller, we are removing these ops' frontend types and interfaces for
now. Later, peephole rewrites and a new pass that checks the domination
relations of the CPU feature checks will pick these machine ops for the
right `Op(...).Masked` idiom.

Change-Id: I77f72a198b3d8b1880dcb911470db5e0089ac1ca
Reviewed-on: https://go-review.googlesource.com/c/go/+/697155
Reviewed-by: Cherry Mui
TryBot-Bypass: Junyang Shao
---
 .../compile/internal/ssa/_gen/simdAMD64.rules |   870 -
 .../internal/ssa/_gen/simdgenericOps.go       |   852 -
 src/cmd/compile/internal/ssa/opGen.go         |  5466 -----
 src/cmd/compile/internal/ssa/rewriteAMD64.go  | 18270 ----------------
 .../compile/internal/ssagen/simdintrinsics.go |   852 -
 src/simd/_gen/simdgen/godefs.go               |     6 +
 src/simd/compare_test.go                      |    38 -
 src/simd/ops_amd64.go                         |  6592 ------
 src/simd/simd_test.go                         |     8 +-
 9 files changed, 10 insertions(+), 32944 deletions(-)

diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
index d64f36cf74e..cfe0075986f 100644
--- a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
@@ -12,18 +12,6 @@
(AbsInt64x2 ...) => (VPABSQ128 ...)
(AbsInt64x4 ...) => (VPABSQ256 ...)
(AbsInt64x8 ...) => (VPABSQ512 ...)
-(AbsMaskedInt8x16 x mask) => (VPABSBMasked128 x (VPMOVVec8x16ToM mask))
-(AbsMaskedInt8x32 x mask) => (VPABSBMasked256 x (VPMOVVec8x32ToM mask))
-(AbsMaskedInt8x64 x mask) => (VPABSBMasked512 x (VPMOVVec8x64ToM mask))
-(AbsMaskedInt16x8 x mask) => (VPABSWMasked128 x (VPMOVVec16x8ToM mask))
-(AbsMaskedInt16x16 x mask) => (VPABSWMasked256 x (VPMOVVec16x16ToM mask))
-(AbsMaskedInt16x32 x mask) => (VPABSWMasked512 x (VPMOVVec16x32ToM mask))
-(AbsMaskedInt32x4 x mask) => (VPABSDMasked128 x (VPMOVVec32x4ToM mask))
-(AbsMaskedInt32x8 x mask) => (VPABSDMasked256 x (VPMOVVec32x8ToM mask))
-(AbsMaskedInt32x16 x mask) => (VPABSDMasked512 x (VPMOVVec32x16ToM mask))
-(AbsMaskedInt64x2 x mask) => (VPABSQMasked128 x (VPMOVVec64x2ToM mask))
-(AbsMaskedInt64x4 x mask) => (VPABSQMasked256 x (VPMOVVec64x4ToM mask))
-(AbsMaskedInt64x8 x mask) => (VPABSQMasked512 x (VPMOVVec64x8ToM mask))
(AddFloat32x4 ...) => (VADDPS128 ...)
(AddFloat32x8 ...) => (VADDPS256 ...)
(AddFloat32x16 ...) => (VADDPS512 ...)
@@ -57,51 +45,12 @@
(AddDotProdPairsSaturatedInt32x4 ...) => (VPDPWSSDS128 ...)
(AddDotProdPairsSaturatedInt32x8 ...) => (VPDPWSSDS256 ...)
(AddDotProdPairsSaturatedInt32x16 ...) => (VPDPWSSDS512 ...)
-(AddDotProdPairsSaturatedMaskedInt32x4 x y z mask) => (VPDPWSSDSMasked128 x y z (VPMOVVec32x4ToM mask))
-(AddDotProdPairsSaturatedMaskedInt32x8 x y z mask) => (VPDPWSSDSMasked256 x y z (VPMOVVec32x8ToM mask))
-(AddDotProdPairsSaturatedMaskedInt32x16 x y z mask) => (VPDPWSSDSMasked512 x y z (VPMOVVec32x16ToM mask))
(AddDotProdQuadrupleInt32x4 ...) => (VPDPBUSD128 ...)
(AddDotProdQuadrupleInt32x8 ...) => (VPDPBUSD256 ...)
(AddDotProdQuadrupleInt32x16 ...) => (VPDPBUSD512 ...)
-(AddDotProdQuadrupleMaskedInt32x4 x y z mask) => (VPDPBUSDMasked128 x y z (VPMOVVec32x4ToM mask)) -(AddDotProdQuadrupleMaskedInt32x8 x y z mask) => (VPDPBUSDMasked256 x y z (VPMOVVec32x8ToM mask)) -(AddDotProdQuadrupleMaskedInt32x16 x y z mask) => (VPDPBUSDMasked512 x y z (VPMOVVec32x16ToM mask)) (AddDotProdQuadrupleSaturatedInt32x4 ...) => (VPDPBUSDS128 ...) (AddDotProdQuadrupleSaturatedInt32x8 ...) => (VPDPBUSDS256 ...) (AddDotProdQuadrupleSaturatedInt32x16 ...) => (VPDPBUSDS512 ...) -(AddDotProdQuadrupleSaturatedMaskedInt32x4 x y z mask) => (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM mask)) -(AddDotProdQuadrupleSaturatedMaskedInt32x8 x y z mask) => (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM mask)) -(AddDotProdQuadrupleSaturatedMaskedInt32x16 x y z mask) => (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM mask)) -(AddMaskedFloat32x4 x y mask) => (VADDPSMasked128 x y (VPMOVVec32x4ToM mask)) -(AddMaskedFloat32x8 x y mask) => (VADDPSMasked256 x y (VPMOVVec32x8ToM mask)) -(AddMaskedFloat32x16 x y mask) => (VADDPSMasked512 x y (VPMOVVec32x16ToM mask)) -(AddMaskedFloat64x2 x y mask) => (VADDPDMasked128 x y (VPMOVVec64x2ToM mask)) -(AddMaskedFloat64x4 x y mask) => (VADDPDMasked256 x y (VPMOVVec64x4ToM mask)) -(AddMaskedFloat64x8 x y mask) => (VADDPDMasked512 x y (VPMOVVec64x8ToM mask)) -(AddMaskedInt8x16 x y mask) => (VPADDBMasked128 x y (VPMOVVec8x16ToM mask)) -(AddMaskedInt8x32 x y mask) => (VPADDBMasked256 x y (VPMOVVec8x32ToM mask)) -(AddMaskedInt8x64 x y mask) => (VPADDBMasked512 x y (VPMOVVec8x64ToM mask)) -(AddMaskedInt16x8 x y mask) => (VPADDWMasked128 x y (VPMOVVec16x8ToM mask)) -(AddMaskedInt16x16 x y mask) => (VPADDWMasked256 x y (VPMOVVec16x16ToM mask)) -(AddMaskedInt16x32 x y mask) => (VPADDWMasked512 x y (VPMOVVec16x32ToM mask)) -(AddMaskedInt32x4 x y mask) => (VPADDDMasked128 x y (VPMOVVec32x4ToM mask)) -(AddMaskedInt32x8 x y mask) => (VPADDDMasked256 x y (VPMOVVec32x8ToM mask)) -(AddMaskedInt32x16 x y mask) => (VPADDDMasked512 x y (VPMOVVec32x16ToM mask)) -(AddMaskedInt64x2 x y mask) => (VPADDQMasked128 x y (VPMOVVec64x2ToM mask)) -(AddMaskedInt64x4 x y mask) => (VPADDQMasked256 x y (VPMOVVec64x4ToM mask)) -(AddMaskedInt64x8 x y mask) => (VPADDQMasked512 x y (VPMOVVec64x8ToM mask)) -(AddMaskedUint8x16 x y mask) => (VPADDBMasked128 x y (VPMOVVec8x16ToM mask)) -(AddMaskedUint8x32 x y mask) => (VPADDBMasked256 x y (VPMOVVec8x32ToM mask)) -(AddMaskedUint8x64 x y mask) => (VPADDBMasked512 x y (VPMOVVec8x64ToM mask)) -(AddMaskedUint16x8 x y mask) => (VPADDWMasked128 x y (VPMOVVec16x8ToM mask)) -(AddMaskedUint16x16 x y mask) => (VPADDWMasked256 x y (VPMOVVec16x16ToM mask)) -(AddMaskedUint16x32 x y mask) => (VPADDWMasked512 x y (VPMOVVec16x32ToM mask)) -(AddMaskedUint32x4 x y mask) => (VPADDDMasked128 x y (VPMOVVec32x4ToM mask)) -(AddMaskedUint32x8 x y mask) => (VPADDDMasked256 x y (VPMOVVec32x8ToM mask)) -(AddMaskedUint32x16 x y mask) => (VPADDDMasked512 x y (VPMOVVec32x16ToM mask)) -(AddMaskedUint64x2 x y mask) => (VPADDQMasked128 x y (VPMOVVec64x2ToM mask)) -(AddMaskedUint64x4 x y mask) => (VPADDQMasked256 x y (VPMOVVec64x4ToM mask)) -(AddMaskedUint64x8 x y mask) => (VPADDQMasked512 x y (VPMOVVec64x8ToM mask)) (AddPairsFloat32x4 ...) => (VHADDPS128 ...) (AddPairsFloat32x8 ...) => (VHADDPS256 ...) (AddPairsFloat64x2 ...) => (VHADDPD128 ...) @@ -128,18 +77,6 @@ (AddSaturatedUint16x8 ...) => (VPADDUSW128 ...) (AddSaturatedUint16x16 ...) => (VPADDUSW256 ...) (AddSaturatedUint16x32 ...) => (VPADDUSW512 ...) 
-(AddSaturatedMaskedInt8x16 x y mask) => (VPADDSBMasked128 x y (VPMOVVec8x16ToM mask)) -(AddSaturatedMaskedInt8x32 x y mask) => (VPADDSBMasked256 x y (VPMOVVec8x32ToM mask)) -(AddSaturatedMaskedInt8x64 x y mask) => (VPADDSBMasked512 x y (VPMOVVec8x64ToM mask)) -(AddSaturatedMaskedInt16x8 x y mask) => (VPADDSWMasked128 x y (VPMOVVec16x8ToM mask)) -(AddSaturatedMaskedInt16x16 x y mask) => (VPADDSWMasked256 x y (VPMOVVec16x16ToM mask)) -(AddSaturatedMaskedInt16x32 x y mask) => (VPADDSWMasked512 x y (VPMOVVec16x32ToM mask)) -(AddSaturatedMaskedUint8x16 x y mask) => (VPADDUSBMasked128 x y (VPMOVVec8x16ToM mask)) -(AddSaturatedMaskedUint8x32 x y mask) => (VPADDUSBMasked256 x y (VPMOVVec8x32ToM mask)) -(AddSaturatedMaskedUint8x64 x y mask) => (VPADDUSBMasked512 x y (VPMOVVec8x64ToM mask)) -(AddSaturatedMaskedUint16x8 x y mask) => (VPADDUSWMasked128 x y (VPMOVVec16x8ToM mask)) -(AddSaturatedMaskedUint16x16 x y mask) => (VPADDUSWMasked256 x y (VPMOVVec16x16ToM mask)) -(AddSaturatedMaskedUint16x32 x y mask) => (VPADDUSWMasked512 x y (VPMOVVec16x32ToM mask)) (AddSubFloat32x4 ...) => (VADDSUBPS128 ...) (AddSubFloat32x8 ...) => (VADDSUBPS256 ...) (AddSubFloat64x2 ...) => (VADDSUBPD128 ...) @@ -168,18 +105,6 @@ (AndUint64x2 ...) => (VPAND128 ...) (AndUint64x4 ...) => (VPAND256 ...) (AndUint64x8 ...) => (VPANDQ512 ...) -(AndMaskedInt32x4 x y mask) => (VPANDDMasked128 x y (VPMOVVec32x4ToM mask)) -(AndMaskedInt32x8 x y mask) => (VPANDDMasked256 x y (VPMOVVec32x8ToM mask)) -(AndMaskedInt32x16 x y mask) => (VPANDDMasked512 x y (VPMOVVec32x16ToM mask)) -(AndMaskedInt64x2 x y mask) => (VPANDQMasked128 x y (VPMOVVec64x2ToM mask)) -(AndMaskedInt64x4 x y mask) => (VPANDQMasked256 x y (VPMOVVec64x4ToM mask)) -(AndMaskedInt64x8 x y mask) => (VPANDQMasked512 x y (VPMOVVec64x8ToM mask)) -(AndMaskedUint32x4 x y mask) => (VPANDDMasked128 x y (VPMOVVec32x4ToM mask)) -(AndMaskedUint32x8 x y mask) => (VPANDDMasked256 x y (VPMOVVec32x8ToM mask)) -(AndMaskedUint32x16 x y mask) => (VPANDDMasked512 x y (VPMOVVec32x16ToM mask)) -(AndMaskedUint64x2 x y mask) => (VPANDQMasked128 x y (VPMOVVec64x2ToM mask)) -(AndMaskedUint64x4 x y mask) => (VPANDQMasked256 x y (VPMOVVec64x4ToM mask)) -(AndMaskedUint64x8 x y mask) => (VPANDQMasked512 x y (VPMOVVec64x8ToM mask)) (AndNotInt8x16 ...) => (VPANDN128 ...) (AndNotInt8x32 ...) => (VPANDN256 ...) (AndNotInt8x64 ...) => (VPANDND512 ...) @@ -204,30 +129,12 @@ (AndNotUint64x2 ...) => (VPANDN128 ...) (AndNotUint64x4 ...) => (VPANDN256 ...) (AndNotUint64x8 ...) => (VPANDNQ512 ...) -(AndNotMaskedInt32x4 x y mask) => (VPANDNDMasked128 x y (VPMOVVec32x4ToM mask)) -(AndNotMaskedInt32x8 x y mask) => (VPANDNDMasked256 x y (VPMOVVec32x8ToM mask)) -(AndNotMaskedInt32x16 x y mask) => (VPANDNDMasked512 x y (VPMOVVec32x16ToM mask)) -(AndNotMaskedInt64x2 x y mask) => (VPANDNQMasked128 x y (VPMOVVec64x2ToM mask)) -(AndNotMaskedInt64x4 x y mask) => (VPANDNQMasked256 x y (VPMOVVec64x4ToM mask)) -(AndNotMaskedInt64x8 x y mask) => (VPANDNQMasked512 x y (VPMOVVec64x8ToM mask)) -(AndNotMaskedUint32x4 x y mask) => (VPANDNDMasked128 x y (VPMOVVec32x4ToM mask)) -(AndNotMaskedUint32x8 x y mask) => (VPANDNDMasked256 x y (VPMOVVec32x8ToM mask)) -(AndNotMaskedUint32x16 x y mask) => (VPANDNDMasked512 x y (VPMOVVec32x16ToM mask)) -(AndNotMaskedUint64x2 x y mask) => (VPANDNQMasked128 x y (VPMOVVec64x2ToM mask)) -(AndNotMaskedUint64x4 x y mask) => (VPANDNQMasked256 x y (VPMOVVec64x4ToM mask)) -(AndNotMaskedUint64x8 x y mask) => (VPANDNQMasked512 x y (VPMOVVec64x8ToM mask)) (AverageUint8x16 ...) => (VPAVGB128 ...) 
(AverageUint8x32 ...) => (VPAVGB256 ...) (AverageUint8x64 ...) => (VPAVGB512 ...) (AverageUint16x8 ...) => (VPAVGW128 ...) (AverageUint16x16 ...) => (VPAVGW256 ...) (AverageUint16x32 ...) => (VPAVGW512 ...) -(AverageMaskedUint8x16 x y mask) => (VPAVGBMasked128 x y (VPMOVVec8x16ToM mask)) -(AverageMaskedUint8x32 x y mask) => (VPAVGBMasked256 x y (VPMOVVec8x32ToM mask)) -(AverageMaskedUint8x64 x y mask) => (VPAVGBMasked512 x y (VPMOVVec8x64ToM mask)) -(AverageMaskedUint16x8 x y mask) => (VPAVGWMasked128 x y (VPMOVVec16x8ToM mask)) -(AverageMaskedUint16x16 x y mask) => (VPAVGWMasked256 x y (VPMOVVec16x16ToM mask)) -(AverageMaskedUint16x32 x y mask) => (VPAVGWMasked512 x y (VPMOVVec16x32ToM mask)) (Broadcast128Float32x4 ...) => (VBROADCASTSS128 ...) (Broadcast128Float64x2 ...) => (VPBROADCASTQ128 ...) (Broadcast128Int8x16 ...) => (VPBROADCASTB128 ...) @@ -238,16 +145,6 @@ (Broadcast128Uint16x8 ...) => (VPBROADCASTW128 ...) (Broadcast128Uint32x4 ...) => (VPBROADCASTD128 ...) (Broadcast128Uint64x2 ...) => (VPBROADCASTQ128 ...) -(Broadcast128MaskedFloat32x4 x mask) => (VBROADCASTSSMasked128 x (VPMOVVec32x4ToM mask)) -(Broadcast128MaskedFloat64x2 x mask) => (VPBROADCASTQMasked128 x (VPMOVVec64x2ToM mask)) -(Broadcast128MaskedInt8x16 x mask) => (VPBROADCASTBMasked128 x (VPMOVVec8x16ToM mask)) -(Broadcast128MaskedInt16x8 x mask) => (VPBROADCASTWMasked128 x (VPMOVVec16x8ToM mask)) -(Broadcast128MaskedInt32x4 x mask) => (VPBROADCASTDMasked128 x (VPMOVVec32x4ToM mask)) -(Broadcast128MaskedInt64x2 x mask) => (VPBROADCASTQMasked128 x (VPMOVVec64x2ToM mask)) -(Broadcast128MaskedUint8x16 x mask) => (VPBROADCASTBMasked128 x (VPMOVVec8x16ToM mask)) -(Broadcast128MaskedUint16x8 x mask) => (VPBROADCASTWMasked128 x (VPMOVVec16x8ToM mask)) -(Broadcast128MaskedUint32x4 x mask) => (VPBROADCASTDMasked128 x (VPMOVVec32x4ToM mask)) -(Broadcast128MaskedUint64x2 x mask) => (VPBROADCASTQMasked128 x (VPMOVVec64x2ToM mask)) (Broadcast256Float32x4 ...) => (VBROADCASTSS256 ...) (Broadcast256Float64x2 ...) => (VBROADCASTSD256 ...) (Broadcast256Int8x16 ...) => (VPBROADCASTB256 ...) @@ -258,16 +155,6 @@ (Broadcast256Uint16x8 ...) => (VPBROADCASTW256 ...) (Broadcast256Uint32x4 ...) => (VPBROADCASTD256 ...) (Broadcast256Uint64x2 ...) => (VPBROADCASTQ256 ...) -(Broadcast256MaskedFloat32x4 x mask) => (VBROADCASTSSMasked256 x (VPMOVVec32x4ToM mask)) -(Broadcast256MaskedFloat64x2 x mask) => (VBROADCASTSDMasked256 x (VPMOVVec64x2ToM mask)) -(Broadcast256MaskedInt8x16 x mask) => (VPBROADCASTBMasked256 x (VPMOVVec8x16ToM mask)) -(Broadcast256MaskedInt16x8 x mask) => (VPBROADCASTWMasked256 x (VPMOVVec16x8ToM mask)) -(Broadcast256MaskedInt32x4 x mask) => (VPBROADCASTDMasked256 x (VPMOVVec32x4ToM mask)) -(Broadcast256MaskedInt64x2 x mask) => (VPBROADCASTQMasked256 x (VPMOVVec64x2ToM mask)) -(Broadcast256MaskedUint8x16 x mask) => (VPBROADCASTBMasked256 x (VPMOVVec8x16ToM mask)) -(Broadcast256MaskedUint16x8 x mask) => (VPBROADCASTWMasked256 x (VPMOVVec16x8ToM mask)) -(Broadcast256MaskedUint32x4 x mask) => (VPBROADCASTDMasked256 x (VPMOVVec32x4ToM mask)) -(Broadcast256MaskedUint64x2 x mask) => (VPBROADCASTQMasked256 x (VPMOVVec64x2ToM mask)) (Broadcast512Float32x4 ...) => (VBROADCASTSS512 ...) (Broadcast512Float64x2 ...) => (VBROADCASTSD512 ...) (Broadcast512Int8x16 ...) => (VPBROADCASTB512 ...) @@ -278,16 +165,6 @@ (Broadcast512Uint16x8 ...) => (VPBROADCASTW512 ...) (Broadcast512Uint32x4 ...) => (VPBROADCASTD512 ...) (Broadcast512Uint64x2 ...) => (VPBROADCASTQ512 ...) 
-(Broadcast512MaskedFloat32x4 x mask) => (VBROADCASTSSMasked512 x (VPMOVVec32x4ToM mask)) -(Broadcast512MaskedFloat64x2 x mask) => (VBROADCASTSDMasked512 x (VPMOVVec64x2ToM mask)) -(Broadcast512MaskedInt8x16 x mask) => (VPBROADCASTBMasked512 x (VPMOVVec8x16ToM mask)) -(Broadcast512MaskedInt16x8 x mask) => (VPBROADCASTWMasked512 x (VPMOVVec16x8ToM mask)) -(Broadcast512MaskedInt32x4 x mask) => (VPBROADCASTDMasked512 x (VPMOVVec32x4ToM mask)) -(Broadcast512MaskedInt64x2 x mask) => (VPBROADCASTQMasked512 x (VPMOVVec64x2ToM mask)) -(Broadcast512MaskedUint8x16 x mask) => (VPBROADCASTBMasked512 x (VPMOVVec8x16ToM mask)) -(Broadcast512MaskedUint16x8 x mask) => (VPBROADCASTWMasked512 x (VPMOVVec16x8ToM mask)) -(Broadcast512MaskedUint32x4 x mask) => (VPBROADCASTDMasked512 x (VPMOVVec32x4ToM mask)) -(Broadcast512MaskedUint64x2 x mask) => (VPBROADCASTQMasked512 x (VPMOVVec64x2ToM mask)) (CeilFloat32x4 x) => (VROUNDPS128 [2] x) (CeilFloat32x8 x) => (VROUNDPS256 [2] x) (CeilFloat64x2 x) => (VROUNDPD128 [2] x) @@ -298,24 +175,12 @@ (CeilScaledFloat64x2 [a] x) => (VRNDSCALEPD128 [a+2] x) (CeilScaledFloat64x4 [a] x) => (VRNDSCALEPD256 [a+2] x) (CeilScaledFloat64x8 [a] x) => (VRNDSCALEPD512 [a+2] x) -(CeilScaledMaskedFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+2] x (VPMOVVec32x4ToM mask)) -(CeilScaledMaskedFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+2] x (VPMOVVec32x8ToM mask)) -(CeilScaledMaskedFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+2] x (VPMOVVec32x16ToM mask)) -(CeilScaledMaskedFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+2] x (VPMOVVec64x2ToM mask)) -(CeilScaledMaskedFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+2] x (VPMOVVec64x4ToM mask)) -(CeilScaledMaskedFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+2] x (VPMOVVec64x8ToM mask)) (CeilScaledResidueFloat32x4 [a] x) => (VREDUCEPS128 [a+2] x) (CeilScaledResidueFloat32x8 [a] x) => (VREDUCEPS256 [a+2] x) (CeilScaledResidueFloat32x16 [a] x) => (VREDUCEPS512 [a+2] x) (CeilScaledResidueFloat64x2 [a] x) => (VREDUCEPD128 [a+2] x) (CeilScaledResidueFloat64x4 [a] x) => (VREDUCEPD256 [a+2] x) (CeilScaledResidueFloat64x8 [a] x) => (VREDUCEPD512 [a+2] x) -(CeilScaledResidueMaskedFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+2] x (VPMOVVec32x4ToM mask)) -(CeilScaledResidueMaskedFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+2] x (VPMOVVec32x8ToM mask)) -(CeilScaledResidueMaskedFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+2] x (VPMOVVec32x16ToM mask)) -(CeilScaledResidueMaskedFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+2] x (VPMOVVec64x2ToM mask)) -(CeilScaledResidueMaskedFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+2] x (VPMOVVec64x4ToM mask)) -(CeilScaledResidueMaskedFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+2] x (VPMOVVec64x8ToM mask)) (CompressFloat32x4 x mask) => (VCOMPRESSPSMasked128 x (VPMOVVec32x4ToM mask)) (CompressFloat32x8 x mask) => (VCOMPRESSPSMasked256 x (VPMOVVec32x8ToM mask)) (CompressFloat32x16 x mask) => (VCOMPRESSPSMasked512 x (VPMOVVec32x16ToM mask)) @@ -349,15 +214,9 @@ (ConvertToInt32Float32x4 ...) => (VCVTTPS2DQ128 ...) (ConvertToInt32Float32x8 ...) => (VCVTTPS2DQ256 ...) (ConvertToInt32Float32x16 ...) => (VCVTTPS2DQ512 ...) -(ConvertToInt32MaskedFloat32x4 x mask) => (VCVTTPS2DQMasked128 x (VPMOVVec32x4ToM mask)) -(ConvertToInt32MaskedFloat32x8 x mask) => (VCVTTPS2DQMasked256 x (VPMOVVec32x8ToM mask)) -(ConvertToInt32MaskedFloat32x16 x mask) => (VCVTTPS2DQMasked512 x (VPMOVVec32x16ToM mask)) (ConvertToUint32Float32x4 ...) => (VCVTPS2UDQ128 ...) (ConvertToUint32Float32x8 ...) 
=> (VCVTPS2UDQ256 ...) (ConvertToUint32Float32x16 ...) => (VCVTPS2UDQ512 ...) -(ConvertToUint32MaskedFloat32x4 x mask) => (VCVTPS2UDQMasked128 x (VPMOVVec32x4ToM mask)) -(ConvertToUint32MaskedFloat32x8 x mask) => (VCVTPS2UDQMasked256 x (VPMOVVec32x8ToM mask)) -(ConvertToUint32MaskedFloat32x16 x mask) => (VCVTPS2UDQMasked512 x (VPMOVVec32x16ToM mask)) (CopySignInt8x16 ...) => (VPSIGNB128 ...) (CopySignInt8x32 ...) => (VPSIGNB256 ...) (CopySignInt16x8 ...) => (VPSIGNW128 ...) @@ -370,24 +229,12 @@ (DivFloat64x2 ...) => (VDIVPD128 ...) (DivFloat64x4 ...) => (VDIVPD256 ...) (DivFloat64x8 ...) => (VDIVPD512 ...) -(DivMaskedFloat32x4 x y mask) => (VDIVPSMasked128 x y (VPMOVVec32x4ToM mask)) -(DivMaskedFloat32x8 x y mask) => (VDIVPSMasked256 x y (VPMOVVec32x8ToM mask)) -(DivMaskedFloat32x16 x y mask) => (VDIVPSMasked512 x y (VPMOVVec32x16ToM mask)) -(DivMaskedFloat64x2 x y mask) => (VDIVPDMasked128 x y (VPMOVVec64x2ToM mask)) -(DivMaskedFloat64x4 x y mask) => (VDIVPDMasked256 x y (VPMOVVec64x4ToM mask)) -(DivMaskedFloat64x8 x y mask) => (VDIVPDMasked512 x y (VPMOVVec64x8ToM mask)) (DotProdPairsInt16x8 ...) => (VPMADDWD128 ...) (DotProdPairsInt16x16 ...) => (VPMADDWD256 ...) (DotProdPairsInt16x32 ...) => (VPMADDWD512 ...) -(DotProdPairsMaskedInt16x8 x y mask) => (VPMADDWDMasked128 x y (VPMOVVec16x8ToM mask)) -(DotProdPairsMaskedInt16x16 x y mask) => (VPMADDWDMasked256 x y (VPMOVVec16x16ToM mask)) -(DotProdPairsMaskedInt16x32 x y mask) => (VPMADDWDMasked512 x y (VPMOVVec16x32ToM mask)) (DotProdPairsSaturatedUint8x16 ...) => (VPMADDUBSW128 ...) (DotProdPairsSaturatedUint8x32 ...) => (VPMADDUBSW256 ...) (DotProdPairsSaturatedUint8x64 ...) => (VPMADDUBSW512 ...) -(DotProdPairsSaturatedMaskedUint8x16 x y mask) => (VPMADDUBSWMasked128 x y (VPMOVVec16x8ToM mask)) -(DotProdPairsSaturatedMaskedUint8x32 x y mask) => (VPMADDUBSWMasked256 x y (VPMOVVec16x16ToM mask)) -(DotProdPairsSaturatedMaskedUint8x64 x y mask) => (VPMADDUBSWMasked512 x y (VPMOVVec16x32ToM mask)) (EqualFloat32x4 x y) => (VCMPPS128 [0] x y) (EqualFloat32x8 x y) => (VCMPPS256 [0] x y) (EqualFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [0] x y)) @@ -418,36 +265,6 @@ (EqualUint64x2 ...) => (VPCMPEQQ128 ...) (EqualUint64x4 ...) => (VPCMPEQQ256 ...) 
(EqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPEQQ512 x y)) -(EqualMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [0] x y (VPMOVVec32x4ToM mask))) -(EqualMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [0] x y (VPMOVVec32x8ToM mask))) -(EqualMaskedFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [0] x y (VPMOVVec32x16ToM mask))) -(EqualMaskedFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [0] x y (VPMOVVec64x2ToM mask))) -(EqualMaskedFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [0] x y (VPMOVVec64x4ToM mask))) -(EqualMaskedFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [0] x y (VPMOVVec64x8ToM mask))) -(EqualMaskedInt8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPBMasked128 [0] x y (VPMOVVec8x16ToM mask))) -(EqualMaskedInt8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPBMasked256 [0] x y (VPMOVVec8x32ToM mask))) -(EqualMaskedInt8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPBMasked512 [0] x y (VPMOVVec8x64ToM mask))) -(EqualMaskedInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [0] x y (VPMOVVec16x8ToM mask))) -(EqualMaskedInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPWMasked256 [0] x y (VPMOVVec16x16ToM mask))) -(EqualMaskedInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [0] x y (VPMOVVec16x32ToM mask))) -(EqualMaskedInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [0] x y (VPMOVVec32x4ToM mask))) -(EqualMaskedInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [0] x y (VPMOVVec32x8ToM mask))) -(EqualMaskedInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [0] x y (VPMOVVec32x16ToM mask))) -(EqualMaskedInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [0] x y (VPMOVVec64x2ToM mask))) -(EqualMaskedInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [0] x y (VPMOVVec64x4ToM mask))) -(EqualMaskedInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [0] x y (VPMOVVec64x8ToM mask))) -(EqualMaskedUint8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPUBMasked128 [0] x y (VPMOVVec8x16ToM mask))) -(EqualMaskedUint8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPUBMasked256 [0] x y (VPMOVVec8x32ToM mask))) -(EqualMaskedUint8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPUBMasked512 [0] x y (VPMOVVec8x64ToM mask))) -(EqualMaskedUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [0] x y (VPMOVVec16x8ToM mask))) -(EqualMaskedUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [0] x y (VPMOVVec16x16ToM mask))) -(EqualMaskedUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [0] x y (VPMOVVec16x32ToM mask))) -(EqualMaskedUint32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPUDMasked128 [0] x y (VPMOVVec32x4ToM mask))) -(EqualMaskedUint32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPUDMasked256 [0] x y (VPMOVVec32x8ToM mask))) -(EqualMaskedUint32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPUDMasked512 [0] x y (VPMOVVec32x16ToM mask))) -(EqualMaskedUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [0] x y (VPMOVVec64x2ToM mask))) -(EqualMaskedUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [0] x y (VPMOVVec64x4ToM mask))) -(EqualMaskedUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [0] x y (VPMOVVec64x8ToM mask))) (ExpandFloat32x4 x mask) => (VEXPANDPSMasked128 x (VPMOVVec32x4ToM mask)) (ExpandFloat32x8 x mask) => (VEXPANDPSMasked256 x (VPMOVVec32x8ToM mask)) (ExpandFloat32x16 x mask) => (VEXPANDPSMasked512 x (VPMOVVec32x16ToM mask)) @@ -488,42 +305,21 @@ (FloorScaledFloat64x2 [a] x) => (VRNDSCALEPD128 [a+1] x) (FloorScaledFloat64x4 [a] x) => (VRNDSCALEPD256 [a+1] x) (FloorScaledFloat64x8 [a] x) => (VRNDSCALEPD512 
[a+1] x) -(FloorScaledMaskedFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+1] x (VPMOVVec32x4ToM mask)) -(FloorScaledMaskedFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+1] x (VPMOVVec32x8ToM mask)) -(FloorScaledMaskedFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+1] x (VPMOVVec32x16ToM mask)) -(FloorScaledMaskedFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+1] x (VPMOVVec64x2ToM mask)) -(FloorScaledMaskedFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+1] x (VPMOVVec64x4ToM mask)) -(FloorScaledMaskedFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+1] x (VPMOVVec64x8ToM mask)) (FloorScaledResidueFloat32x4 [a] x) => (VREDUCEPS128 [a+1] x) (FloorScaledResidueFloat32x8 [a] x) => (VREDUCEPS256 [a+1] x) (FloorScaledResidueFloat32x16 [a] x) => (VREDUCEPS512 [a+1] x) (FloorScaledResidueFloat64x2 [a] x) => (VREDUCEPD128 [a+1] x) (FloorScaledResidueFloat64x4 [a] x) => (VREDUCEPD256 [a+1] x) (FloorScaledResidueFloat64x8 [a] x) => (VREDUCEPD512 [a+1] x) -(FloorScaledResidueMaskedFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+1] x (VPMOVVec32x4ToM mask)) -(FloorScaledResidueMaskedFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+1] x (VPMOVVec32x8ToM mask)) -(FloorScaledResidueMaskedFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+1] x (VPMOVVec32x16ToM mask)) -(FloorScaledResidueMaskedFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+1] x (VPMOVVec64x2ToM mask)) -(FloorScaledResidueMaskedFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+1] x (VPMOVVec64x4ToM mask)) -(FloorScaledResidueMaskedFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+1] x (VPMOVVec64x8ToM mask)) (GaloisFieldAffineTransformUint8x16 ...) => (VGF2P8AFFINEQB128 ...) (GaloisFieldAffineTransformUint8x32 ...) => (VGF2P8AFFINEQB256 ...) (GaloisFieldAffineTransformUint8x64 ...) => (VGF2P8AFFINEQB512 ...) (GaloisFieldAffineTransformInverseUint8x16 ...) => (VGF2P8AFFINEINVQB128 ...) (GaloisFieldAffineTransformInverseUint8x32 ...) => (VGF2P8AFFINEINVQB256 ...) (GaloisFieldAffineTransformInverseUint8x64 ...) => (VGF2P8AFFINEINVQB512 ...) -(GaloisFieldAffineTransformInverseMaskedUint8x16 [a] x y mask) => (VGF2P8AFFINEINVQBMasked128 [a] x y (VPMOVVec8x16ToM mask)) -(GaloisFieldAffineTransformInverseMaskedUint8x32 [a] x y mask) => (VGF2P8AFFINEINVQBMasked256 [a] x y (VPMOVVec8x32ToM mask)) -(GaloisFieldAffineTransformInverseMaskedUint8x64 [a] x y mask) => (VGF2P8AFFINEINVQBMasked512 [a] x y (VPMOVVec8x64ToM mask)) -(GaloisFieldAffineTransformMaskedUint8x16 [a] x y mask) => (VGF2P8AFFINEQBMasked128 [a] x y (VPMOVVec8x16ToM mask)) -(GaloisFieldAffineTransformMaskedUint8x32 [a] x y mask) => (VGF2P8AFFINEQBMasked256 [a] x y (VPMOVVec8x32ToM mask)) -(GaloisFieldAffineTransformMaskedUint8x64 [a] x y mask) => (VGF2P8AFFINEQBMasked512 [a] x y (VPMOVVec8x64ToM mask)) (GaloisFieldMulUint8x16 ...) => (VGF2P8MULB128 ...) (GaloisFieldMulUint8x32 ...) => (VGF2P8MULB256 ...) (GaloisFieldMulUint8x64 ...) => (VGF2P8MULB512 ...) -(GaloisFieldMulMaskedUint8x16 x y mask) => (VGF2P8MULBMasked128 x y (VPMOVVec8x16ToM mask)) -(GaloisFieldMulMaskedUint8x32 x y mask) => (VGF2P8MULBMasked256 x y (VPMOVVec8x32ToM mask)) -(GaloisFieldMulMaskedUint8x64 x y mask) => (VGF2P8MULBMasked512 x y (VPMOVVec8x64ToM mask)) (GetElemFloat32x4 ...) => (VPEXTRD128 ...) (GetElemFloat64x2 ...) => (VPEXTRQ128 ...) (GetElemInt8x16 ...) => (VPEXTRB128 ...) 
@@ -610,78 +406,12 @@ (GreaterEqualUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [13] x y)) (GreaterEqualUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [13] x y)) (GreaterEqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [13] x y)) -(GreaterEqualMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [13] x y (VPMOVVec32x4ToM mask))) -(GreaterEqualMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [13] x y (VPMOVVec32x8ToM mask))) -(GreaterEqualMaskedFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [13] x y (VPMOVVec32x16ToM mask))) -(GreaterEqualMaskedFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [13] x y (VPMOVVec64x2ToM mask))) -(GreaterEqualMaskedFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [13] x y (VPMOVVec64x4ToM mask))) -(GreaterEqualMaskedFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [13] x y (VPMOVVec64x8ToM mask))) -(GreaterEqualMaskedInt8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPBMasked128 [13] x y (VPMOVVec8x16ToM mask))) -(GreaterEqualMaskedInt8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPBMasked256 [13] x y (VPMOVVec8x32ToM mask))) -(GreaterEqualMaskedInt8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPBMasked512 [13] x y (VPMOVVec8x64ToM mask))) -(GreaterEqualMaskedInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [13] x y (VPMOVVec16x8ToM mask))) -(GreaterEqualMaskedInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPWMasked256 [13] x y (VPMOVVec16x16ToM mask))) -(GreaterEqualMaskedInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [13] x y (VPMOVVec16x32ToM mask))) -(GreaterEqualMaskedInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [13] x y (VPMOVVec32x4ToM mask))) -(GreaterEqualMaskedInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [13] x y (VPMOVVec32x8ToM mask))) -(GreaterEqualMaskedInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [13] x y (VPMOVVec32x16ToM mask))) -(GreaterEqualMaskedInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [13] x y (VPMOVVec64x2ToM mask))) -(GreaterEqualMaskedInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [13] x y (VPMOVVec64x4ToM mask))) -(GreaterEqualMaskedInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [13] x y (VPMOVVec64x8ToM mask))) -(GreaterEqualMaskedUint8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPUBMasked128 [13] x y (VPMOVVec8x16ToM mask))) -(GreaterEqualMaskedUint8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPUBMasked256 [13] x y (VPMOVVec8x32ToM mask))) -(GreaterEqualMaskedUint8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPUBMasked512 [13] x y (VPMOVVec8x64ToM mask))) -(GreaterEqualMaskedUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [13] x y (VPMOVVec16x8ToM mask))) -(GreaterEqualMaskedUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [13] x y (VPMOVVec16x16ToM mask))) -(GreaterEqualMaskedUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [13] x y (VPMOVVec16x32ToM mask))) -(GreaterEqualMaskedUint32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPUDMasked128 [13] x y (VPMOVVec32x4ToM mask))) -(GreaterEqualMaskedUint32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPUDMasked256 [13] x y (VPMOVVec32x8ToM mask))) -(GreaterEqualMaskedUint32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPUDMasked512 [13] x y (VPMOVVec32x16ToM mask))) -(GreaterEqualMaskedUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [13] x y (VPMOVVec64x2ToM mask))) -(GreaterEqualMaskedUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [13] x y (VPMOVVec64x4ToM mask))) -(GreaterEqualMaskedUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [13] x y (VPMOVVec64x8ToM 
mask))) -(GreaterMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [14] x y (VPMOVVec32x4ToM mask))) -(GreaterMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [14] x y (VPMOVVec32x8ToM mask))) -(GreaterMaskedFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [14] x y (VPMOVVec32x16ToM mask))) -(GreaterMaskedFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [14] x y (VPMOVVec64x2ToM mask))) -(GreaterMaskedFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [14] x y (VPMOVVec64x4ToM mask))) -(GreaterMaskedFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [14] x y (VPMOVVec64x8ToM mask))) -(GreaterMaskedInt8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPBMasked128 [14] x y (VPMOVVec8x16ToM mask))) -(GreaterMaskedInt8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPBMasked256 [14] x y (VPMOVVec8x32ToM mask))) -(GreaterMaskedInt8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPBMasked512 [14] x y (VPMOVVec8x64ToM mask))) -(GreaterMaskedInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [14] x y (VPMOVVec16x8ToM mask))) -(GreaterMaskedInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPWMasked256 [14] x y (VPMOVVec16x16ToM mask))) -(GreaterMaskedInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [14] x y (VPMOVVec16x32ToM mask))) -(GreaterMaskedInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [14] x y (VPMOVVec32x4ToM mask))) -(GreaterMaskedInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [14] x y (VPMOVVec32x8ToM mask))) -(GreaterMaskedInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [14] x y (VPMOVVec32x16ToM mask))) -(GreaterMaskedInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [14] x y (VPMOVVec64x2ToM mask))) -(GreaterMaskedInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [14] x y (VPMOVVec64x4ToM mask))) -(GreaterMaskedInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [14] x y (VPMOVVec64x8ToM mask))) -(GreaterMaskedUint8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPUBMasked128 [14] x y (VPMOVVec8x16ToM mask))) -(GreaterMaskedUint8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPUBMasked256 [14] x y (VPMOVVec8x32ToM mask))) -(GreaterMaskedUint8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPUBMasked512 [14] x y (VPMOVVec8x64ToM mask))) -(GreaterMaskedUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [14] x y (VPMOVVec16x8ToM mask))) -(GreaterMaskedUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [14] x y (VPMOVVec16x16ToM mask))) -(GreaterMaskedUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [14] x y (VPMOVVec16x32ToM mask))) -(GreaterMaskedUint32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPUDMasked128 [14] x y (VPMOVVec32x4ToM mask))) -(GreaterMaskedUint32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPUDMasked256 [14] x y (VPMOVVec32x8ToM mask))) -(GreaterMaskedUint32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPUDMasked512 [14] x y (VPMOVVec32x16ToM mask))) -(GreaterMaskedUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [14] x y (VPMOVVec64x2ToM mask))) -(GreaterMaskedUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [14] x y (VPMOVVec64x4ToM mask))) -(GreaterMaskedUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [14] x y (VPMOVVec64x8ToM mask))) (IsNanFloat32x4 x y) => (VCMPPS128 [3] x y) (IsNanFloat32x8 x y) => (VCMPPS256 [3] x y) (IsNanFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [3] x y)) (IsNanFloat64x2 x y) => (VCMPPD128 [3] x y) (IsNanFloat64x4 x y) => (VCMPPD256 [3] x y) (IsNanFloat64x8 x y) => (VPMOVMToVec64x8 (VCMPPD512 [3] x y)) -(IsNanMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 
(VCMPPSMasked128 [3] x y (VPMOVVec32x4ToM mask))) -(IsNanMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [3] x y (VPMOVVec32x8ToM mask))) -(IsNanMaskedFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [3] x y (VPMOVVec32x16ToM mask))) -(IsNanMaskedFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [3] x y (VPMOVVec64x2ToM mask))) -(IsNanMaskedFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [3] x y (VPMOVVec64x4ToM mask))) -(IsNanMaskedFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [3] x y (VPMOVVec64x8ToM mask))) (LessFloat32x4 x y) => (VCMPPS128 [1] x y) (LessFloat32x8 x y) => (VCMPPS256 [1] x y) (LessFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [1] x y)) @@ -710,66 +440,6 @@ (LessEqualUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [2] x y)) (LessEqualUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [2] x y)) (LessEqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [2] x y)) -(LessEqualMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [2] x y (VPMOVVec32x4ToM mask))) -(LessEqualMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [2] x y (VPMOVVec32x8ToM mask))) -(LessEqualMaskedFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [2] x y (VPMOVVec32x16ToM mask))) -(LessEqualMaskedFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [2] x y (VPMOVVec64x2ToM mask))) -(LessEqualMaskedFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [2] x y (VPMOVVec64x4ToM mask))) -(LessEqualMaskedFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [2] x y (VPMOVVec64x8ToM mask))) -(LessEqualMaskedInt8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPBMasked128 [2] x y (VPMOVVec8x16ToM mask))) -(LessEqualMaskedInt8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPBMasked256 [2] x y (VPMOVVec8x32ToM mask))) -(LessEqualMaskedInt8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPBMasked512 [2] x y (VPMOVVec8x64ToM mask))) -(LessEqualMaskedInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [2] x y (VPMOVVec16x8ToM mask))) -(LessEqualMaskedInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPWMasked256 [2] x y (VPMOVVec16x16ToM mask))) -(LessEqualMaskedInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [2] x y (VPMOVVec16x32ToM mask))) -(LessEqualMaskedInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [2] x y (VPMOVVec32x4ToM mask))) -(LessEqualMaskedInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [2] x y (VPMOVVec32x8ToM mask))) -(LessEqualMaskedInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [2] x y (VPMOVVec32x16ToM mask))) -(LessEqualMaskedInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [2] x y (VPMOVVec64x2ToM mask))) -(LessEqualMaskedInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [2] x y (VPMOVVec64x4ToM mask))) -(LessEqualMaskedInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [2] x y (VPMOVVec64x8ToM mask))) -(LessEqualMaskedUint8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPUBMasked128 [2] x y (VPMOVVec8x16ToM mask))) -(LessEqualMaskedUint8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPUBMasked256 [2] x y (VPMOVVec8x32ToM mask))) -(LessEqualMaskedUint8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPUBMasked512 [2] x y (VPMOVVec8x64ToM mask))) -(LessEqualMaskedUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [2] x y (VPMOVVec16x8ToM mask))) -(LessEqualMaskedUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [2] x y (VPMOVVec16x16ToM mask))) -(LessEqualMaskedUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [2] x y (VPMOVVec16x32ToM mask))) -(LessEqualMaskedUint32x4 x y mask) => 
(VPMOVMToVec32x4 (VPCMPUDMasked128 [2] x y (VPMOVVec32x4ToM mask))) -(LessEqualMaskedUint32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPUDMasked256 [2] x y (VPMOVVec32x8ToM mask))) -(LessEqualMaskedUint32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPUDMasked512 [2] x y (VPMOVVec32x16ToM mask))) -(LessEqualMaskedUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [2] x y (VPMOVVec64x2ToM mask))) -(LessEqualMaskedUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [2] x y (VPMOVVec64x4ToM mask))) -(LessEqualMaskedUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [2] x y (VPMOVVec64x8ToM mask))) -(LessMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [1] x y (VPMOVVec32x4ToM mask))) -(LessMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [1] x y (VPMOVVec32x8ToM mask))) -(LessMaskedFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [1] x y (VPMOVVec32x16ToM mask))) -(LessMaskedFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [1] x y (VPMOVVec64x2ToM mask))) -(LessMaskedFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [1] x y (VPMOVVec64x4ToM mask))) -(LessMaskedFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [1] x y (VPMOVVec64x8ToM mask))) -(LessMaskedInt8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPBMasked128 [1] x y (VPMOVVec8x16ToM mask))) -(LessMaskedInt8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPBMasked256 [1] x y (VPMOVVec8x32ToM mask))) -(LessMaskedInt8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPBMasked512 [1] x y (VPMOVVec8x64ToM mask))) -(LessMaskedInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [1] x y (VPMOVVec16x8ToM mask))) -(LessMaskedInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPWMasked256 [1] x y (VPMOVVec16x16ToM mask))) -(LessMaskedInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [1] x y (VPMOVVec16x32ToM mask))) -(LessMaskedInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [1] x y (VPMOVVec32x4ToM mask))) -(LessMaskedInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [1] x y (VPMOVVec32x8ToM mask))) -(LessMaskedInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [1] x y (VPMOVVec32x16ToM mask))) -(LessMaskedInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [1] x y (VPMOVVec64x2ToM mask))) -(LessMaskedInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [1] x y (VPMOVVec64x4ToM mask))) -(LessMaskedInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [1] x y (VPMOVVec64x8ToM mask))) -(LessMaskedUint8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPUBMasked128 [1] x y (VPMOVVec8x16ToM mask))) -(LessMaskedUint8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPUBMasked256 [1] x y (VPMOVVec8x32ToM mask))) -(LessMaskedUint8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPUBMasked512 [1] x y (VPMOVVec8x64ToM mask))) -(LessMaskedUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [1] x y (VPMOVVec16x8ToM mask))) -(LessMaskedUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [1] x y (VPMOVVec16x16ToM mask))) -(LessMaskedUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [1] x y (VPMOVVec16x32ToM mask))) -(LessMaskedUint32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPUDMasked128 [1] x y (VPMOVVec32x4ToM mask))) -(LessMaskedUint32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPUDMasked256 [1] x y (VPMOVVec32x8ToM mask))) -(LessMaskedUint32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPUDMasked512 [1] x y (VPMOVVec32x16ToM mask))) -(LessMaskedUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [1] x y (VPMOVVec64x2ToM mask))) -(LessMaskedUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [1] x y (VPMOVVec64x4ToM 
mask))) -(LessMaskedUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [1] x y (VPMOVVec64x8ToM mask))) (MaxFloat32x4 ...) => (VMAXPS128 ...) (MaxFloat32x8 ...) => (VMAXPS256 ...) (MaxFloat32x16 ...) => (VMAXPS512 ...) @@ -800,36 +470,6 @@ (MaxUint64x2 ...) => (VPMAXUQ128 ...) (MaxUint64x4 ...) => (VPMAXUQ256 ...) (MaxUint64x8 ...) => (VPMAXUQ512 ...) -(MaxMaskedFloat32x4 x y mask) => (VMAXPSMasked128 x y (VPMOVVec32x4ToM mask)) -(MaxMaskedFloat32x8 x y mask) => (VMAXPSMasked256 x y (VPMOVVec32x8ToM mask)) -(MaxMaskedFloat32x16 x y mask) => (VMAXPSMasked512 x y (VPMOVVec32x16ToM mask)) -(MaxMaskedFloat64x2 x y mask) => (VMAXPDMasked128 x y (VPMOVVec64x2ToM mask)) -(MaxMaskedFloat64x4 x y mask) => (VMAXPDMasked256 x y (VPMOVVec64x4ToM mask)) -(MaxMaskedFloat64x8 x y mask) => (VMAXPDMasked512 x y (VPMOVVec64x8ToM mask)) -(MaxMaskedInt8x16 x y mask) => (VPMAXSBMasked128 x y (VPMOVVec8x16ToM mask)) -(MaxMaskedInt8x32 x y mask) => (VPMAXSBMasked256 x y (VPMOVVec8x32ToM mask)) -(MaxMaskedInt8x64 x y mask) => (VPMAXSBMasked512 x y (VPMOVVec8x64ToM mask)) -(MaxMaskedInt16x8 x y mask) => (VPMAXSWMasked128 x y (VPMOVVec16x8ToM mask)) -(MaxMaskedInt16x16 x y mask) => (VPMAXSWMasked256 x y (VPMOVVec16x16ToM mask)) -(MaxMaskedInt16x32 x y mask) => (VPMAXSWMasked512 x y (VPMOVVec16x32ToM mask)) -(MaxMaskedInt32x4 x y mask) => (VPMAXSDMasked128 x y (VPMOVVec32x4ToM mask)) -(MaxMaskedInt32x8 x y mask) => (VPMAXSDMasked256 x y (VPMOVVec32x8ToM mask)) -(MaxMaskedInt32x16 x y mask) => (VPMAXSDMasked512 x y (VPMOVVec32x16ToM mask)) -(MaxMaskedInt64x2 x y mask) => (VPMAXSQMasked128 x y (VPMOVVec64x2ToM mask)) -(MaxMaskedInt64x4 x y mask) => (VPMAXSQMasked256 x y (VPMOVVec64x4ToM mask)) -(MaxMaskedInt64x8 x y mask) => (VPMAXSQMasked512 x y (VPMOVVec64x8ToM mask)) -(MaxMaskedUint8x16 x y mask) => (VPMAXUBMasked128 x y (VPMOVVec8x16ToM mask)) -(MaxMaskedUint8x32 x y mask) => (VPMAXUBMasked256 x y (VPMOVVec8x32ToM mask)) -(MaxMaskedUint8x64 x y mask) => (VPMAXUBMasked512 x y (VPMOVVec8x64ToM mask)) -(MaxMaskedUint16x8 x y mask) => (VPMAXUWMasked128 x y (VPMOVVec16x8ToM mask)) -(MaxMaskedUint16x16 x y mask) => (VPMAXUWMasked256 x y (VPMOVVec16x16ToM mask)) -(MaxMaskedUint16x32 x y mask) => (VPMAXUWMasked512 x y (VPMOVVec16x32ToM mask)) -(MaxMaskedUint32x4 x y mask) => (VPMAXUDMasked128 x y (VPMOVVec32x4ToM mask)) -(MaxMaskedUint32x8 x y mask) => (VPMAXUDMasked256 x y (VPMOVVec32x8ToM mask)) -(MaxMaskedUint32x16 x y mask) => (VPMAXUDMasked512 x y (VPMOVVec32x16ToM mask)) -(MaxMaskedUint64x2 x y mask) => (VPMAXUQMasked128 x y (VPMOVVec64x2ToM mask)) -(MaxMaskedUint64x4 x y mask) => (VPMAXUQMasked256 x y (VPMOVVec64x4ToM mask)) -(MaxMaskedUint64x8 x y mask) => (VPMAXUQMasked512 x y (VPMOVVec64x8ToM mask)) (MinFloat32x4 ...) => (VMINPS128 ...) (MinFloat32x8 ...) => (VMINPS256 ...) (MinFloat32x16 ...) => (VMINPS512 ...) @@ -860,36 +500,6 @@ (MinUint64x2 ...) => (VPMINUQ128 ...) (MinUint64x4 ...) => (VPMINUQ256 ...) (MinUint64x8 ...) => (VPMINUQ512 ...) 
-(MinMaskedFloat32x4 x y mask) => (VMINPSMasked128 x y (VPMOVVec32x4ToM mask)) -(MinMaskedFloat32x8 x y mask) => (VMINPSMasked256 x y (VPMOVVec32x8ToM mask)) -(MinMaskedFloat32x16 x y mask) => (VMINPSMasked512 x y (VPMOVVec32x16ToM mask)) -(MinMaskedFloat64x2 x y mask) => (VMINPDMasked128 x y (VPMOVVec64x2ToM mask)) -(MinMaskedFloat64x4 x y mask) => (VMINPDMasked256 x y (VPMOVVec64x4ToM mask)) -(MinMaskedFloat64x8 x y mask) => (VMINPDMasked512 x y (VPMOVVec64x8ToM mask)) -(MinMaskedInt8x16 x y mask) => (VPMINSBMasked128 x y (VPMOVVec8x16ToM mask)) -(MinMaskedInt8x32 x y mask) => (VPMINSBMasked256 x y (VPMOVVec8x32ToM mask)) -(MinMaskedInt8x64 x y mask) => (VPMINSBMasked512 x y (VPMOVVec8x64ToM mask)) -(MinMaskedInt16x8 x y mask) => (VPMINSWMasked128 x y (VPMOVVec16x8ToM mask)) -(MinMaskedInt16x16 x y mask) => (VPMINSWMasked256 x y (VPMOVVec16x16ToM mask)) -(MinMaskedInt16x32 x y mask) => (VPMINSWMasked512 x y (VPMOVVec16x32ToM mask)) -(MinMaskedInt32x4 x y mask) => (VPMINSDMasked128 x y (VPMOVVec32x4ToM mask)) -(MinMaskedInt32x8 x y mask) => (VPMINSDMasked256 x y (VPMOVVec32x8ToM mask)) -(MinMaskedInt32x16 x y mask) => (VPMINSDMasked512 x y (VPMOVVec32x16ToM mask)) -(MinMaskedInt64x2 x y mask) => (VPMINSQMasked128 x y (VPMOVVec64x2ToM mask)) -(MinMaskedInt64x4 x y mask) => (VPMINSQMasked256 x y (VPMOVVec64x4ToM mask)) -(MinMaskedInt64x8 x y mask) => (VPMINSQMasked512 x y (VPMOVVec64x8ToM mask)) -(MinMaskedUint8x16 x y mask) => (VPMINUBMasked128 x y (VPMOVVec8x16ToM mask)) -(MinMaskedUint8x32 x y mask) => (VPMINUBMasked256 x y (VPMOVVec8x32ToM mask)) -(MinMaskedUint8x64 x y mask) => (VPMINUBMasked512 x y (VPMOVVec8x64ToM mask)) -(MinMaskedUint16x8 x y mask) => (VPMINUWMasked128 x y (VPMOVVec16x8ToM mask)) -(MinMaskedUint16x16 x y mask) => (VPMINUWMasked256 x y (VPMOVVec16x16ToM mask)) -(MinMaskedUint16x32 x y mask) => (VPMINUWMasked512 x y (VPMOVVec16x32ToM mask)) -(MinMaskedUint32x4 x y mask) => (VPMINUDMasked128 x y (VPMOVVec32x4ToM mask)) -(MinMaskedUint32x8 x y mask) => (VPMINUDMasked256 x y (VPMOVVec32x8ToM mask)) -(MinMaskedUint32x16 x y mask) => (VPMINUDMasked512 x y (VPMOVVec32x16ToM mask)) -(MinMaskedUint64x2 x y mask) => (VPMINUQMasked128 x y (VPMOVVec64x2ToM mask)) -(MinMaskedUint64x4 x y mask) => (VPMINUQMasked256 x y (VPMOVVec64x4ToM mask)) -(MinMaskedUint64x8 x y mask) => (VPMINUQMasked512 x y (VPMOVVec64x8ToM mask)) (MulFloat32x4 ...) => (VMULPS128 ...) (MulFloat32x8 ...) => (VMULPS256 ...) (MulFloat32x16 ...) => (VMULPS512 ...) @@ -920,24 +530,12 @@ (MulAddFloat64x2 ...) => (VFMADD213PD128 ...) (MulAddFloat64x4 ...) => (VFMADD213PD256 ...) (MulAddFloat64x8 ...) => (VFMADD213PD512 ...) -(MulAddMaskedFloat32x4 x y z mask) => (VFMADD213PSMasked128 x y z (VPMOVVec32x4ToM mask)) -(MulAddMaskedFloat32x8 x y z mask) => (VFMADD213PSMasked256 x y z (VPMOVVec32x8ToM mask)) -(MulAddMaskedFloat32x16 x y z mask) => (VFMADD213PSMasked512 x y z (VPMOVVec32x16ToM mask)) -(MulAddMaskedFloat64x2 x y z mask) => (VFMADD213PDMasked128 x y z (VPMOVVec64x2ToM mask)) -(MulAddMaskedFloat64x4 x y z mask) => (VFMADD213PDMasked256 x y z (VPMOVVec64x4ToM mask)) -(MulAddMaskedFloat64x8 x y z mask) => (VFMADD213PDMasked512 x y z (VPMOVVec64x8ToM mask)) (MulAddSubFloat32x4 ...) => (VFMADDSUB213PS128 ...) (MulAddSubFloat32x8 ...) => (VFMADDSUB213PS256 ...) (MulAddSubFloat32x16 ...) => (VFMADDSUB213PS512 ...) (MulAddSubFloat64x2 ...) => (VFMADDSUB213PD128 ...) (MulAddSubFloat64x4 ...) => (VFMADDSUB213PD256 ...) (MulAddSubFloat64x8 ...) => (VFMADDSUB213PD512 ...) 
-(MulAddSubMaskedFloat32x4 x y z mask) => (VFMADDSUB213PSMasked128 x y z (VPMOVVec32x4ToM mask)) -(MulAddSubMaskedFloat32x8 x y z mask) => (VFMADDSUB213PSMasked256 x y z (VPMOVVec32x8ToM mask)) -(MulAddSubMaskedFloat32x16 x y z mask) => (VFMADDSUB213PSMasked512 x y z (VPMOVVec32x16ToM mask)) -(MulAddSubMaskedFloat64x2 x y z mask) => (VFMADDSUB213PDMasked128 x y z (VPMOVVec64x2ToM mask)) -(MulAddSubMaskedFloat64x4 x y z mask) => (VFMADDSUB213PDMasked256 x y z (VPMOVVec64x4ToM mask)) -(MulAddSubMaskedFloat64x8 x y z mask) => (VFMADDSUB213PDMasked512 x y z (VPMOVVec64x8ToM mask)) (MulEvenWidenInt32x4 ...) => (VPMULDQ128 ...) (MulEvenWidenInt32x8 ...) => (VPMULDQ256 ...) (MulEvenWidenUint32x4 ...) => (VPMULUDQ128 ...) @@ -948,48 +546,12 @@ (MulHighUint16x8 ...) => (VPMULHUW128 ...) (MulHighUint16x16 ...) => (VPMULHUW256 ...) (MulHighUint16x32 ...) => (VPMULHUW512 ...) -(MulHighMaskedInt16x8 x y mask) => (VPMULHWMasked128 x y (VPMOVVec16x8ToM mask)) -(MulHighMaskedInt16x16 x y mask) => (VPMULHWMasked256 x y (VPMOVVec16x16ToM mask)) -(MulHighMaskedInt16x32 x y mask) => (VPMULHWMasked512 x y (VPMOVVec16x32ToM mask)) -(MulHighMaskedUint16x8 x y mask) => (VPMULHUWMasked128 x y (VPMOVVec16x8ToM mask)) -(MulHighMaskedUint16x16 x y mask) => (VPMULHUWMasked256 x y (VPMOVVec16x16ToM mask)) -(MulHighMaskedUint16x32 x y mask) => (VPMULHUWMasked512 x y (VPMOVVec16x32ToM mask)) -(MulMaskedFloat32x4 x y mask) => (VMULPSMasked128 x y (VPMOVVec32x4ToM mask)) -(MulMaskedFloat32x8 x y mask) => (VMULPSMasked256 x y (VPMOVVec32x8ToM mask)) -(MulMaskedFloat32x16 x y mask) => (VMULPSMasked512 x y (VPMOVVec32x16ToM mask)) -(MulMaskedFloat64x2 x y mask) => (VMULPDMasked128 x y (VPMOVVec64x2ToM mask)) -(MulMaskedFloat64x4 x y mask) => (VMULPDMasked256 x y (VPMOVVec64x4ToM mask)) -(MulMaskedFloat64x8 x y mask) => (VMULPDMasked512 x y (VPMOVVec64x8ToM mask)) -(MulMaskedInt16x8 x y mask) => (VPMULLWMasked128 x y (VPMOVVec16x8ToM mask)) -(MulMaskedInt16x16 x y mask) => (VPMULLWMasked256 x y (VPMOVVec16x16ToM mask)) -(MulMaskedInt16x32 x y mask) => (VPMULLWMasked512 x y (VPMOVVec16x32ToM mask)) -(MulMaskedInt32x4 x y mask) => (VPMULLDMasked128 x y (VPMOVVec32x4ToM mask)) -(MulMaskedInt32x8 x y mask) => (VPMULLDMasked256 x y (VPMOVVec32x8ToM mask)) -(MulMaskedInt32x16 x y mask) => (VPMULLDMasked512 x y (VPMOVVec32x16ToM mask)) -(MulMaskedInt64x2 x y mask) => (VPMULLQMasked128 x y (VPMOVVec64x2ToM mask)) -(MulMaskedInt64x4 x y mask) => (VPMULLQMasked256 x y (VPMOVVec64x4ToM mask)) -(MulMaskedInt64x8 x y mask) => (VPMULLQMasked512 x y (VPMOVVec64x8ToM mask)) -(MulMaskedUint16x8 x y mask) => (VPMULLWMasked128 x y (VPMOVVec16x8ToM mask)) -(MulMaskedUint16x16 x y mask) => (VPMULLWMasked256 x y (VPMOVVec16x16ToM mask)) -(MulMaskedUint16x32 x y mask) => (VPMULLWMasked512 x y (VPMOVVec16x32ToM mask)) -(MulMaskedUint32x4 x y mask) => (VPMULLDMasked128 x y (VPMOVVec32x4ToM mask)) -(MulMaskedUint32x8 x y mask) => (VPMULLDMasked256 x y (VPMOVVec32x8ToM mask)) -(MulMaskedUint32x16 x y mask) => (VPMULLDMasked512 x y (VPMOVVec32x16ToM mask)) -(MulMaskedUint64x2 x y mask) => (VPMULLQMasked128 x y (VPMOVVec64x2ToM mask)) -(MulMaskedUint64x4 x y mask) => (VPMULLQMasked256 x y (VPMOVVec64x4ToM mask)) -(MulMaskedUint64x8 x y mask) => (VPMULLQMasked512 x y (VPMOVVec64x8ToM mask)) (MulSubAddFloat32x4 ...) => (VFMSUBADD213PS128 ...) (MulSubAddFloat32x8 ...) => (VFMSUBADD213PS256 ...) (MulSubAddFloat32x16 ...) => (VFMSUBADD213PS512 ...) (MulSubAddFloat64x2 ...) => (VFMSUBADD213PD128 ...) (MulSubAddFloat64x4 ...) => (VFMSUBADD213PD256 ...) 
(MulSubAddFloat64x8 ...) => (VFMSUBADD213PD512 ...) -(MulSubAddMaskedFloat32x4 x y z mask) => (VFMSUBADD213PSMasked128 x y z (VPMOVVec32x4ToM mask)) -(MulSubAddMaskedFloat32x8 x y z mask) => (VFMSUBADD213PSMasked256 x y z (VPMOVVec32x8ToM mask)) -(MulSubAddMaskedFloat32x16 x y z mask) => (VFMSUBADD213PSMasked512 x y z (VPMOVVec32x16ToM mask)) -(MulSubAddMaskedFloat64x2 x y z mask) => (VFMSUBADD213PDMasked128 x y z (VPMOVVec64x2ToM mask)) -(MulSubAddMaskedFloat64x4 x y z mask) => (VFMSUBADD213PDMasked256 x y z (VPMOVVec64x4ToM mask)) -(MulSubAddMaskedFloat64x8 x y z mask) => (VFMSUBADD213PDMasked512 x y z (VPMOVVec64x8ToM mask)) (NotEqualFloat32x4 x y) => (VCMPPS128 [4] x y) (NotEqualFloat32x8 x y) => (VCMPPS256 [4] x y) (NotEqualFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [4] x y)) @@ -1004,36 +566,6 @@ (NotEqualUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [4] x y)) (NotEqualUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [4] x y)) (NotEqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [4] x y)) -(NotEqualMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [4] x y (VPMOVVec32x4ToM mask))) -(NotEqualMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [4] x y (VPMOVVec32x8ToM mask))) -(NotEqualMaskedFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [4] x y (VPMOVVec32x16ToM mask))) -(NotEqualMaskedFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [4] x y (VPMOVVec64x2ToM mask))) -(NotEqualMaskedFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [4] x y (VPMOVVec64x4ToM mask))) -(NotEqualMaskedFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [4] x y (VPMOVVec64x8ToM mask))) -(NotEqualMaskedInt8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPBMasked128 [4] x y (VPMOVVec8x16ToM mask))) -(NotEqualMaskedInt8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPBMasked256 [4] x y (VPMOVVec8x32ToM mask))) -(NotEqualMaskedInt8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPBMasked512 [4] x y (VPMOVVec8x64ToM mask))) -(NotEqualMaskedInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [4] x y (VPMOVVec16x8ToM mask))) -(NotEqualMaskedInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPWMasked256 [4] x y (VPMOVVec16x16ToM mask))) -(NotEqualMaskedInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [4] x y (VPMOVVec16x32ToM mask))) -(NotEqualMaskedInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [4] x y (VPMOVVec32x4ToM mask))) -(NotEqualMaskedInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [4] x y (VPMOVVec32x8ToM mask))) -(NotEqualMaskedInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [4] x y (VPMOVVec32x16ToM mask))) -(NotEqualMaskedInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [4] x y (VPMOVVec64x2ToM mask))) -(NotEqualMaskedInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [4] x y (VPMOVVec64x4ToM mask))) -(NotEqualMaskedInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [4] x y (VPMOVVec64x8ToM mask))) -(NotEqualMaskedUint8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPUBMasked128 [4] x y (VPMOVVec8x16ToM mask))) -(NotEqualMaskedUint8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPUBMasked256 [4] x y (VPMOVVec8x32ToM mask))) -(NotEqualMaskedUint8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPUBMasked512 [4] x y (VPMOVVec8x64ToM mask))) -(NotEqualMaskedUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [4] x y (VPMOVVec16x8ToM mask))) -(NotEqualMaskedUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [4] x y (VPMOVVec16x16ToM mask))) -(NotEqualMaskedUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [4] x y (VPMOVVec16x32ToM 
mask))) -(NotEqualMaskedUint32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPUDMasked128 [4] x y (VPMOVVec32x4ToM mask))) -(NotEqualMaskedUint32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPUDMasked256 [4] x y (VPMOVVec32x8ToM mask))) -(NotEqualMaskedUint32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPUDMasked512 [4] x y (VPMOVVec32x16ToM mask))) -(NotEqualMaskedUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [4] x y (VPMOVVec64x2ToM mask))) -(NotEqualMaskedUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [4] x y (VPMOVVec64x4ToM mask))) -(NotEqualMaskedUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [4] x y (VPMOVVec64x8ToM mask))) (OnesCountInt8x16 ...) => (VPOPCNTB128 ...) (OnesCountInt8x32 ...) => (VPOPCNTB256 ...) (OnesCountInt8x64 ...) => (VPOPCNTB512 ...) @@ -1058,30 +590,6 @@ (OnesCountUint64x2 ...) => (VPOPCNTQ128 ...) (OnesCountUint64x4 ...) => (VPOPCNTQ256 ...) (OnesCountUint64x8 ...) => (VPOPCNTQ512 ...) -(OnesCountMaskedInt8x16 x mask) => (VPOPCNTBMasked128 x (VPMOVVec8x16ToM mask)) -(OnesCountMaskedInt8x32 x mask) => (VPOPCNTBMasked256 x (VPMOVVec8x32ToM mask)) -(OnesCountMaskedInt8x64 x mask) => (VPOPCNTBMasked512 x (VPMOVVec8x64ToM mask)) -(OnesCountMaskedInt16x8 x mask) => (VPOPCNTWMasked128 x (VPMOVVec16x8ToM mask)) -(OnesCountMaskedInt16x16 x mask) => (VPOPCNTWMasked256 x (VPMOVVec16x16ToM mask)) -(OnesCountMaskedInt16x32 x mask) => (VPOPCNTWMasked512 x (VPMOVVec16x32ToM mask)) -(OnesCountMaskedInt32x4 x mask) => (VPOPCNTDMasked128 x (VPMOVVec32x4ToM mask)) -(OnesCountMaskedInt32x8 x mask) => (VPOPCNTDMasked256 x (VPMOVVec32x8ToM mask)) -(OnesCountMaskedInt32x16 x mask) => (VPOPCNTDMasked512 x (VPMOVVec32x16ToM mask)) -(OnesCountMaskedInt64x2 x mask) => (VPOPCNTQMasked128 x (VPMOVVec64x2ToM mask)) -(OnesCountMaskedInt64x4 x mask) => (VPOPCNTQMasked256 x (VPMOVVec64x4ToM mask)) -(OnesCountMaskedInt64x8 x mask) => (VPOPCNTQMasked512 x (VPMOVVec64x8ToM mask)) -(OnesCountMaskedUint8x16 x mask) => (VPOPCNTBMasked128 x (VPMOVVec8x16ToM mask)) -(OnesCountMaskedUint8x32 x mask) => (VPOPCNTBMasked256 x (VPMOVVec8x32ToM mask)) -(OnesCountMaskedUint8x64 x mask) => (VPOPCNTBMasked512 x (VPMOVVec8x64ToM mask)) -(OnesCountMaskedUint16x8 x mask) => (VPOPCNTWMasked128 x (VPMOVVec16x8ToM mask)) -(OnesCountMaskedUint16x16 x mask) => (VPOPCNTWMasked256 x (VPMOVVec16x16ToM mask)) -(OnesCountMaskedUint16x32 x mask) => (VPOPCNTWMasked512 x (VPMOVVec16x32ToM mask)) -(OnesCountMaskedUint32x4 x mask) => (VPOPCNTDMasked128 x (VPMOVVec32x4ToM mask)) -(OnesCountMaskedUint32x8 x mask) => (VPOPCNTDMasked256 x (VPMOVVec32x8ToM mask)) -(OnesCountMaskedUint32x16 x mask) => (VPOPCNTDMasked512 x (VPMOVVec32x16ToM mask)) -(OnesCountMaskedUint64x2 x mask) => (VPOPCNTQMasked128 x (VPMOVVec64x2ToM mask)) -(OnesCountMaskedUint64x4 x mask) => (VPOPCNTQMasked256 x (VPMOVVec64x4ToM mask)) -(OnesCountMaskedUint64x8 x mask) => (VPOPCNTQMasked512 x (VPMOVVec64x8ToM mask)) (OrInt8x16 ...) => (VPOR128 ...) (OrInt8x32 ...) => (VPOR256 ...) (OrInt8x64 ...) => (VPORD512 ...) @@ -1106,18 +614,6 @@ (OrUint64x2 ...) => (VPOR128 ...) (OrUint64x4 ...) => (VPOR256 ...) (OrUint64x8 ...) => (VPORQ512 ...) 
-(OrMaskedInt32x4 x y mask) => (VPORDMasked128 x y (VPMOVVec32x4ToM mask)) -(OrMaskedInt32x8 x y mask) => (VPORDMasked256 x y (VPMOVVec32x8ToM mask)) -(OrMaskedInt32x16 x y mask) => (VPORDMasked512 x y (VPMOVVec32x16ToM mask)) -(OrMaskedInt64x2 x y mask) => (VPORQMasked128 x y (VPMOVVec64x2ToM mask)) -(OrMaskedInt64x4 x y mask) => (VPORQMasked256 x y (VPMOVVec64x4ToM mask)) -(OrMaskedInt64x8 x y mask) => (VPORQMasked512 x y (VPMOVVec64x8ToM mask)) -(OrMaskedUint32x4 x y mask) => (VPORDMasked128 x y (VPMOVVec32x4ToM mask)) -(OrMaskedUint32x8 x y mask) => (VPORDMasked256 x y (VPMOVVec32x8ToM mask)) -(OrMaskedUint32x16 x y mask) => (VPORDMasked512 x y (VPMOVVec32x16ToM mask)) -(OrMaskedUint64x2 x y mask) => (VPORQMasked128 x y (VPMOVVec64x2ToM mask)) -(OrMaskedUint64x4 x y mask) => (VPORQMasked256 x y (VPMOVVec64x4ToM mask)) -(OrMaskedUint64x8 x y mask) => (VPORQMasked512 x y (VPMOVVec64x8ToM mask)) (PermuteFloat32x8 ...) => (VPERMPS256 ...) (PermuteFloat32x16 ...) => (VPERMPS512 ...) (PermuteFloat64x4 ...) => (VPERMPD256 ...) @@ -1172,84 +668,18 @@ (Permute2Uint64x2 ...) => (VPERMI2Q128 ...) (Permute2Uint64x4 ...) => (VPERMI2Q256 ...) (Permute2Uint64x8 ...) => (VPERMI2Q512 ...) -(Permute2MaskedFloat32x4 x y z mask) => (VPERMI2PSMasked128 x y z (VPMOVVec32x4ToM mask)) -(Permute2MaskedFloat32x8 x y z mask) => (VPERMI2PSMasked256 x y z (VPMOVVec32x8ToM mask)) -(Permute2MaskedFloat32x16 x y z mask) => (VPERMI2PSMasked512 x y z (VPMOVVec32x16ToM mask)) -(Permute2MaskedFloat64x2 x y z mask) => (VPERMI2PDMasked128 x y z (VPMOVVec64x2ToM mask)) -(Permute2MaskedFloat64x4 x y z mask) => (VPERMI2PDMasked256 x y z (VPMOVVec64x4ToM mask)) -(Permute2MaskedFloat64x8 x y z mask) => (VPERMI2PDMasked512 x y z (VPMOVVec64x8ToM mask)) -(Permute2MaskedInt8x16 x y z mask) => (VPERMI2BMasked128 x y z (VPMOVVec8x16ToM mask)) -(Permute2MaskedInt8x32 x y z mask) => (VPERMI2BMasked256 x y z (VPMOVVec8x32ToM mask)) -(Permute2MaskedInt8x64 x y z mask) => (VPERMI2BMasked512 x y z (VPMOVVec8x64ToM mask)) -(Permute2MaskedInt16x8 x y z mask) => (VPERMI2WMasked128 x y z (VPMOVVec16x8ToM mask)) -(Permute2MaskedInt16x16 x y z mask) => (VPERMI2WMasked256 x y z (VPMOVVec16x16ToM mask)) -(Permute2MaskedInt16x32 x y z mask) => (VPERMI2WMasked512 x y z (VPMOVVec16x32ToM mask)) -(Permute2MaskedInt32x4 x y z mask) => (VPERMI2DMasked128 x y z (VPMOVVec32x4ToM mask)) -(Permute2MaskedInt32x8 x y z mask) => (VPERMI2DMasked256 x y z (VPMOVVec32x8ToM mask)) -(Permute2MaskedInt32x16 x y z mask) => (VPERMI2DMasked512 x y z (VPMOVVec32x16ToM mask)) -(Permute2MaskedInt64x2 x y z mask) => (VPERMI2QMasked128 x y z (VPMOVVec64x2ToM mask)) -(Permute2MaskedInt64x4 x y z mask) => (VPERMI2QMasked256 x y z (VPMOVVec64x4ToM mask)) -(Permute2MaskedInt64x8 x y z mask) => (VPERMI2QMasked512 x y z (VPMOVVec64x8ToM mask)) -(Permute2MaskedUint8x16 x y z mask) => (VPERMI2BMasked128 x y z (VPMOVVec8x16ToM mask)) -(Permute2MaskedUint8x32 x y z mask) => (VPERMI2BMasked256 x y z (VPMOVVec8x32ToM mask)) -(Permute2MaskedUint8x64 x y z mask) => (VPERMI2BMasked512 x y z (VPMOVVec8x64ToM mask)) -(Permute2MaskedUint16x8 x y z mask) => (VPERMI2WMasked128 x y z (VPMOVVec16x8ToM mask)) -(Permute2MaskedUint16x16 x y z mask) => (VPERMI2WMasked256 x y z (VPMOVVec16x16ToM mask)) -(Permute2MaskedUint16x32 x y z mask) => (VPERMI2WMasked512 x y z (VPMOVVec16x32ToM mask)) -(Permute2MaskedUint32x4 x y z mask) => (VPERMI2DMasked128 x y z (VPMOVVec32x4ToM mask)) -(Permute2MaskedUint32x8 x y z mask) => (VPERMI2DMasked256 x y z (VPMOVVec32x8ToM mask)) -(Permute2MaskedUint32x16 x y z 
mask) => (VPERMI2DMasked512 x y z (VPMOVVec32x16ToM mask)) -(Permute2MaskedUint64x2 x y z mask) => (VPERMI2QMasked128 x y z (VPMOVVec64x2ToM mask)) -(Permute2MaskedUint64x4 x y z mask) => (VPERMI2QMasked256 x y z (VPMOVVec64x4ToM mask)) -(Permute2MaskedUint64x8 x y z mask) => (VPERMI2QMasked512 x y z (VPMOVVec64x8ToM mask)) -(PermuteMaskedFloat32x8 x y mask) => (VPERMPSMasked256 x y (VPMOVVec32x8ToM mask)) -(PermuteMaskedFloat32x16 x y mask) => (VPERMPSMasked512 x y (VPMOVVec32x16ToM mask)) -(PermuteMaskedFloat64x4 x y mask) => (VPERMPDMasked256 x y (VPMOVVec64x4ToM mask)) -(PermuteMaskedFloat64x8 x y mask) => (VPERMPDMasked512 x y (VPMOVVec64x8ToM mask)) -(PermuteMaskedInt8x16 x y mask) => (VPERMBMasked128 x y (VPMOVVec8x16ToM mask)) -(PermuteMaskedInt8x32 x y mask) => (VPERMBMasked256 x y (VPMOVVec8x32ToM mask)) -(PermuteMaskedInt8x64 x y mask) => (VPERMBMasked512 x y (VPMOVVec8x64ToM mask)) -(PermuteMaskedInt16x8 x y mask) => (VPERMWMasked128 x y (VPMOVVec16x8ToM mask)) -(PermuteMaskedInt16x16 x y mask) => (VPERMWMasked256 x y (VPMOVVec16x16ToM mask)) -(PermuteMaskedInt16x32 x y mask) => (VPERMWMasked512 x y (VPMOVVec16x32ToM mask)) -(PermuteMaskedInt32x8 x y mask) => (VPERMDMasked256 x y (VPMOVVec32x8ToM mask)) -(PermuteMaskedInt32x16 x y mask) => (VPERMDMasked512 x y (VPMOVVec32x16ToM mask)) -(PermuteMaskedInt64x4 x y mask) => (VPERMQMasked256 x y (VPMOVVec64x4ToM mask)) -(PermuteMaskedInt64x8 x y mask) => (VPERMQMasked512 x y (VPMOVVec64x8ToM mask)) -(PermuteMaskedUint8x16 x y mask) => (VPERMBMasked128 x y (VPMOVVec8x16ToM mask)) -(PermuteMaskedUint8x32 x y mask) => (VPERMBMasked256 x y (VPMOVVec8x32ToM mask)) -(PermuteMaskedUint8x64 x y mask) => (VPERMBMasked512 x y (VPMOVVec8x64ToM mask)) -(PermuteMaskedUint16x8 x y mask) => (VPERMWMasked128 x y (VPMOVVec16x8ToM mask)) -(PermuteMaskedUint16x16 x y mask) => (VPERMWMasked256 x y (VPMOVVec16x16ToM mask)) -(PermuteMaskedUint16x32 x y mask) => (VPERMWMasked512 x y (VPMOVVec16x32ToM mask)) -(PermuteMaskedUint32x8 x y mask) => (VPERMDMasked256 x y (VPMOVVec32x8ToM mask)) -(PermuteMaskedUint32x16 x y mask) => (VPERMDMasked512 x y (VPMOVVec32x16ToM mask)) -(PermuteMaskedUint64x4 x y mask) => (VPERMQMasked256 x y (VPMOVVec64x4ToM mask)) -(PermuteMaskedUint64x8 x y mask) => (VPERMQMasked512 x y (VPMOVVec64x8ToM mask)) (ReciprocalFloat32x4 ...) => (VRCPPS128 ...) (ReciprocalFloat32x8 ...) => (VRCPPS256 ...) (ReciprocalFloat32x16 ...) => (VRCP14PS512 ...) (ReciprocalFloat64x2 ...) => (VRCP14PD128 ...) (ReciprocalFloat64x4 ...) => (VRCP14PD256 ...) (ReciprocalFloat64x8 ...) => (VRCP14PD512 ...) -(ReciprocalMaskedFloat32x4 x mask) => (VRCP14PSMasked128 x (VPMOVVec32x4ToM mask)) -(ReciprocalMaskedFloat32x8 x mask) => (VRCP14PSMasked256 x (VPMOVVec32x8ToM mask)) -(ReciprocalMaskedFloat32x16 x mask) => (VRCP14PSMasked512 x (VPMOVVec32x16ToM mask)) -(ReciprocalMaskedFloat64x2 x mask) => (VRCP14PDMasked128 x (VPMOVVec64x2ToM mask)) -(ReciprocalMaskedFloat64x4 x mask) => (VRCP14PDMasked256 x (VPMOVVec64x4ToM mask)) -(ReciprocalMaskedFloat64x8 x mask) => (VRCP14PDMasked512 x (VPMOVVec64x8ToM mask)) (ReciprocalSqrtFloat32x4 ...) => (VRSQRTPS128 ...) (ReciprocalSqrtFloat32x8 ...) => (VRSQRTPS256 ...) (ReciprocalSqrtFloat32x16 ...) => (VRSQRT14PS512 ...) (ReciprocalSqrtFloat64x2 ...) => (VRSQRT14PD128 ...) (ReciprocalSqrtFloat64x4 ...) => (VRSQRT14PD256 ...) (ReciprocalSqrtFloat64x8 ...) => (VRSQRT14PD512 ...) 
-(ReciprocalSqrtMaskedFloat32x4 x mask) => (VRSQRT14PSMasked128 x (VPMOVVec32x4ToM mask)) -(ReciprocalSqrtMaskedFloat32x8 x mask) => (VRSQRT14PSMasked256 x (VPMOVVec32x8ToM mask)) -(ReciprocalSqrtMaskedFloat32x16 x mask) => (VRSQRT14PSMasked512 x (VPMOVVec32x16ToM mask)) -(ReciprocalSqrtMaskedFloat64x2 x mask) => (VRSQRT14PDMasked128 x (VPMOVVec64x2ToM mask)) -(ReciprocalSqrtMaskedFloat64x4 x mask) => (VRSQRT14PDMasked256 x (VPMOVVec64x4ToM mask)) -(ReciprocalSqrtMaskedFloat64x8 x mask) => (VRSQRT14PDMasked512 x (VPMOVVec64x8ToM mask)) (RotateAllLeftInt32x4 ...) => (VPROLD128 ...) (RotateAllLeftInt32x8 ...) => (VPROLD256 ...) (RotateAllLeftInt32x16 ...) => (VPROLD512 ...) @@ -1262,18 +692,6 @@ (RotateAllLeftUint64x2 ...) => (VPROLQ128 ...) (RotateAllLeftUint64x4 ...) => (VPROLQ256 ...) (RotateAllLeftUint64x8 ...) => (VPROLQ512 ...) -(RotateAllLeftMaskedInt32x4 [a] x mask) => (VPROLDMasked128 [a] x (VPMOVVec32x4ToM mask)) -(RotateAllLeftMaskedInt32x8 [a] x mask) => (VPROLDMasked256 [a] x (VPMOVVec32x8ToM mask)) -(RotateAllLeftMaskedInt32x16 [a] x mask) => (VPROLDMasked512 [a] x (VPMOVVec32x16ToM mask)) -(RotateAllLeftMaskedInt64x2 [a] x mask) => (VPROLQMasked128 [a] x (VPMOVVec64x2ToM mask)) -(RotateAllLeftMaskedInt64x4 [a] x mask) => (VPROLQMasked256 [a] x (VPMOVVec64x4ToM mask)) -(RotateAllLeftMaskedInt64x8 [a] x mask) => (VPROLQMasked512 [a] x (VPMOVVec64x8ToM mask)) -(RotateAllLeftMaskedUint32x4 [a] x mask) => (VPROLDMasked128 [a] x (VPMOVVec32x4ToM mask)) -(RotateAllLeftMaskedUint32x8 [a] x mask) => (VPROLDMasked256 [a] x (VPMOVVec32x8ToM mask)) -(RotateAllLeftMaskedUint32x16 [a] x mask) => (VPROLDMasked512 [a] x (VPMOVVec32x16ToM mask)) -(RotateAllLeftMaskedUint64x2 [a] x mask) => (VPROLQMasked128 [a] x (VPMOVVec64x2ToM mask)) -(RotateAllLeftMaskedUint64x4 [a] x mask) => (VPROLQMasked256 [a] x (VPMOVVec64x4ToM mask)) -(RotateAllLeftMaskedUint64x8 [a] x mask) => (VPROLQMasked512 [a] x (VPMOVVec64x8ToM mask)) (RotateAllRightInt32x4 ...) => (VPRORD128 ...) (RotateAllRightInt32x8 ...) => (VPRORD256 ...) (RotateAllRightInt32x16 ...) => (VPRORD512 ...) @@ -1286,18 +704,6 @@ (RotateAllRightUint64x2 ...) => (VPRORQ128 ...) (RotateAllRightUint64x4 ...) => (VPRORQ256 ...) (RotateAllRightUint64x8 ...) => (VPRORQ512 ...) -(RotateAllRightMaskedInt32x4 [a] x mask) => (VPRORDMasked128 [a] x (VPMOVVec32x4ToM mask)) -(RotateAllRightMaskedInt32x8 [a] x mask) => (VPRORDMasked256 [a] x (VPMOVVec32x8ToM mask)) -(RotateAllRightMaskedInt32x16 [a] x mask) => (VPRORDMasked512 [a] x (VPMOVVec32x16ToM mask)) -(RotateAllRightMaskedInt64x2 [a] x mask) => (VPRORQMasked128 [a] x (VPMOVVec64x2ToM mask)) -(RotateAllRightMaskedInt64x4 [a] x mask) => (VPRORQMasked256 [a] x (VPMOVVec64x4ToM mask)) -(RotateAllRightMaskedInt64x8 [a] x mask) => (VPRORQMasked512 [a] x (VPMOVVec64x8ToM mask)) -(RotateAllRightMaskedUint32x4 [a] x mask) => (VPRORDMasked128 [a] x (VPMOVVec32x4ToM mask)) -(RotateAllRightMaskedUint32x8 [a] x mask) => (VPRORDMasked256 [a] x (VPMOVVec32x8ToM mask)) -(RotateAllRightMaskedUint32x16 [a] x mask) => (VPRORDMasked512 [a] x (VPMOVVec32x16ToM mask)) -(RotateAllRightMaskedUint64x2 [a] x mask) => (VPRORQMasked128 [a] x (VPMOVVec64x2ToM mask)) -(RotateAllRightMaskedUint64x4 [a] x mask) => (VPRORQMasked256 [a] x (VPMOVVec64x4ToM mask)) -(RotateAllRightMaskedUint64x8 [a] x mask) => (VPRORQMasked512 [a] x (VPMOVVec64x8ToM mask)) (RotateLeftInt32x4 ...) => (VPROLVD128 ...) (RotateLeftInt32x8 ...) => (VPROLVD256 ...) (RotateLeftInt32x16 ...) => (VPROLVD512 ...) @@ -1310,18 +716,6 @@ (RotateLeftUint64x2 ...) 
=> (VPROLVQ128 ...) (RotateLeftUint64x4 ...) => (VPROLVQ256 ...) (RotateLeftUint64x8 ...) => (VPROLVQ512 ...) -(RotateLeftMaskedInt32x4 x y mask) => (VPROLVDMasked128 x y (VPMOVVec32x4ToM mask)) -(RotateLeftMaskedInt32x8 x y mask) => (VPROLVDMasked256 x y (VPMOVVec32x8ToM mask)) -(RotateLeftMaskedInt32x16 x y mask) => (VPROLVDMasked512 x y (VPMOVVec32x16ToM mask)) -(RotateLeftMaskedInt64x2 x y mask) => (VPROLVQMasked128 x y (VPMOVVec64x2ToM mask)) -(RotateLeftMaskedInt64x4 x y mask) => (VPROLVQMasked256 x y (VPMOVVec64x4ToM mask)) -(RotateLeftMaskedInt64x8 x y mask) => (VPROLVQMasked512 x y (VPMOVVec64x8ToM mask)) -(RotateLeftMaskedUint32x4 x y mask) => (VPROLVDMasked128 x y (VPMOVVec32x4ToM mask)) -(RotateLeftMaskedUint32x8 x y mask) => (VPROLVDMasked256 x y (VPMOVVec32x8ToM mask)) -(RotateLeftMaskedUint32x16 x y mask) => (VPROLVDMasked512 x y (VPMOVVec32x16ToM mask)) -(RotateLeftMaskedUint64x2 x y mask) => (VPROLVQMasked128 x y (VPMOVVec64x2ToM mask)) -(RotateLeftMaskedUint64x4 x y mask) => (VPROLVQMasked256 x y (VPMOVVec64x4ToM mask)) -(RotateLeftMaskedUint64x8 x y mask) => (VPROLVQMasked512 x y (VPMOVVec64x8ToM mask)) (RotateRightInt32x4 ...) => (VPRORVD128 ...) (RotateRightInt32x8 ...) => (VPRORVD256 ...) (RotateRightInt32x16 ...) => (VPRORVD512 ...) @@ -1334,18 +728,6 @@ (RotateRightUint64x2 ...) => (VPRORVQ128 ...) (RotateRightUint64x4 ...) => (VPRORVQ256 ...) (RotateRightUint64x8 ...) => (VPRORVQ512 ...) -(RotateRightMaskedInt32x4 x y mask) => (VPRORVDMasked128 x y (VPMOVVec32x4ToM mask)) -(RotateRightMaskedInt32x8 x y mask) => (VPRORVDMasked256 x y (VPMOVVec32x8ToM mask)) -(RotateRightMaskedInt32x16 x y mask) => (VPRORVDMasked512 x y (VPMOVVec32x16ToM mask)) -(RotateRightMaskedInt64x2 x y mask) => (VPRORVQMasked128 x y (VPMOVVec64x2ToM mask)) -(RotateRightMaskedInt64x4 x y mask) => (VPRORVQMasked256 x y (VPMOVVec64x4ToM mask)) -(RotateRightMaskedInt64x8 x y mask) => (VPRORVQMasked512 x y (VPMOVVec64x8ToM mask)) -(RotateRightMaskedUint32x4 x y mask) => (VPRORVDMasked128 x y (VPMOVVec32x4ToM mask)) -(RotateRightMaskedUint32x8 x y mask) => (VPRORVDMasked256 x y (VPMOVVec32x8ToM mask)) -(RotateRightMaskedUint32x16 x y mask) => (VPRORVDMasked512 x y (VPMOVVec32x16ToM mask)) -(RotateRightMaskedUint64x2 x y mask) => (VPRORVQMasked128 x y (VPMOVVec64x2ToM mask)) -(RotateRightMaskedUint64x4 x y mask) => (VPRORVQMasked256 x y (VPMOVVec64x4ToM mask)) -(RotateRightMaskedUint64x8 x y mask) => (VPRORVQMasked512 x y (VPMOVVec64x8ToM mask)) (RoundToEvenFloat32x4 x) => (VROUNDPS128 [0] x) (RoundToEvenFloat32x8 x) => (VROUNDPS256 [0] x) (RoundToEvenFloat64x2 x) => (VROUNDPD128 [0] x) @@ -1356,36 +738,18 @@ (RoundToEvenScaledFloat64x2 [a] x) => (VRNDSCALEPD128 [a+0] x) (RoundToEvenScaledFloat64x4 [a] x) => (VRNDSCALEPD256 [a+0] x) (RoundToEvenScaledFloat64x8 [a] x) => (VRNDSCALEPD512 [a+0] x) -(RoundToEvenScaledMaskedFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+0] x (VPMOVVec32x4ToM mask)) -(RoundToEvenScaledMaskedFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+0] x (VPMOVVec32x8ToM mask)) -(RoundToEvenScaledMaskedFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+0] x (VPMOVVec32x16ToM mask)) -(RoundToEvenScaledMaskedFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+0] x (VPMOVVec64x2ToM mask)) -(RoundToEvenScaledMaskedFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+0] x (VPMOVVec64x4ToM mask)) -(RoundToEvenScaledMaskedFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+0] x (VPMOVVec64x8ToM mask)) (RoundToEvenScaledResidueFloat32x4 [a] x) => (VREDUCEPS128 [a+0] x) 
(RoundToEvenScaledResidueFloat32x8 [a] x) => (VREDUCEPS256 [a+0] x) (RoundToEvenScaledResidueFloat32x16 [a] x) => (VREDUCEPS512 [a+0] x) (RoundToEvenScaledResidueFloat64x2 [a] x) => (VREDUCEPD128 [a+0] x) (RoundToEvenScaledResidueFloat64x4 [a] x) => (VREDUCEPD256 [a+0] x) (RoundToEvenScaledResidueFloat64x8 [a] x) => (VREDUCEPD512 [a+0] x) -(RoundToEvenScaledResidueMaskedFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+0] x (VPMOVVec32x4ToM mask)) -(RoundToEvenScaledResidueMaskedFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+0] x (VPMOVVec32x8ToM mask)) -(RoundToEvenScaledResidueMaskedFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+0] x (VPMOVVec32x16ToM mask)) -(RoundToEvenScaledResidueMaskedFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+0] x (VPMOVVec64x2ToM mask)) -(RoundToEvenScaledResidueMaskedFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+0] x (VPMOVVec64x4ToM mask)) -(RoundToEvenScaledResidueMaskedFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+0] x (VPMOVVec64x8ToM mask)) (ScaleFloat32x4 ...) => (VSCALEFPS128 ...) (ScaleFloat32x8 ...) => (VSCALEFPS256 ...) (ScaleFloat32x16 ...) => (VSCALEFPS512 ...) (ScaleFloat64x2 ...) => (VSCALEFPD128 ...) (ScaleFloat64x4 ...) => (VSCALEFPD256 ...) (ScaleFloat64x8 ...) => (VSCALEFPD512 ...) -(ScaleMaskedFloat32x4 x y mask) => (VSCALEFPSMasked128 x y (VPMOVVec32x4ToM mask)) -(ScaleMaskedFloat32x8 x y mask) => (VSCALEFPSMasked256 x y (VPMOVVec32x8ToM mask)) -(ScaleMaskedFloat32x16 x y mask) => (VSCALEFPSMasked512 x y (VPMOVVec32x16ToM mask)) -(ScaleMaskedFloat64x2 x y mask) => (VSCALEFPDMasked128 x y (VPMOVVec64x2ToM mask)) -(ScaleMaskedFloat64x4 x y mask) => (VSCALEFPDMasked256 x y (VPMOVVec64x4ToM mask)) -(ScaleMaskedFloat64x8 x y mask) => (VSCALEFPDMasked512 x y (VPMOVVec64x8ToM mask)) (SetElemFloat32x4 ...) => (VPINSRD128 ...) (SetElemFloat64x2 ...) => (VPINSRQ128 ...) (SetElemInt8x16 ...) => (VPINSRB128 ...) @@ -1481,51 +845,6 @@ (ShiftAllLeftConcatUint64x2 ...) => (VPSHLDQ128 ...) (ShiftAllLeftConcatUint64x4 ...) => (VPSHLDQ256 ...) (ShiftAllLeftConcatUint64x8 ...) => (VPSHLDQ512 ...) 
-(ShiftAllLeftConcatMaskedInt16x8 [a] x y mask) => (VPSHLDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) -(ShiftAllLeftConcatMaskedInt16x16 [a] x y mask) => (VPSHLDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) -(ShiftAllLeftConcatMaskedInt16x32 [a] x y mask) => (VPSHLDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) -(ShiftAllLeftConcatMaskedInt32x4 [a] x y mask) => (VPSHLDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) -(ShiftAllLeftConcatMaskedInt32x8 [a] x y mask) => (VPSHLDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) -(ShiftAllLeftConcatMaskedInt32x16 [a] x y mask) => (VPSHLDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) -(ShiftAllLeftConcatMaskedInt64x2 [a] x y mask) => (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) -(ShiftAllLeftConcatMaskedInt64x4 [a] x y mask) => (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) -(ShiftAllLeftConcatMaskedInt64x8 [a] x y mask) => (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) -(ShiftAllLeftConcatMaskedUint16x8 [a] x y mask) => (VPSHLDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) -(ShiftAllLeftConcatMaskedUint16x16 [a] x y mask) => (VPSHLDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) -(ShiftAllLeftConcatMaskedUint16x32 [a] x y mask) => (VPSHLDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) -(ShiftAllLeftConcatMaskedUint32x4 [a] x y mask) => (VPSHLDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) -(ShiftAllLeftConcatMaskedUint32x8 [a] x y mask) => (VPSHLDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) -(ShiftAllLeftConcatMaskedUint32x16 [a] x y mask) => (VPSHLDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) -(ShiftAllLeftConcatMaskedUint64x2 [a] x y mask) => (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) -(ShiftAllLeftConcatMaskedUint64x4 [a] x y mask) => (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) -(ShiftAllLeftConcatMaskedUint64x8 [a] x y mask) => (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) -(VPSLLWMasked128 x (MOVQconst [c]) mask) => (VPSLLWMasked128const [uint8(c)] x mask) -(ShiftAllLeftMaskedInt16x8 x y mask) => (VPSLLWMasked128 x y (VPMOVVec16x8ToM mask)) -(VPSLLWMasked256 x (MOVQconst [c]) mask) => (VPSLLWMasked256const [uint8(c)] x mask) -(ShiftAllLeftMaskedInt16x16 x y mask) => (VPSLLWMasked256 x y (VPMOVVec16x16ToM mask)) -(VPSLLWMasked512 x (MOVQconst [c]) mask) => (VPSLLWMasked512const [uint8(c)] x mask) -(ShiftAllLeftMaskedInt16x32 x y mask) => (VPSLLWMasked512 x y (VPMOVVec16x32ToM mask)) -(VPSLLDMasked128 x (MOVQconst [c]) mask) => (VPSLLDMasked128const [uint8(c)] x mask) -(ShiftAllLeftMaskedInt32x4 x y mask) => (VPSLLDMasked128 x y (VPMOVVec32x4ToM mask)) -(VPSLLDMasked256 x (MOVQconst [c]) mask) => (VPSLLDMasked256const [uint8(c)] x mask) -(ShiftAllLeftMaskedInt32x8 x y mask) => (VPSLLDMasked256 x y (VPMOVVec32x8ToM mask)) -(VPSLLDMasked512 x (MOVQconst [c]) mask) => (VPSLLDMasked512const [uint8(c)] x mask) -(ShiftAllLeftMaskedInt32x16 x y mask) => (VPSLLDMasked512 x y (VPMOVVec32x16ToM mask)) -(VPSLLQMasked128 x (MOVQconst [c]) mask) => (VPSLLQMasked128const [uint8(c)] x mask) -(ShiftAllLeftMaskedInt64x2 x y mask) => (VPSLLQMasked128 x y (VPMOVVec64x2ToM mask)) -(VPSLLQMasked256 x (MOVQconst [c]) mask) => (VPSLLQMasked256const [uint8(c)] x mask) -(ShiftAllLeftMaskedInt64x4 x y mask) => (VPSLLQMasked256 x y (VPMOVVec64x4ToM mask)) -(VPSLLQMasked512 x (MOVQconst [c]) mask) => (VPSLLQMasked512const [uint8(c)] x mask) -(ShiftAllLeftMaskedInt64x8 x y mask) => (VPSLLQMasked512 x y (VPMOVVec64x8ToM mask)) -(ShiftAllLeftMaskedUint16x8 x y mask) => (VPSLLWMasked128 x y (VPMOVVec16x8ToM mask)) -(ShiftAllLeftMaskedUint16x16 x y mask) => (VPSLLWMasked256 x y 
(VPMOVVec16x16ToM mask)) -(ShiftAllLeftMaskedUint16x32 x y mask) => (VPSLLWMasked512 x y (VPMOVVec16x32ToM mask)) -(ShiftAllLeftMaskedUint32x4 x y mask) => (VPSLLDMasked128 x y (VPMOVVec32x4ToM mask)) -(ShiftAllLeftMaskedUint32x8 x y mask) => (VPSLLDMasked256 x y (VPMOVVec32x8ToM mask)) -(ShiftAllLeftMaskedUint32x16 x y mask) => (VPSLLDMasked512 x y (VPMOVVec32x16ToM mask)) -(ShiftAllLeftMaskedUint64x2 x y mask) => (VPSLLQMasked128 x y (VPMOVVec64x2ToM mask)) -(ShiftAllLeftMaskedUint64x4 x y mask) => (VPSLLQMasked256 x y (VPMOVVec64x4ToM mask)) -(ShiftAllLeftMaskedUint64x8 x y mask) => (VPSLLQMasked512 x y (VPMOVVec64x8ToM mask)) (ShiftAllRightInt16x8 ...) => (VPSRAW128 ...) (VPSRAW128 x (MOVQconst [c])) => (VPSRAW128const [uint8(c)] x) (ShiftAllRightInt16x16 ...) => (VPSRAW256 ...) @@ -1571,51 +890,6 @@ (ShiftAllRightConcatUint64x2 ...) => (VPSHRDQ128 ...) (ShiftAllRightConcatUint64x4 ...) => (VPSHRDQ256 ...) (ShiftAllRightConcatUint64x8 ...) => (VPSHRDQ512 ...) -(ShiftAllRightConcatMaskedInt16x8 [a] x y mask) => (VPSHRDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) -(ShiftAllRightConcatMaskedInt16x16 [a] x y mask) => (VPSHRDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) -(ShiftAllRightConcatMaskedInt16x32 [a] x y mask) => (VPSHRDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) -(ShiftAllRightConcatMaskedInt32x4 [a] x y mask) => (VPSHRDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) -(ShiftAllRightConcatMaskedInt32x8 [a] x y mask) => (VPSHRDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) -(ShiftAllRightConcatMaskedInt32x16 [a] x y mask) => (VPSHRDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) -(ShiftAllRightConcatMaskedInt64x2 [a] x y mask) => (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) -(ShiftAllRightConcatMaskedInt64x4 [a] x y mask) => (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) -(ShiftAllRightConcatMaskedInt64x8 [a] x y mask) => (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) -(ShiftAllRightConcatMaskedUint16x8 [a] x y mask) => (VPSHRDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) -(ShiftAllRightConcatMaskedUint16x16 [a] x y mask) => (VPSHRDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) -(ShiftAllRightConcatMaskedUint16x32 [a] x y mask) => (VPSHRDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) -(ShiftAllRightConcatMaskedUint32x4 [a] x y mask) => (VPSHRDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) -(ShiftAllRightConcatMaskedUint32x8 [a] x y mask) => (VPSHRDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) -(ShiftAllRightConcatMaskedUint32x16 [a] x y mask) => (VPSHRDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) -(ShiftAllRightConcatMaskedUint64x2 [a] x y mask) => (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) -(ShiftAllRightConcatMaskedUint64x4 [a] x y mask) => (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) -(ShiftAllRightConcatMaskedUint64x8 [a] x y mask) => (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) -(VPSRAWMasked128 x (MOVQconst [c]) mask) => (VPSRAWMasked128const [uint8(c)] x mask) -(ShiftAllRightMaskedInt16x8 x y mask) => (VPSRAWMasked128 x y (VPMOVVec16x8ToM mask)) -(VPSRAWMasked256 x (MOVQconst [c]) mask) => (VPSRAWMasked256const [uint8(c)] x mask) -(ShiftAllRightMaskedInt16x16 x y mask) => (VPSRAWMasked256 x y (VPMOVVec16x16ToM mask)) -(VPSRAWMasked512 x (MOVQconst [c]) mask) => (VPSRAWMasked512const [uint8(c)] x mask) -(ShiftAllRightMaskedInt16x32 x y mask) => (VPSRAWMasked512 x y (VPMOVVec16x32ToM mask)) -(VPSRADMasked128 x (MOVQconst [c]) mask) => (VPSRADMasked128const [uint8(c)] x mask) -(ShiftAllRightMaskedInt32x4 x y mask) => (VPSRADMasked128 x y (VPMOVVec32x4ToM mask)) -(VPSRADMasked256 x 
(MOVQconst [c]) mask) => (VPSRADMasked256const [uint8(c)] x mask) -(ShiftAllRightMaskedInt32x8 x y mask) => (VPSRADMasked256 x y (VPMOVVec32x8ToM mask)) -(VPSRADMasked512 x (MOVQconst [c]) mask) => (VPSRADMasked512const [uint8(c)] x mask) -(ShiftAllRightMaskedInt32x16 x y mask) => (VPSRADMasked512 x y (VPMOVVec32x16ToM mask)) -(VPSRAQMasked128 x (MOVQconst [c]) mask) => (VPSRAQMasked128const [uint8(c)] x mask) -(ShiftAllRightMaskedInt64x2 x y mask) => (VPSRAQMasked128 x y (VPMOVVec64x2ToM mask)) -(VPSRAQMasked256 x (MOVQconst [c]) mask) => (VPSRAQMasked256const [uint8(c)] x mask) -(ShiftAllRightMaskedInt64x4 x y mask) => (VPSRAQMasked256 x y (VPMOVVec64x4ToM mask)) -(VPSRAQMasked512 x (MOVQconst [c]) mask) => (VPSRAQMasked512const [uint8(c)] x mask) -(ShiftAllRightMaskedInt64x8 x y mask) => (VPSRAQMasked512 x y (VPMOVVec64x8ToM mask)) -(ShiftAllRightMaskedUint16x8 x y mask) => (VPSRLWMasked128 x y (VPMOVVec16x8ToM mask)) -(ShiftAllRightMaskedUint16x16 x y mask) => (VPSRLWMasked256 x y (VPMOVVec16x16ToM mask)) -(ShiftAllRightMaskedUint16x32 x y mask) => (VPSRLWMasked512 x y (VPMOVVec16x32ToM mask)) -(ShiftAllRightMaskedUint32x4 x y mask) => (VPSRLDMasked128 x y (VPMOVVec32x4ToM mask)) -(ShiftAllRightMaskedUint32x8 x y mask) => (VPSRLDMasked256 x y (VPMOVVec32x8ToM mask)) -(ShiftAllRightMaskedUint32x16 x y mask) => (VPSRLDMasked512 x y (VPMOVVec32x16ToM mask)) -(ShiftAllRightMaskedUint64x2 x y mask) => (VPSRLQMasked128 x y (VPMOVVec64x2ToM mask)) -(ShiftAllRightMaskedUint64x4 x y mask) => (VPSRLQMasked256 x y (VPMOVVec64x4ToM mask)) -(ShiftAllRightMaskedUint64x8 x y mask) => (VPSRLQMasked512 x y (VPMOVVec64x8ToM mask)) (ShiftLeftInt16x8 ...) => (VPSLLVW128 ...) (ShiftLeftInt16x16 ...) => (VPSLLVW256 ...) (ShiftLeftInt16x32 ...) => (VPSLLVW512 ...) @@ -1652,42 +926,6 @@ (ShiftLeftConcatUint64x2 ...) => (VPSHLDVQ128 ...) (ShiftLeftConcatUint64x4 ...) => (VPSHLDVQ256 ...) (ShiftLeftConcatUint64x8 ...) => (VPSHLDVQ512 ...) 
-(ShiftLeftConcatMaskedInt16x8 x y z mask) => (VPSHLDVWMasked128 x y z (VPMOVVec16x8ToM mask)) -(ShiftLeftConcatMaskedInt16x16 x y z mask) => (VPSHLDVWMasked256 x y z (VPMOVVec16x16ToM mask)) -(ShiftLeftConcatMaskedInt16x32 x y z mask) => (VPSHLDVWMasked512 x y z (VPMOVVec16x32ToM mask)) -(ShiftLeftConcatMaskedInt32x4 x y z mask) => (VPSHLDVDMasked128 x y z (VPMOVVec32x4ToM mask)) -(ShiftLeftConcatMaskedInt32x8 x y z mask) => (VPSHLDVDMasked256 x y z (VPMOVVec32x8ToM mask)) -(ShiftLeftConcatMaskedInt32x16 x y z mask) => (VPSHLDVDMasked512 x y z (VPMOVVec32x16ToM mask)) -(ShiftLeftConcatMaskedInt64x2 x y z mask) => (VPSHLDVQMasked128 x y z (VPMOVVec64x2ToM mask)) -(ShiftLeftConcatMaskedInt64x4 x y z mask) => (VPSHLDVQMasked256 x y z (VPMOVVec64x4ToM mask)) -(ShiftLeftConcatMaskedInt64x8 x y z mask) => (VPSHLDVQMasked512 x y z (VPMOVVec64x8ToM mask)) -(ShiftLeftConcatMaskedUint16x8 x y z mask) => (VPSHLDVWMasked128 x y z (VPMOVVec16x8ToM mask)) -(ShiftLeftConcatMaskedUint16x16 x y z mask) => (VPSHLDVWMasked256 x y z (VPMOVVec16x16ToM mask)) -(ShiftLeftConcatMaskedUint16x32 x y z mask) => (VPSHLDVWMasked512 x y z (VPMOVVec16x32ToM mask)) -(ShiftLeftConcatMaskedUint32x4 x y z mask) => (VPSHLDVDMasked128 x y z (VPMOVVec32x4ToM mask)) -(ShiftLeftConcatMaskedUint32x8 x y z mask) => (VPSHLDVDMasked256 x y z (VPMOVVec32x8ToM mask)) -(ShiftLeftConcatMaskedUint32x16 x y z mask) => (VPSHLDVDMasked512 x y z (VPMOVVec32x16ToM mask)) -(ShiftLeftConcatMaskedUint64x2 x y z mask) => (VPSHLDVQMasked128 x y z (VPMOVVec64x2ToM mask)) -(ShiftLeftConcatMaskedUint64x4 x y z mask) => (VPSHLDVQMasked256 x y z (VPMOVVec64x4ToM mask)) -(ShiftLeftConcatMaskedUint64x8 x y z mask) => (VPSHLDVQMasked512 x y z (VPMOVVec64x8ToM mask)) -(ShiftLeftMaskedInt16x8 x y mask) => (VPSLLVWMasked128 x y (VPMOVVec16x8ToM mask)) -(ShiftLeftMaskedInt16x16 x y mask) => (VPSLLVWMasked256 x y (VPMOVVec16x16ToM mask)) -(ShiftLeftMaskedInt16x32 x y mask) => (VPSLLVWMasked512 x y (VPMOVVec16x32ToM mask)) -(ShiftLeftMaskedInt32x4 x y mask) => (VPSLLVDMasked128 x y (VPMOVVec32x4ToM mask)) -(ShiftLeftMaskedInt32x8 x y mask) => (VPSLLVDMasked256 x y (VPMOVVec32x8ToM mask)) -(ShiftLeftMaskedInt32x16 x y mask) => (VPSLLVDMasked512 x y (VPMOVVec32x16ToM mask)) -(ShiftLeftMaskedInt64x2 x y mask) => (VPSLLVQMasked128 x y (VPMOVVec64x2ToM mask)) -(ShiftLeftMaskedInt64x4 x y mask) => (VPSLLVQMasked256 x y (VPMOVVec64x4ToM mask)) -(ShiftLeftMaskedInt64x8 x y mask) => (VPSLLVQMasked512 x y (VPMOVVec64x8ToM mask)) -(ShiftLeftMaskedUint16x8 x y mask) => (VPSLLVWMasked128 x y (VPMOVVec16x8ToM mask)) -(ShiftLeftMaskedUint16x16 x y mask) => (VPSLLVWMasked256 x y (VPMOVVec16x16ToM mask)) -(ShiftLeftMaskedUint16x32 x y mask) => (VPSLLVWMasked512 x y (VPMOVVec16x32ToM mask)) -(ShiftLeftMaskedUint32x4 x y mask) => (VPSLLVDMasked128 x y (VPMOVVec32x4ToM mask)) -(ShiftLeftMaskedUint32x8 x y mask) => (VPSLLVDMasked256 x y (VPMOVVec32x8ToM mask)) -(ShiftLeftMaskedUint32x16 x y mask) => (VPSLLVDMasked512 x y (VPMOVVec32x16ToM mask)) -(ShiftLeftMaskedUint64x2 x y mask) => (VPSLLVQMasked128 x y (VPMOVVec64x2ToM mask)) -(ShiftLeftMaskedUint64x4 x y mask) => (VPSLLVQMasked256 x y (VPMOVVec64x4ToM mask)) -(ShiftLeftMaskedUint64x8 x y mask) => (VPSLLVQMasked512 x y (VPMOVVec64x8ToM mask)) (ShiftRightInt16x8 ...) => (VPSRAVW128 ...) (ShiftRightInt16x16 ...) => (VPSRAVW256 ...) (ShiftRightInt16x32 ...) => (VPSRAVW512 ...) @@ -1724,54 +962,12 @@ (ShiftRightConcatUint64x2 ...) => (VPSHRDVQ128 ...) (ShiftRightConcatUint64x4 ...) => (VPSHRDVQ256 ...) 
(ShiftRightConcatUint64x8 ...) => (VPSHRDVQ512 ...) -(ShiftRightConcatMaskedInt16x8 x y z mask) => (VPSHRDVWMasked128 x y z (VPMOVVec16x8ToM mask)) -(ShiftRightConcatMaskedInt16x16 x y z mask) => (VPSHRDVWMasked256 x y z (VPMOVVec16x16ToM mask)) -(ShiftRightConcatMaskedInt16x32 x y z mask) => (VPSHRDVWMasked512 x y z (VPMOVVec16x32ToM mask)) -(ShiftRightConcatMaskedInt32x4 x y z mask) => (VPSHRDVDMasked128 x y z (VPMOVVec32x4ToM mask)) -(ShiftRightConcatMaskedInt32x8 x y z mask) => (VPSHRDVDMasked256 x y z (VPMOVVec32x8ToM mask)) -(ShiftRightConcatMaskedInt32x16 x y z mask) => (VPSHRDVDMasked512 x y z (VPMOVVec32x16ToM mask)) -(ShiftRightConcatMaskedInt64x2 x y z mask) => (VPSHRDVQMasked128 x y z (VPMOVVec64x2ToM mask)) -(ShiftRightConcatMaskedInt64x4 x y z mask) => (VPSHRDVQMasked256 x y z (VPMOVVec64x4ToM mask)) -(ShiftRightConcatMaskedInt64x8 x y z mask) => (VPSHRDVQMasked512 x y z (VPMOVVec64x8ToM mask)) -(ShiftRightConcatMaskedUint16x8 x y z mask) => (VPSHRDVWMasked128 x y z (VPMOVVec16x8ToM mask)) -(ShiftRightConcatMaskedUint16x16 x y z mask) => (VPSHRDVWMasked256 x y z (VPMOVVec16x16ToM mask)) -(ShiftRightConcatMaskedUint16x32 x y z mask) => (VPSHRDVWMasked512 x y z (VPMOVVec16x32ToM mask)) -(ShiftRightConcatMaskedUint32x4 x y z mask) => (VPSHRDVDMasked128 x y z (VPMOVVec32x4ToM mask)) -(ShiftRightConcatMaskedUint32x8 x y z mask) => (VPSHRDVDMasked256 x y z (VPMOVVec32x8ToM mask)) -(ShiftRightConcatMaskedUint32x16 x y z mask) => (VPSHRDVDMasked512 x y z (VPMOVVec32x16ToM mask)) -(ShiftRightConcatMaskedUint64x2 x y z mask) => (VPSHRDVQMasked128 x y z (VPMOVVec64x2ToM mask)) -(ShiftRightConcatMaskedUint64x4 x y z mask) => (VPSHRDVQMasked256 x y z (VPMOVVec64x4ToM mask)) -(ShiftRightConcatMaskedUint64x8 x y z mask) => (VPSHRDVQMasked512 x y z (VPMOVVec64x8ToM mask)) -(ShiftRightMaskedInt16x8 x y mask) => (VPSRAVWMasked128 x y (VPMOVVec16x8ToM mask)) -(ShiftRightMaskedInt16x16 x y mask) => (VPSRAVWMasked256 x y (VPMOVVec16x16ToM mask)) -(ShiftRightMaskedInt16x32 x y mask) => (VPSRAVWMasked512 x y (VPMOVVec16x32ToM mask)) -(ShiftRightMaskedInt32x4 x y mask) => (VPSRAVDMasked128 x y (VPMOVVec32x4ToM mask)) -(ShiftRightMaskedInt32x8 x y mask) => (VPSRAVDMasked256 x y (VPMOVVec32x8ToM mask)) -(ShiftRightMaskedInt32x16 x y mask) => (VPSRAVDMasked512 x y (VPMOVVec32x16ToM mask)) -(ShiftRightMaskedInt64x2 x y mask) => (VPSRAVQMasked128 x y (VPMOVVec64x2ToM mask)) -(ShiftRightMaskedInt64x4 x y mask) => (VPSRAVQMasked256 x y (VPMOVVec64x4ToM mask)) -(ShiftRightMaskedInt64x8 x y mask) => (VPSRAVQMasked512 x y (VPMOVVec64x8ToM mask)) -(ShiftRightMaskedUint16x8 x y mask) => (VPSRLVWMasked128 x y (VPMOVVec16x8ToM mask)) -(ShiftRightMaskedUint16x16 x y mask) => (VPSRLVWMasked256 x y (VPMOVVec16x16ToM mask)) -(ShiftRightMaskedUint16x32 x y mask) => (VPSRLVWMasked512 x y (VPMOVVec16x32ToM mask)) -(ShiftRightMaskedUint32x4 x y mask) => (VPSRLVDMasked128 x y (VPMOVVec32x4ToM mask)) -(ShiftRightMaskedUint32x8 x y mask) => (VPSRLVDMasked256 x y (VPMOVVec32x8ToM mask)) -(ShiftRightMaskedUint32x16 x y mask) => (VPSRLVDMasked512 x y (VPMOVVec32x16ToM mask)) -(ShiftRightMaskedUint64x2 x y mask) => (VPSRLVQMasked128 x y (VPMOVVec64x2ToM mask)) -(ShiftRightMaskedUint64x4 x y mask) => (VPSRLVQMasked256 x y (VPMOVVec64x4ToM mask)) -(ShiftRightMaskedUint64x8 x y mask) => (VPSRLVQMasked512 x y (VPMOVVec64x8ToM mask)) (SqrtFloat32x4 ...) => (VSQRTPS128 ...) (SqrtFloat32x8 ...) => (VSQRTPS256 ...) (SqrtFloat32x16 ...) => (VSQRTPS512 ...) (SqrtFloat64x2 ...) => (VSQRTPD128 ...) (SqrtFloat64x4 ...) => (VSQRTPD256 ...) 
(SqrtFloat64x8 ...) => (VSQRTPD512 ...) -(SqrtMaskedFloat32x4 x mask) => (VSQRTPSMasked128 x (VPMOVVec32x4ToM mask)) -(SqrtMaskedFloat32x8 x mask) => (VSQRTPSMasked256 x (VPMOVVec32x8ToM mask)) -(SqrtMaskedFloat32x16 x mask) => (VSQRTPSMasked512 x (VPMOVVec32x16ToM mask)) -(SqrtMaskedFloat64x2 x mask) => (VSQRTPDMasked128 x (VPMOVVec64x2ToM mask)) -(SqrtMaskedFloat64x4 x mask) => (VSQRTPDMasked256 x (VPMOVVec64x4ToM mask)) -(SqrtMaskedFloat64x8 x mask) => (VSQRTPDMasked512 x (VPMOVVec64x8ToM mask)) (SubFloat32x4 ...) => (VSUBPS128 ...) (SubFloat32x8 ...) => (VSUBPS256 ...) (SubFloat32x16 ...) => (VSUBPS512 ...) @@ -1802,36 +998,6 @@ (SubUint64x2 ...) => (VPSUBQ128 ...) (SubUint64x4 ...) => (VPSUBQ256 ...) (SubUint64x8 ...) => (VPSUBQ512 ...) -(SubMaskedFloat32x4 x y mask) => (VSUBPSMasked128 x y (VPMOVVec32x4ToM mask)) -(SubMaskedFloat32x8 x y mask) => (VSUBPSMasked256 x y (VPMOVVec32x8ToM mask)) -(SubMaskedFloat32x16 x y mask) => (VSUBPSMasked512 x y (VPMOVVec32x16ToM mask)) -(SubMaskedFloat64x2 x y mask) => (VSUBPDMasked128 x y (VPMOVVec64x2ToM mask)) -(SubMaskedFloat64x4 x y mask) => (VSUBPDMasked256 x y (VPMOVVec64x4ToM mask)) -(SubMaskedFloat64x8 x y mask) => (VSUBPDMasked512 x y (VPMOVVec64x8ToM mask)) -(SubMaskedInt8x16 x y mask) => (VPSUBBMasked128 x y (VPMOVVec8x16ToM mask)) -(SubMaskedInt8x32 x y mask) => (VPSUBBMasked256 x y (VPMOVVec8x32ToM mask)) -(SubMaskedInt8x64 x y mask) => (VPSUBBMasked512 x y (VPMOVVec8x64ToM mask)) -(SubMaskedInt16x8 x y mask) => (VPSUBWMasked128 x y (VPMOVVec16x8ToM mask)) -(SubMaskedInt16x16 x y mask) => (VPSUBWMasked256 x y (VPMOVVec16x16ToM mask)) -(SubMaskedInt16x32 x y mask) => (VPSUBWMasked512 x y (VPMOVVec16x32ToM mask)) -(SubMaskedInt32x4 x y mask) => (VPSUBDMasked128 x y (VPMOVVec32x4ToM mask)) -(SubMaskedInt32x8 x y mask) => (VPSUBDMasked256 x y (VPMOVVec32x8ToM mask)) -(SubMaskedInt32x16 x y mask) => (VPSUBDMasked512 x y (VPMOVVec32x16ToM mask)) -(SubMaskedInt64x2 x y mask) => (VPSUBQMasked128 x y (VPMOVVec64x2ToM mask)) -(SubMaskedInt64x4 x y mask) => (VPSUBQMasked256 x y (VPMOVVec64x4ToM mask)) -(SubMaskedInt64x8 x y mask) => (VPSUBQMasked512 x y (VPMOVVec64x8ToM mask)) -(SubMaskedUint8x16 x y mask) => (VPSUBBMasked128 x y (VPMOVVec8x16ToM mask)) -(SubMaskedUint8x32 x y mask) => (VPSUBBMasked256 x y (VPMOVVec8x32ToM mask)) -(SubMaskedUint8x64 x y mask) => (VPSUBBMasked512 x y (VPMOVVec8x64ToM mask)) -(SubMaskedUint16x8 x y mask) => (VPSUBWMasked128 x y (VPMOVVec16x8ToM mask)) -(SubMaskedUint16x16 x y mask) => (VPSUBWMasked256 x y (VPMOVVec16x16ToM mask)) -(SubMaskedUint16x32 x y mask) => (VPSUBWMasked512 x y (VPMOVVec16x32ToM mask)) -(SubMaskedUint32x4 x y mask) => (VPSUBDMasked128 x y (VPMOVVec32x4ToM mask)) -(SubMaskedUint32x8 x y mask) => (VPSUBDMasked256 x y (VPMOVVec32x8ToM mask)) -(SubMaskedUint32x16 x y mask) => (VPSUBDMasked512 x y (VPMOVVec32x16ToM mask)) -(SubMaskedUint64x2 x y mask) => (VPSUBQMasked128 x y (VPMOVVec64x2ToM mask)) -(SubMaskedUint64x4 x y mask) => (VPSUBQMasked256 x y (VPMOVVec64x4ToM mask)) -(SubMaskedUint64x8 x y mask) => (VPSUBQMasked512 x y (VPMOVVec64x8ToM mask)) (SubPairsFloat32x4 ...) => (VHSUBPS128 ...) (SubPairsFloat32x8 ...) => (VHSUBPS256 ...) (SubPairsFloat64x2 ...) => (VHSUBPD128 ...) @@ -1858,18 +1024,6 @@ (SubSaturatedUint16x8 ...) => (VPSUBUSW128 ...) (SubSaturatedUint16x16 ...) => (VPSUBUSW256 ...) (SubSaturatedUint16x32 ...) => (VPSUBUSW512 ...) 
-(SubSaturatedMaskedInt8x16 x y mask) => (VPSUBSBMasked128 x y (VPMOVVec8x16ToM mask)) -(SubSaturatedMaskedInt8x32 x y mask) => (VPSUBSBMasked256 x y (VPMOVVec8x32ToM mask)) -(SubSaturatedMaskedInt8x64 x y mask) => (VPSUBSBMasked512 x y (VPMOVVec8x64ToM mask)) -(SubSaturatedMaskedInt16x8 x y mask) => (VPSUBSWMasked128 x y (VPMOVVec16x8ToM mask)) -(SubSaturatedMaskedInt16x16 x y mask) => (VPSUBSWMasked256 x y (VPMOVVec16x16ToM mask)) -(SubSaturatedMaskedInt16x32 x y mask) => (VPSUBSWMasked512 x y (VPMOVVec16x32ToM mask)) -(SubSaturatedMaskedUint8x16 x y mask) => (VPSUBUSBMasked128 x y (VPMOVVec8x16ToM mask)) -(SubSaturatedMaskedUint8x32 x y mask) => (VPSUBUSBMasked256 x y (VPMOVVec8x32ToM mask)) -(SubSaturatedMaskedUint8x64 x y mask) => (VPSUBUSBMasked512 x y (VPMOVVec8x64ToM mask)) -(SubSaturatedMaskedUint16x8 x y mask) => (VPSUBUSWMasked128 x y (VPMOVVec16x8ToM mask)) -(SubSaturatedMaskedUint16x16 x y mask) => (VPSUBUSWMasked256 x y (VPMOVVec16x16ToM mask)) -(SubSaturatedMaskedUint16x32 x y mask) => (VPSUBUSWMasked512 x y (VPMOVVec16x32ToM mask)) (TruncFloat32x4 x) => (VROUNDPS128 [3] x) (TruncFloat32x8 x) => (VROUNDPS256 [3] x) (TruncFloat64x2 x) => (VROUNDPD128 [3] x) @@ -1880,24 +1034,12 @@ (TruncScaledFloat64x2 [a] x) => (VRNDSCALEPD128 [a+3] x) (TruncScaledFloat64x4 [a] x) => (VRNDSCALEPD256 [a+3] x) (TruncScaledFloat64x8 [a] x) => (VRNDSCALEPD512 [a+3] x) -(TruncScaledMaskedFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+3] x (VPMOVVec32x4ToM mask)) -(TruncScaledMaskedFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+3] x (VPMOVVec32x8ToM mask)) -(TruncScaledMaskedFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+3] x (VPMOVVec32x16ToM mask)) -(TruncScaledMaskedFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+3] x (VPMOVVec64x2ToM mask)) -(TruncScaledMaskedFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+3] x (VPMOVVec64x4ToM mask)) -(TruncScaledMaskedFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+3] x (VPMOVVec64x8ToM mask)) (TruncScaledResidueFloat32x4 [a] x) => (VREDUCEPS128 [a+3] x) (TruncScaledResidueFloat32x8 [a] x) => (VREDUCEPS256 [a+3] x) (TruncScaledResidueFloat32x16 [a] x) => (VREDUCEPS512 [a+3] x) (TruncScaledResidueFloat64x2 [a] x) => (VREDUCEPD128 [a+3] x) (TruncScaledResidueFloat64x4 [a] x) => (VREDUCEPD256 [a+3] x) (TruncScaledResidueFloat64x8 [a] x) => (VREDUCEPD512 [a+3] x) -(TruncScaledResidueMaskedFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+3] x (VPMOVVec32x4ToM mask)) -(TruncScaledResidueMaskedFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+3] x (VPMOVVec32x8ToM mask)) -(TruncScaledResidueMaskedFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+3] x (VPMOVVec32x16ToM mask)) -(TruncScaledResidueMaskedFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+3] x (VPMOVVec64x2ToM mask)) -(TruncScaledResidueMaskedFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+3] x (VPMOVVec64x4ToM mask)) -(TruncScaledResidueMaskedFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+3] x (VPMOVVec64x8ToM mask)) (XorInt8x16 ...) => (VPXOR128 ...) (XorInt8x32 ...) => (VPXOR256 ...) (XorInt8x64 ...) => (VPXORD512 ...) @@ -1922,18 +1064,6 @@ (XorUint64x2 ...) => (VPXOR128 ...) (XorUint64x4 ...) => (VPXOR256 ...) (XorUint64x8 ...) => (VPXORQ512 ...) 
-(XorMaskedInt32x4 x y mask) => (VPXORDMasked128 x y (VPMOVVec32x4ToM mask)) -(XorMaskedInt32x8 x y mask) => (VPXORDMasked256 x y (VPMOVVec32x8ToM mask)) -(XorMaskedInt32x16 x y mask) => (VPXORDMasked512 x y (VPMOVVec32x16ToM mask)) -(XorMaskedInt64x2 x y mask) => (VPXORQMasked128 x y (VPMOVVec64x2ToM mask)) -(XorMaskedInt64x4 x y mask) => (VPXORQMasked256 x y (VPMOVVec64x4ToM mask)) -(XorMaskedInt64x8 x y mask) => (VPXORQMasked512 x y (VPMOVVec64x8ToM mask)) -(XorMaskedUint32x4 x y mask) => (VPXORDMasked128 x y (VPMOVVec32x4ToM mask)) -(XorMaskedUint32x8 x y mask) => (VPXORDMasked256 x y (VPMOVVec32x8ToM mask)) -(XorMaskedUint32x16 x y mask) => (VPXORDMasked512 x y (VPMOVVec32x16ToM mask)) -(XorMaskedUint64x2 x y mask) => (VPXORQMasked128 x y (VPMOVVec64x2ToM mask)) -(XorMaskedUint64x4 x y mask) => (VPXORQMasked256 x y (VPMOVVec64x4ToM mask)) -(XorMaskedUint64x8 x y mask) => (VPXORQMasked512 x y (VPMOVVec64x8ToM mask)) (blendInt8x16 ...) => (VPBLENDVB128 ...) (blendInt8x32 ...) => (VPBLENDVB256 ...) (blendMaskedInt8x64 x y mask) => (VPBLENDMBMasked512 x y (VPMOVVec8x64ToM mask)) diff --git a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go index d98c0d8152a..08bfe369511 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go +++ b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go @@ -16,36 +16,15 @@ func simdGenericOps() []opData { {name: "AbsInt64x2", argLength: 1, commutative: false}, {name: "AbsInt64x4", argLength: 1, commutative: false}, {name: "AbsInt64x8", argLength: 1, commutative: false}, - {name: "AbsMaskedInt8x16", argLength: 2, commutative: false}, - {name: "AbsMaskedInt8x32", argLength: 2, commutative: false}, - {name: "AbsMaskedInt8x64", argLength: 2, commutative: false}, - {name: "AbsMaskedInt16x8", argLength: 2, commutative: false}, - {name: "AbsMaskedInt16x16", argLength: 2, commutative: false}, - {name: "AbsMaskedInt16x32", argLength: 2, commutative: false}, - {name: "AbsMaskedInt32x4", argLength: 2, commutative: false}, - {name: "AbsMaskedInt32x8", argLength: 2, commutative: false}, - {name: "AbsMaskedInt32x16", argLength: 2, commutative: false}, - {name: "AbsMaskedInt64x2", argLength: 2, commutative: false}, - {name: "AbsMaskedInt64x4", argLength: 2, commutative: false}, - {name: "AbsMaskedInt64x8", argLength: 2, commutative: false}, {name: "AddDotProdPairsSaturatedInt32x4", argLength: 3, commutative: false}, {name: "AddDotProdPairsSaturatedInt32x8", argLength: 3, commutative: false}, {name: "AddDotProdPairsSaturatedInt32x16", argLength: 3, commutative: false}, - {name: "AddDotProdPairsSaturatedMaskedInt32x4", argLength: 4, commutative: false}, - {name: "AddDotProdPairsSaturatedMaskedInt32x8", argLength: 4, commutative: false}, - {name: "AddDotProdPairsSaturatedMaskedInt32x16", argLength: 4, commutative: false}, {name: "AddDotProdQuadrupleInt32x4", argLength: 3, commutative: false}, {name: "AddDotProdQuadrupleInt32x8", argLength: 3, commutative: false}, {name: "AddDotProdQuadrupleInt32x16", argLength: 3, commutative: false}, - {name: "AddDotProdQuadrupleMaskedInt32x4", argLength: 4, commutative: false}, - {name: "AddDotProdQuadrupleMaskedInt32x8", argLength: 4, commutative: false}, - {name: "AddDotProdQuadrupleMaskedInt32x16", argLength: 4, commutative: false}, {name: "AddDotProdQuadrupleSaturatedInt32x4", argLength: 3, commutative: false}, {name: "AddDotProdQuadrupleSaturatedInt32x8", argLength: 3, commutative: false}, {name: "AddDotProdQuadrupleSaturatedInt32x16", argLength: 3, commutative: false}, - 
{name: "AddDotProdQuadrupleSaturatedMaskedInt32x4", argLength: 4, commutative: false}, - {name: "AddDotProdQuadrupleSaturatedMaskedInt32x8", argLength: 4, commutative: false}, - {name: "AddDotProdQuadrupleSaturatedMaskedInt32x16", argLength: 4, commutative: false}, {name: "AddFloat32x4", argLength: 2, commutative: true}, {name: "AddFloat32x8", argLength: 2, commutative: true}, {name: "AddFloat32x16", argLength: 2, commutative: true}, @@ -64,36 +43,6 @@ func simdGenericOps() []opData { {name: "AddInt64x2", argLength: 2, commutative: true}, {name: "AddInt64x4", argLength: 2, commutative: true}, {name: "AddInt64x8", argLength: 2, commutative: true}, - {name: "AddMaskedFloat32x4", argLength: 3, commutative: true}, - {name: "AddMaskedFloat32x8", argLength: 3, commutative: true}, - {name: "AddMaskedFloat32x16", argLength: 3, commutative: true}, - {name: "AddMaskedFloat64x2", argLength: 3, commutative: true}, - {name: "AddMaskedFloat64x4", argLength: 3, commutative: true}, - {name: "AddMaskedFloat64x8", argLength: 3, commutative: true}, - {name: "AddMaskedInt8x16", argLength: 3, commutative: true}, - {name: "AddMaskedInt8x32", argLength: 3, commutative: true}, - {name: "AddMaskedInt8x64", argLength: 3, commutative: true}, - {name: "AddMaskedInt16x8", argLength: 3, commutative: true}, - {name: "AddMaskedInt16x16", argLength: 3, commutative: true}, - {name: "AddMaskedInt16x32", argLength: 3, commutative: true}, - {name: "AddMaskedInt32x4", argLength: 3, commutative: true}, - {name: "AddMaskedInt32x8", argLength: 3, commutative: true}, - {name: "AddMaskedInt32x16", argLength: 3, commutative: true}, - {name: "AddMaskedInt64x2", argLength: 3, commutative: true}, - {name: "AddMaskedInt64x4", argLength: 3, commutative: true}, - {name: "AddMaskedInt64x8", argLength: 3, commutative: true}, - {name: "AddMaskedUint8x16", argLength: 3, commutative: true}, - {name: "AddMaskedUint8x32", argLength: 3, commutative: true}, - {name: "AddMaskedUint8x64", argLength: 3, commutative: true}, - {name: "AddMaskedUint16x8", argLength: 3, commutative: true}, - {name: "AddMaskedUint16x16", argLength: 3, commutative: true}, - {name: "AddMaskedUint16x32", argLength: 3, commutative: true}, - {name: "AddMaskedUint32x4", argLength: 3, commutative: true}, - {name: "AddMaskedUint32x8", argLength: 3, commutative: true}, - {name: "AddMaskedUint32x16", argLength: 3, commutative: true}, - {name: "AddMaskedUint64x2", argLength: 3, commutative: true}, - {name: "AddMaskedUint64x4", argLength: 3, commutative: true}, - {name: "AddMaskedUint64x8", argLength: 3, commutative: true}, {name: "AddPairsFloat32x4", argLength: 2, commutative: false}, {name: "AddPairsFloat32x8", argLength: 2, commutative: false}, {name: "AddPairsFloat64x2", argLength: 2, commutative: false}, @@ -114,18 +63,6 @@ func simdGenericOps() []opData { {name: "AddSaturatedInt16x8", argLength: 2, commutative: true}, {name: "AddSaturatedInt16x16", argLength: 2, commutative: true}, {name: "AddSaturatedInt16x32", argLength: 2, commutative: true}, - {name: "AddSaturatedMaskedInt8x16", argLength: 3, commutative: true}, - {name: "AddSaturatedMaskedInt8x32", argLength: 3, commutative: true}, - {name: "AddSaturatedMaskedInt8x64", argLength: 3, commutative: true}, - {name: "AddSaturatedMaskedInt16x8", argLength: 3, commutative: true}, - {name: "AddSaturatedMaskedInt16x16", argLength: 3, commutative: true}, - {name: "AddSaturatedMaskedInt16x32", argLength: 3, commutative: true}, - {name: "AddSaturatedMaskedUint8x16", argLength: 3, commutative: true}, - {name: 
"AddSaturatedMaskedUint8x32", argLength: 3, commutative: true}, - {name: "AddSaturatedMaskedUint8x64", argLength: 3, commutative: true}, - {name: "AddSaturatedMaskedUint16x8", argLength: 3, commutative: true}, - {name: "AddSaturatedMaskedUint16x16", argLength: 3, commutative: true}, - {name: "AddSaturatedMaskedUint16x32", argLength: 3, commutative: true}, {name: "AddSaturatedUint8x16", argLength: 2, commutative: true}, {name: "AddSaturatedUint8x32", argLength: 2, commutative: true}, {name: "AddSaturatedUint8x64", argLength: 2, commutative: true}, @@ -160,18 +97,6 @@ func simdGenericOps() []opData { {name: "AndInt64x2", argLength: 2, commutative: true}, {name: "AndInt64x4", argLength: 2, commutative: true}, {name: "AndInt64x8", argLength: 2, commutative: true}, - {name: "AndMaskedInt32x4", argLength: 3, commutative: true}, - {name: "AndMaskedInt32x8", argLength: 3, commutative: true}, - {name: "AndMaskedInt32x16", argLength: 3, commutative: true}, - {name: "AndMaskedInt64x2", argLength: 3, commutative: true}, - {name: "AndMaskedInt64x4", argLength: 3, commutative: true}, - {name: "AndMaskedInt64x8", argLength: 3, commutative: true}, - {name: "AndMaskedUint32x4", argLength: 3, commutative: true}, - {name: "AndMaskedUint32x8", argLength: 3, commutative: true}, - {name: "AndMaskedUint32x16", argLength: 3, commutative: true}, - {name: "AndMaskedUint64x2", argLength: 3, commutative: true}, - {name: "AndMaskedUint64x4", argLength: 3, commutative: true}, - {name: "AndMaskedUint64x8", argLength: 3, commutative: true}, {name: "AndNotInt8x16", argLength: 2, commutative: false}, {name: "AndNotInt8x32", argLength: 2, commutative: false}, {name: "AndNotInt8x64", argLength: 2, commutative: false}, @@ -184,18 +109,6 @@ func simdGenericOps() []opData { {name: "AndNotInt64x2", argLength: 2, commutative: false}, {name: "AndNotInt64x4", argLength: 2, commutative: false}, {name: "AndNotInt64x8", argLength: 2, commutative: false}, - {name: "AndNotMaskedInt32x4", argLength: 3, commutative: false}, - {name: "AndNotMaskedInt32x8", argLength: 3, commutative: false}, - {name: "AndNotMaskedInt32x16", argLength: 3, commutative: false}, - {name: "AndNotMaskedInt64x2", argLength: 3, commutative: false}, - {name: "AndNotMaskedInt64x4", argLength: 3, commutative: false}, - {name: "AndNotMaskedInt64x8", argLength: 3, commutative: false}, - {name: "AndNotMaskedUint32x4", argLength: 3, commutative: false}, - {name: "AndNotMaskedUint32x8", argLength: 3, commutative: false}, - {name: "AndNotMaskedUint32x16", argLength: 3, commutative: false}, - {name: "AndNotMaskedUint64x2", argLength: 3, commutative: false}, - {name: "AndNotMaskedUint64x4", argLength: 3, commutative: false}, - {name: "AndNotMaskedUint64x8", argLength: 3, commutative: false}, {name: "AndNotUint8x16", argLength: 2, commutative: false}, {name: "AndNotUint8x32", argLength: 2, commutative: false}, {name: "AndNotUint8x64", argLength: 2, commutative: false}, @@ -220,12 +133,6 @@ func simdGenericOps() []opData { {name: "AndUint64x2", argLength: 2, commutative: true}, {name: "AndUint64x4", argLength: 2, commutative: true}, {name: "AndUint64x8", argLength: 2, commutative: true}, - {name: "AverageMaskedUint8x16", argLength: 3, commutative: true}, - {name: "AverageMaskedUint8x32", argLength: 3, commutative: true}, - {name: "AverageMaskedUint8x64", argLength: 3, commutative: true}, - {name: "AverageMaskedUint16x8", argLength: 3, commutative: true}, - {name: "AverageMaskedUint16x16", argLength: 3, commutative: true}, - {name: "AverageMaskedUint16x32", argLength: 3, 
commutative: true}, {name: "AverageUint8x16", argLength: 2, commutative: true}, {name: "AverageUint8x32", argLength: 2, commutative: true}, {name: "AverageUint8x64", argLength: 2, commutative: true}, @@ -238,16 +145,6 @@ func simdGenericOps() []opData { {name: "Broadcast128Int16x8", argLength: 1, commutative: false}, {name: "Broadcast128Int32x4", argLength: 1, commutative: false}, {name: "Broadcast128Int64x2", argLength: 1, commutative: false}, - {name: "Broadcast128MaskedFloat32x4", argLength: 2, commutative: false}, - {name: "Broadcast128MaskedFloat64x2", argLength: 2, commutative: false}, - {name: "Broadcast128MaskedInt8x16", argLength: 2, commutative: false}, - {name: "Broadcast128MaskedInt16x8", argLength: 2, commutative: false}, - {name: "Broadcast128MaskedInt32x4", argLength: 2, commutative: false}, - {name: "Broadcast128MaskedInt64x2", argLength: 2, commutative: false}, - {name: "Broadcast128MaskedUint8x16", argLength: 2, commutative: false}, - {name: "Broadcast128MaskedUint16x8", argLength: 2, commutative: false}, - {name: "Broadcast128MaskedUint32x4", argLength: 2, commutative: false}, - {name: "Broadcast128MaskedUint64x2", argLength: 2, commutative: false}, {name: "Broadcast128Uint8x16", argLength: 1, commutative: false}, {name: "Broadcast128Uint16x8", argLength: 1, commutative: false}, {name: "Broadcast128Uint32x4", argLength: 1, commutative: false}, @@ -258,16 +155,6 @@ func simdGenericOps() []opData { {name: "Broadcast256Int16x8", argLength: 1, commutative: false}, {name: "Broadcast256Int32x4", argLength: 1, commutative: false}, {name: "Broadcast256Int64x2", argLength: 1, commutative: false}, - {name: "Broadcast256MaskedFloat32x4", argLength: 2, commutative: false}, - {name: "Broadcast256MaskedFloat64x2", argLength: 2, commutative: false}, - {name: "Broadcast256MaskedInt8x16", argLength: 2, commutative: false}, - {name: "Broadcast256MaskedInt16x8", argLength: 2, commutative: false}, - {name: "Broadcast256MaskedInt32x4", argLength: 2, commutative: false}, - {name: "Broadcast256MaskedInt64x2", argLength: 2, commutative: false}, - {name: "Broadcast256MaskedUint8x16", argLength: 2, commutative: false}, - {name: "Broadcast256MaskedUint16x8", argLength: 2, commutative: false}, - {name: "Broadcast256MaskedUint32x4", argLength: 2, commutative: false}, - {name: "Broadcast256MaskedUint64x2", argLength: 2, commutative: false}, {name: "Broadcast256Uint8x16", argLength: 1, commutative: false}, {name: "Broadcast256Uint16x8", argLength: 1, commutative: false}, {name: "Broadcast256Uint32x4", argLength: 1, commutative: false}, @@ -278,16 +165,6 @@ func simdGenericOps() []opData { {name: "Broadcast512Int16x8", argLength: 1, commutative: false}, {name: "Broadcast512Int32x4", argLength: 1, commutative: false}, {name: "Broadcast512Int64x2", argLength: 1, commutative: false}, - {name: "Broadcast512MaskedFloat32x4", argLength: 2, commutative: false}, - {name: "Broadcast512MaskedFloat64x2", argLength: 2, commutative: false}, - {name: "Broadcast512MaskedInt8x16", argLength: 2, commutative: false}, - {name: "Broadcast512MaskedInt16x8", argLength: 2, commutative: false}, - {name: "Broadcast512MaskedInt32x4", argLength: 2, commutative: false}, - {name: "Broadcast512MaskedInt64x2", argLength: 2, commutative: false}, - {name: "Broadcast512MaskedUint8x16", argLength: 2, commutative: false}, - {name: "Broadcast512MaskedUint16x8", argLength: 2, commutative: false}, - {name: "Broadcast512MaskedUint32x4", argLength: 2, commutative: false}, - {name: "Broadcast512MaskedUint64x2", argLength: 2, commutative: 
false}, {name: "Broadcast512Uint8x16", argLength: 1, commutative: false}, {name: "Broadcast512Uint16x8", argLength: 1, commutative: false}, {name: "Broadcast512Uint32x4", argLength: 1, commutative: false}, @@ -329,15 +206,9 @@ func simdGenericOps() []opData { {name: "ConvertToInt32Float32x4", argLength: 1, commutative: false}, {name: "ConvertToInt32Float32x8", argLength: 1, commutative: false}, {name: "ConvertToInt32Float32x16", argLength: 1, commutative: false}, - {name: "ConvertToInt32MaskedFloat32x4", argLength: 2, commutative: false}, - {name: "ConvertToInt32MaskedFloat32x8", argLength: 2, commutative: false}, - {name: "ConvertToInt32MaskedFloat32x16", argLength: 2, commutative: false}, {name: "ConvertToUint32Float32x4", argLength: 1, commutative: false}, {name: "ConvertToUint32Float32x8", argLength: 1, commutative: false}, {name: "ConvertToUint32Float32x16", argLength: 1, commutative: false}, - {name: "ConvertToUint32MaskedFloat32x4", argLength: 2, commutative: false}, - {name: "ConvertToUint32MaskedFloat32x8", argLength: 2, commutative: false}, - {name: "ConvertToUint32MaskedFloat32x16", argLength: 2, commutative: false}, {name: "CopySignInt8x16", argLength: 2, commutative: false}, {name: "CopySignInt8x32", argLength: 2, commutative: false}, {name: "CopySignInt16x8", argLength: 2, commutative: false}, @@ -350,21 +221,9 @@ func simdGenericOps() []opData { {name: "DivFloat64x2", argLength: 2, commutative: false}, {name: "DivFloat64x4", argLength: 2, commutative: false}, {name: "DivFloat64x8", argLength: 2, commutative: false}, - {name: "DivMaskedFloat32x4", argLength: 3, commutative: false}, - {name: "DivMaskedFloat32x8", argLength: 3, commutative: false}, - {name: "DivMaskedFloat32x16", argLength: 3, commutative: false}, - {name: "DivMaskedFloat64x2", argLength: 3, commutative: false}, - {name: "DivMaskedFloat64x4", argLength: 3, commutative: false}, - {name: "DivMaskedFloat64x8", argLength: 3, commutative: false}, {name: "DotProdPairsInt16x8", argLength: 2, commutative: false}, {name: "DotProdPairsInt16x16", argLength: 2, commutative: false}, {name: "DotProdPairsInt16x32", argLength: 2, commutative: false}, - {name: "DotProdPairsMaskedInt16x8", argLength: 3, commutative: false}, - {name: "DotProdPairsMaskedInt16x16", argLength: 3, commutative: false}, - {name: "DotProdPairsMaskedInt16x32", argLength: 3, commutative: false}, - {name: "DotProdPairsSaturatedMaskedUint8x16", argLength: 3, commutative: false}, - {name: "DotProdPairsSaturatedMaskedUint8x32", argLength: 3, commutative: false}, - {name: "DotProdPairsSaturatedMaskedUint8x64", argLength: 3, commutative: false}, {name: "DotProdPairsSaturatedUint8x16", argLength: 2, commutative: false}, {name: "DotProdPairsSaturatedUint8x32", argLength: 2, commutative: false}, {name: "DotProdPairsSaturatedUint8x64", argLength: 2, commutative: false}, @@ -386,36 +245,6 @@ func simdGenericOps() []opData { {name: "EqualInt64x2", argLength: 2, commutative: true}, {name: "EqualInt64x4", argLength: 2, commutative: true}, {name: "EqualInt64x8", argLength: 2, commutative: true}, - {name: "EqualMaskedFloat32x4", argLength: 3, commutative: true}, - {name: "EqualMaskedFloat32x8", argLength: 3, commutative: true}, - {name: "EqualMaskedFloat32x16", argLength: 3, commutative: true}, - {name: "EqualMaskedFloat64x2", argLength: 3, commutative: true}, - {name: "EqualMaskedFloat64x4", argLength: 3, commutative: true}, - {name: "EqualMaskedFloat64x8", argLength: 3, commutative: true}, - {name: "EqualMaskedInt8x16", argLength: 3, commutative: true}, - {name: 
"EqualMaskedInt8x32", argLength: 3, commutative: true}, - {name: "EqualMaskedInt8x64", argLength: 3, commutative: true}, - {name: "EqualMaskedInt16x8", argLength: 3, commutative: true}, - {name: "EqualMaskedInt16x16", argLength: 3, commutative: true}, - {name: "EqualMaskedInt16x32", argLength: 3, commutative: true}, - {name: "EqualMaskedInt32x4", argLength: 3, commutative: true}, - {name: "EqualMaskedInt32x8", argLength: 3, commutative: true}, - {name: "EqualMaskedInt32x16", argLength: 3, commutative: true}, - {name: "EqualMaskedInt64x2", argLength: 3, commutative: true}, - {name: "EqualMaskedInt64x4", argLength: 3, commutative: true}, - {name: "EqualMaskedInt64x8", argLength: 3, commutative: true}, - {name: "EqualMaskedUint8x16", argLength: 3, commutative: true}, - {name: "EqualMaskedUint8x32", argLength: 3, commutative: true}, - {name: "EqualMaskedUint8x64", argLength: 3, commutative: true}, - {name: "EqualMaskedUint16x8", argLength: 3, commutative: true}, - {name: "EqualMaskedUint16x16", argLength: 3, commutative: true}, - {name: "EqualMaskedUint16x32", argLength: 3, commutative: true}, - {name: "EqualMaskedUint32x4", argLength: 3, commutative: true}, - {name: "EqualMaskedUint32x8", argLength: 3, commutative: true}, - {name: "EqualMaskedUint32x16", argLength: 3, commutative: true}, - {name: "EqualMaskedUint64x2", argLength: 3, commutative: true}, - {name: "EqualMaskedUint64x4", argLength: 3, commutative: true}, - {name: "EqualMaskedUint64x8", argLength: 3, commutative: true}, {name: "EqualUint8x16", argLength: 2, commutative: true}, {name: "EqualUint8x32", argLength: 2, commutative: true}, {name: "EqualUint8x64", argLength: 2, commutative: true}, @@ -462,9 +291,6 @@ func simdGenericOps() []opData { {name: "FloorFloat32x8", argLength: 1, commutative: false}, {name: "FloorFloat64x2", argLength: 1, commutative: false}, {name: "FloorFloat64x4", argLength: 1, commutative: false}, - {name: "GaloisFieldMulMaskedUint8x16", argLength: 3, commutative: false}, - {name: "GaloisFieldMulMaskedUint8x32", argLength: 3, commutative: false}, - {name: "GaloisFieldMulMaskedUint8x64", argLength: 3, commutative: false}, {name: "GaloisFieldMulUint8x16", argLength: 2, commutative: false}, {name: "GaloisFieldMulUint8x32", argLength: 2, commutative: false}, {name: "GaloisFieldMulUint8x64", argLength: 2, commutative: false}, @@ -518,36 +344,6 @@ func simdGenericOps() []opData { {name: "GreaterEqualInt16x32", argLength: 2, commutative: false}, {name: "GreaterEqualInt32x16", argLength: 2, commutative: false}, {name: "GreaterEqualInt64x8", argLength: 2, commutative: false}, - {name: "GreaterEqualMaskedFloat32x4", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedFloat32x8", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedFloat32x16", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedFloat64x2", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedFloat64x4", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedFloat64x8", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedInt8x16", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedInt8x32", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedInt8x64", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedInt16x8", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedInt16x16", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedInt16x32", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedInt32x4", argLength: 3, commutative: 
false}, - {name: "GreaterEqualMaskedInt32x8", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedInt32x16", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedInt64x2", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedInt64x4", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedInt64x8", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedUint8x16", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedUint8x32", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedUint8x64", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedUint16x8", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedUint16x16", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedUint16x32", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedUint32x4", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedUint32x8", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedUint32x16", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedUint64x2", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedUint64x4", argLength: 3, commutative: false}, - {name: "GreaterEqualMaskedUint64x8", argLength: 3, commutative: false}, {name: "GreaterEqualUint8x64", argLength: 2, commutative: false}, {name: "GreaterEqualUint16x32", argLength: 2, commutative: false}, {name: "GreaterEqualUint32x16", argLength: 2, commutative: false}, @@ -570,36 +366,6 @@ func simdGenericOps() []opData { {name: "GreaterInt64x2", argLength: 2, commutative: false}, {name: "GreaterInt64x4", argLength: 2, commutative: false}, {name: "GreaterInt64x8", argLength: 2, commutative: false}, - {name: "GreaterMaskedFloat32x4", argLength: 3, commutative: false}, - {name: "GreaterMaskedFloat32x8", argLength: 3, commutative: false}, - {name: "GreaterMaskedFloat32x16", argLength: 3, commutative: false}, - {name: "GreaterMaskedFloat64x2", argLength: 3, commutative: false}, - {name: "GreaterMaskedFloat64x4", argLength: 3, commutative: false}, - {name: "GreaterMaskedFloat64x8", argLength: 3, commutative: false}, - {name: "GreaterMaskedInt8x16", argLength: 3, commutative: false}, - {name: "GreaterMaskedInt8x32", argLength: 3, commutative: false}, - {name: "GreaterMaskedInt8x64", argLength: 3, commutative: false}, - {name: "GreaterMaskedInt16x8", argLength: 3, commutative: false}, - {name: "GreaterMaskedInt16x16", argLength: 3, commutative: false}, - {name: "GreaterMaskedInt16x32", argLength: 3, commutative: false}, - {name: "GreaterMaskedInt32x4", argLength: 3, commutative: false}, - {name: "GreaterMaskedInt32x8", argLength: 3, commutative: false}, - {name: "GreaterMaskedInt32x16", argLength: 3, commutative: false}, - {name: "GreaterMaskedInt64x2", argLength: 3, commutative: false}, - {name: "GreaterMaskedInt64x4", argLength: 3, commutative: false}, - {name: "GreaterMaskedInt64x8", argLength: 3, commutative: false}, - {name: "GreaterMaskedUint8x16", argLength: 3, commutative: false}, - {name: "GreaterMaskedUint8x32", argLength: 3, commutative: false}, - {name: "GreaterMaskedUint8x64", argLength: 3, commutative: false}, - {name: "GreaterMaskedUint16x8", argLength: 3, commutative: false}, - {name: "GreaterMaskedUint16x16", argLength: 3, commutative: false}, - {name: "GreaterMaskedUint16x32", argLength: 3, commutative: false}, - {name: "GreaterMaskedUint32x4", argLength: 3, commutative: false}, - {name: "GreaterMaskedUint32x8", argLength: 3, commutative: false}, - {name: "GreaterMaskedUint32x16", argLength: 3, commutative: 
false}, - {name: "GreaterMaskedUint64x2", argLength: 3, commutative: false}, - {name: "GreaterMaskedUint64x4", argLength: 3, commutative: false}, - {name: "GreaterMaskedUint64x8", argLength: 3, commutative: false}, {name: "GreaterUint8x64", argLength: 2, commutative: false}, {name: "GreaterUint16x32", argLength: 2, commutative: false}, {name: "GreaterUint32x16", argLength: 2, commutative: false}, @@ -610,12 +376,6 @@ func simdGenericOps() []opData { {name: "IsNanFloat64x2", argLength: 2, commutative: true}, {name: "IsNanFloat64x4", argLength: 2, commutative: true}, {name: "IsNanFloat64x8", argLength: 2, commutative: true}, - {name: "IsNanMaskedFloat32x4", argLength: 3, commutative: true}, - {name: "IsNanMaskedFloat32x8", argLength: 3, commutative: true}, - {name: "IsNanMaskedFloat32x16", argLength: 3, commutative: true}, - {name: "IsNanMaskedFloat64x2", argLength: 3, commutative: true}, - {name: "IsNanMaskedFloat64x4", argLength: 3, commutative: true}, - {name: "IsNanMaskedFloat64x8", argLength: 3, commutative: true}, {name: "LessEqualFloat32x4", argLength: 2, commutative: false}, {name: "LessEqualFloat32x8", argLength: 2, commutative: false}, {name: "LessEqualFloat32x16", argLength: 2, commutative: false}, @@ -626,36 +386,6 @@ func simdGenericOps() []opData { {name: "LessEqualInt16x32", argLength: 2, commutative: false}, {name: "LessEqualInt32x16", argLength: 2, commutative: false}, {name: "LessEqualInt64x8", argLength: 2, commutative: false}, - {name: "LessEqualMaskedFloat32x4", argLength: 3, commutative: false}, - {name: "LessEqualMaskedFloat32x8", argLength: 3, commutative: false}, - {name: "LessEqualMaskedFloat32x16", argLength: 3, commutative: false}, - {name: "LessEqualMaskedFloat64x2", argLength: 3, commutative: false}, - {name: "LessEqualMaskedFloat64x4", argLength: 3, commutative: false}, - {name: "LessEqualMaskedFloat64x8", argLength: 3, commutative: false}, - {name: "LessEqualMaskedInt8x16", argLength: 3, commutative: false}, - {name: "LessEqualMaskedInt8x32", argLength: 3, commutative: false}, - {name: "LessEqualMaskedInt8x64", argLength: 3, commutative: false}, - {name: "LessEqualMaskedInt16x8", argLength: 3, commutative: false}, - {name: "LessEqualMaskedInt16x16", argLength: 3, commutative: false}, - {name: "LessEqualMaskedInt16x32", argLength: 3, commutative: false}, - {name: "LessEqualMaskedInt32x4", argLength: 3, commutative: false}, - {name: "LessEqualMaskedInt32x8", argLength: 3, commutative: false}, - {name: "LessEqualMaskedInt32x16", argLength: 3, commutative: false}, - {name: "LessEqualMaskedInt64x2", argLength: 3, commutative: false}, - {name: "LessEqualMaskedInt64x4", argLength: 3, commutative: false}, - {name: "LessEqualMaskedInt64x8", argLength: 3, commutative: false}, - {name: "LessEqualMaskedUint8x16", argLength: 3, commutative: false}, - {name: "LessEqualMaskedUint8x32", argLength: 3, commutative: false}, - {name: "LessEqualMaskedUint8x64", argLength: 3, commutative: false}, - {name: "LessEqualMaskedUint16x8", argLength: 3, commutative: false}, - {name: "LessEqualMaskedUint16x16", argLength: 3, commutative: false}, - {name: "LessEqualMaskedUint16x32", argLength: 3, commutative: false}, - {name: "LessEqualMaskedUint32x4", argLength: 3, commutative: false}, - {name: "LessEqualMaskedUint32x8", argLength: 3, commutative: false}, - {name: "LessEqualMaskedUint32x16", argLength: 3, commutative: false}, - {name: "LessEqualMaskedUint64x2", argLength: 3, commutative: false}, - {name: "LessEqualMaskedUint64x4", argLength: 3, commutative: false}, - {name: 
"LessEqualMaskedUint64x8", argLength: 3, commutative: false}, {name: "LessEqualUint8x64", argLength: 2, commutative: false}, {name: "LessEqualUint16x32", argLength: 2, commutative: false}, {name: "LessEqualUint32x16", argLength: 2, commutative: false}, @@ -670,36 +400,6 @@ func simdGenericOps() []opData { {name: "LessInt16x32", argLength: 2, commutative: false}, {name: "LessInt32x16", argLength: 2, commutative: false}, {name: "LessInt64x8", argLength: 2, commutative: false}, - {name: "LessMaskedFloat32x4", argLength: 3, commutative: false}, - {name: "LessMaskedFloat32x8", argLength: 3, commutative: false}, - {name: "LessMaskedFloat32x16", argLength: 3, commutative: false}, - {name: "LessMaskedFloat64x2", argLength: 3, commutative: false}, - {name: "LessMaskedFloat64x4", argLength: 3, commutative: false}, - {name: "LessMaskedFloat64x8", argLength: 3, commutative: false}, - {name: "LessMaskedInt8x16", argLength: 3, commutative: false}, - {name: "LessMaskedInt8x32", argLength: 3, commutative: false}, - {name: "LessMaskedInt8x64", argLength: 3, commutative: false}, - {name: "LessMaskedInt16x8", argLength: 3, commutative: false}, - {name: "LessMaskedInt16x16", argLength: 3, commutative: false}, - {name: "LessMaskedInt16x32", argLength: 3, commutative: false}, - {name: "LessMaskedInt32x4", argLength: 3, commutative: false}, - {name: "LessMaskedInt32x8", argLength: 3, commutative: false}, - {name: "LessMaskedInt32x16", argLength: 3, commutative: false}, - {name: "LessMaskedInt64x2", argLength: 3, commutative: false}, - {name: "LessMaskedInt64x4", argLength: 3, commutative: false}, - {name: "LessMaskedInt64x8", argLength: 3, commutative: false}, - {name: "LessMaskedUint8x16", argLength: 3, commutative: false}, - {name: "LessMaskedUint8x32", argLength: 3, commutative: false}, - {name: "LessMaskedUint8x64", argLength: 3, commutative: false}, - {name: "LessMaskedUint16x8", argLength: 3, commutative: false}, - {name: "LessMaskedUint16x16", argLength: 3, commutative: false}, - {name: "LessMaskedUint16x32", argLength: 3, commutative: false}, - {name: "LessMaskedUint32x4", argLength: 3, commutative: false}, - {name: "LessMaskedUint32x8", argLength: 3, commutative: false}, - {name: "LessMaskedUint32x16", argLength: 3, commutative: false}, - {name: "LessMaskedUint64x2", argLength: 3, commutative: false}, - {name: "LessMaskedUint64x4", argLength: 3, commutative: false}, - {name: "LessMaskedUint64x8", argLength: 3, commutative: false}, {name: "LessUint8x64", argLength: 2, commutative: false}, {name: "LessUint16x32", argLength: 2, commutative: false}, {name: "LessUint32x16", argLength: 2, commutative: false}, @@ -722,36 +422,6 @@ func simdGenericOps() []opData { {name: "MaxInt64x2", argLength: 2, commutative: true}, {name: "MaxInt64x4", argLength: 2, commutative: true}, {name: "MaxInt64x8", argLength: 2, commutative: true}, - {name: "MaxMaskedFloat32x4", argLength: 3, commutative: true}, - {name: "MaxMaskedFloat32x8", argLength: 3, commutative: true}, - {name: "MaxMaskedFloat32x16", argLength: 3, commutative: true}, - {name: "MaxMaskedFloat64x2", argLength: 3, commutative: true}, - {name: "MaxMaskedFloat64x4", argLength: 3, commutative: true}, - {name: "MaxMaskedFloat64x8", argLength: 3, commutative: true}, - {name: "MaxMaskedInt8x16", argLength: 3, commutative: true}, - {name: "MaxMaskedInt8x32", argLength: 3, commutative: true}, - {name: "MaxMaskedInt8x64", argLength: 3, commutative: true}, - {name: "MaxMaskedInt16x8", argLength: 3, commutative: true}, - {name: "MaxMaskedInt16x16", argLength: 3, 
commutative: true}, - {name: "MaxMaskedInt16x32", argLength: 3, commutative: true}, - {name: "MaxMaskedInt32x4", argLength: 3, commutative: true}, - {name: "MaxMaskedInt32x8", argLength: 3, commutative: true}, - {name: "MaxMaskedInt32x16", argLength: 3, commutative: true}, - {name: "MaxMaskedInt64x2", argLength: 3, commutative: true}, - {name: "MaxMaskedInt64x4", argLength: 3, commutative: true}, - {name: "MaxMaskedInt64x8", argLength: 3, commutative: true}, - {name: "MaxMaskedUint8x16", argLength: 3, commutative: true}, - {name: "MaxMaskedUint8x32", argLength: 3, commutative: true}, - {name: "MaxMaskedUint8x64", argLength: 3, commutative: true}, - {name: "MaxMaskedUint16x8", argLength: 3, commutative: true}, - {name: "MaxMaskedUint16x16", argLength: 3, commutative: true}, - {name: "MaxMaskedUint16x32", argLength: 3, commutative: true}, - {name: "MaxMaskedUint32x4", argLength: 3, commutative: true}, - {name: "MaxMaskedUint32x8", argLength: 3, commutative: true}, - {name: "MaxMaskedUint32x16", argLength: 3, commutative: true}, - {name: "MaxMaskedUint64x2", argLength: 3, commutative: true}, - {name: "MaxMaskedUint64x4", argLength: 3, commutative: true}, - {name: "MaxMaskedUint64x8", argLength: 3, commutative: true}, {name: "MaxUint8x16", argLength: 2, commutative: true}, {name: "MaxUint8x32", argLength: 2, commutative: true}, {name: "MaxUint8x64", argLength: 2, commutative: true}, @@ -782,36 +452,6 @@ func simdGenericOps() []opData { {name: "MinInt64x2", argLength: 2, commutative: true}, {name: "MinInt64x4", argLength: 2, commutative: true}, {name: "MinInt64x8", argLength: 2, commutative: true}, - {name: "MinMaskedFloat32x4", argLength: 3, commutative: true}, - {name: "MinMaskedFloat32x8", argLength: 3, commutative: true}, - {name: "MinMaskedFloat32x16", argLength: 3, commutative: true}, - {name: "MinMaskedFloat64x2", argLength: 3, commutative: true}, - {name: "MinMaskedFloat64x4", argLength: 3, commutative: true}, - {name: "MinMaskedFloat64x8", argLength: 3, commutative: true}, - {name: "MinMaskedInt8x16", argLength: 3, commutative: true}, - {name: "MinMaskedInt8x32", argLength: 3, commutative: true}, - {name: "MinMaskedInt8x64", argLength: 3, commutative: true}, - {name: "MinMaskedInt16x8", argLength: 3, commutative: true}, - {name: "MinMaskedInt16x16", argLength: 3, commutative: true}, - {name: "MinMaskedInt16x32", argLength: 3, commutative: true}, - {name: "MinMaskedInt32x4", argLength: 3, commutative: true}, - {name: "MinMaskedInt32x8", argLength: 3, commutative: true}, - {name: "MinMaskedInt32x16", argLength: 3, commutative: true}, - {name: "MinMaskedInt64x2", argLength: 3, commutative: true}, - {name: "MinMaskedInt64x4", argLength: 3, commutative: true}, - {name: "MinMaskedInt64x8", argLength: 3, commutative: true}, - {name: "MinMaskedUint8x16", argLength: 3, commutative: true}, - {name: "MinMaskedUint8x32", argLength: 3, commutative: true}, - {name: "MinMaskedUint8x64", argLength: 3, commutative: true}, - {name: "MinMaskedUint16x8", argLength: 3, commutative: true}, - {name: "MinMaskedUint16x16", argLength: 3, commutative: true}, - {name: "MinMaskedUint16x32", argLength: 3, commutative: true}, - {name: "MinMaskedUint32x4", argLength: 3, commutative: true}, - {name: "MinMaskedUint32x8", argLength: 3, commutative: true}, - {name: "MinMaskedUint32x16", argLength: 3, commutative: true}, - {name: "MinMaskedUint64x2", argLength: 3, commutative: true}, - {name: "MinMaskedUint64x4", argLength: 3, commutative: true}, - {name: "MinMaskedUint64x8", argLength: 3, commutative: true}, {name: 
"MinUint8x16", argLength: 2, commutative: true}, {name: "MinUint8x32", argLength: 2, commutative: true}, {name: "MinUint8x64", argLength: 2, commutative: true}, @@ -830,24 +470,12 @@ func simdGenericOps() []opData { {name: "MulAddFloat64x2", argLength: 3, commutative: false}, {name: "MulAddFloat64x4", argLength: 3, commutative: false}, {name: "MulAddFloat64x8", argLength: 3, commutative: false}, - {name: "MulAddMaskedFloat32x4", argLength: 4, commutative: false}, - {name: "MulAddMaskedFloat32x8", argLength: 4, commutative: false}, - {name: "MulAddMaskedFloat32x16", argLength: 4, commutative: false}, - {name: "MulAddMaskedFloat64x2", argLength: 4, commutative: false}, - {name: "MulAddMaskedFloat64x4", argLength: 4, commutative: false}, - {name: "MulAddMaskedFloat64x8", argLength: 4, commutative: false}, {name: "MulAddSubFloat32x4", argLength: 3, commutative: false}, {name: "MulAddSubFloat32x8", argLength: 3, commutative: false}, {name: "MulAddSubFloat32x16", argLength: 3, commutative: false}, {name: "MulAddSubFloat64x2", argLength: 3, commutative: false}, {name: "MulAddSubFloat64x4", argLength: 3, commutative: false}, {name: "MulAddSubFloat64x8", argLength: 3, commutative: false}, - {name: "MulAddSubMaskedFloat32x4", argLength: 4, commutative: false}, - {name: "MulAddSubMaskedFloat32x8", argLength: 4, commutative: false}, - {name: "MulAddSubMaskedFloat32x16", argLength: 4, commutative: false}, - {name: "MulAddSubMaskedFloat64x2", argLength: 4, commutative: false}, - {name: "MulAddSubMaskedFloat64x4", argLength: 4, commutative: false}, - {name: "MulAddSubMaskedFloat64x8", argLength: 4, commutative: false}, {name: "MulEvenWidenInt32x4", argLength: 2, commutative: true}, {name: "MulEvenWidenInt32x8", argLength: 2, commutative: true}, {name: "MulEvenWidenUint32x4", argLength: 2, commutative: true}, @@ -861,12 +489,6 @@ func simdGenericOps() []opData { {name: "MulHighInt16x8", argLength: 2, commutative: true}, {name: "MulHighInt16x16", argLength: 2, commutative: true}, {name: "MulHighInt16x32", argLength: 2, commutative: true}, - {name: "MulHighMaskedInt16x8", argLength: 3, commutative: true}, - {name: "MulHighMaskedInt16x16", argLength: 3, commutative: true}, - {name: "MulHighMaskedInt16x32", argLength: 3, commutative: true}, - {name: "MulHighMaskedUint16x8", argLength: 3, commutative: true}, - {name: "MulHighMaskedUint16x16", argLength: 3, commutative: true}, - {name: "MulHighMaskedUint16x32", argLength: 3, commutative: true}, {name: "MulHighUint16x8", argLength: 2, commutative: true}, {name: "MulHighUint16x16", argLength: 2, commutative: true}, {name: "MulHighUint16x32", argLength: 2, commutative: true}, @@ -879,42 +501,12 @@ func simdGenericOps() []opData { {name: "MulInt64x2", argLength: 2, commutative: true}, {name: "MulInt64x4", argLength: 2, commutative: true}, {name: "MulInt64x8", argLength: 2, commutative: true}, - {name: "MulMaskedFloat32x4", argLength: 3, commutative: true}, - {name: "MulMaskedFloat32x8", argLength: 3, commutative: true}, - {name: "MulMaskedFloat32x16", argLength: 3, commutative: true}, - {name: "MulMaskedFloat64x2", argLength: 3, commutative: true}, - {name: "MulMaskedFloat64x4", argLength: 3, commutative: true}, - {name: "MulMaskedFloat64x8", argLength: 3, commutative: true}, - {name: "MulMaskedInt16x8", argLength: 3, commutative: true}, - {name: "MulMaskedInt16x16", argLength: 3, commutative: true}, - {name: "MulMaskedInt16x32", argLength: 3, commutative: true}, - {name: "MulMaskedInt32x4", argLength: 3, commutative: true}, - {name: "MulMaskedInt32x8", argLength: 
3, commutative: true}, - {name: "MulMaskedInt32x16", argLength: 3, commutative: true}, - {name: "MulMaskedInt64x2", argLength: 3, commutative: true}, - {name: "MulMaskedInt64x4", argLength: 3, commutative: true}, - {name: "MulMaskedInt64x8", argLength: 3, commutative: true}, - {name: "MulMaskedUint16x8", argLength: 3, commutative: true}, - {name: "MulMaskedUint16x16", argLength: 3, commutative: true}, - {name: "MulMaskedUint16x32", argLength: 3, commutative: true}, - {name: "MulMaskedUint32x4", argLength: 3, commutative: true}, - {name: "MulMaskedUint32x8", argLength: 3, commutative: true}, - {name: "MulMaskedUint32x16", argLength: 3, commutative: true}, - {name: "MulMaskedUint64x2", argLength: 3, commutative: true}, - {name: "MulMaskedUint64x4", argLength: 3, commutative: true}, - {name: "MulMaskedUint64x8", argLength: 3, commutative: true}, {name: "MulSubAddFloat32x4", argLength: 3, commutative: false}, {name: "MulSubAddFloat32x8", argLength: 3, commutative: false}, {name: "MulSubAddFloat32x16", argLength: 3, commutative: false}, {name: "MulSubAddFloat64x2", argLength: 3, commutative: false}, {name: "MulSubAddFloat64x4", argLength: 3, commutative: false}, {name: "MulSubAddFloat64x8", argLength: 3, commutative: false}, - {name: "MulSubAddMaskedFloat32x4", argLength: 4, commutative: false}, - {name: "MulSubAddMaskedFloat32x8", argLength: 4, commutative: false}, - {name: "MulSubAddMaskedFloat32x16", argLength: 4, commutative: false}, - {name: "MulSubAddMaskedFloat64x2", argLength: 4, commutative: false}, - {name: "MulSubAddMaskedFloat64x4", argLength: 4, commutative: false}, - {name: "MulSubAddMaskedFloat64x8", argLength: 4, commutative: false}, {name: "MulUint16x8", argLength: 2, commutative: true}, {name: "MulUint16x16", argLength: 2, commutative: true}, {name: "MulUint16x32", argLength: 2, commutative: true}, @@ -934,36 +526,6 @@ func simdGenericOps() []opData { {name: "NotEqualInt16x32", argLength: 2, commutative: true}, {name: "NotEqualInt32x16", argLength: 2, commutative: true}, {name: "NotEqualInt64x8", argLength: 2, commutative: true}, - {name: "NotEqualMaskedFloat32x4", argLength: 3, commutative: true}, - {name: "NotEqualMaskedFloat32x8", argLength: 3, commutative: true}, - {name: "NotEqualMaskedFloat32x16", argLength: 3, commutative: true}, - {name: "NotEqualMaskedFloat64x2", argLength: 3, commutative: true}, - {name: "NotEqualMaskedFloat64x4", argLength: 3, commutative: true}, - {name: "NotEqualMaskedFloat64x8", argLength: 3, commutative: true}, - {name: "NotEqualMaskedInt8x16", argLength: 3, commutative: true}, - {name: "NotEqualMaskedInt8x32", argLength: 3, commutative: true}, - {name: "NotEqualMaskedInt8x64", argLength: 3, commutative: true}, - {name: "NotEqualMaskedInt16x8", argLength: 3, commutative: true}, - {name: "NotEqualMaskedInt16x16", argLength: 3, commutative: true}, - {name: "NotEqualMaskedInt16x32", argLength: 3, commutative: true}, - {name: "NotEqualMaskedInt32x4", argLength: 3, commutative: true}, - {name: "NotEqualMaskedInt32x8", argLength: 3, commutative: true}, - {name: "NotEqualMaskedInt32x16", argLength: 3, commutative: true}, - {name: "NotEqualMaskedInt64x2", argLength: 3, commutative: true}, - {name: "NotEqualMaskedInt64x4", argLength: 3, commutative: true}, - {name: "NotEqualMaskedInt64x8", argLength: 3, commutative: true}, - {name: "NotEqualMaskedUint8x16", argLength: 3, commutative: true}, - {name: "NotEqualMaskedUint8x32", argLength: 3, commutative: true}, - {name: "NotEqualMaskedUint8x64", argLength: 3, commutative: true}, - {name: 
"NotEqualMaskedUint16x8", argLength: 3, commutative: true}, - {name: "NotEqualMaskedUint16x16", argLength: 3, commutative: true}, - {name: "NotEqualMaskedUint16x32", argLength: 3, commutative: true}, - {name: "NotEqualMaskedUint32x4", argLength: 3, commutative: true}, - {name: "NotEqualMaskedUint32x8", argLength: 3, commutative: true}, - {name: "NotEqualMaskedUint32x16", argLength: 3, commutative: true}, - {name: "NotEqualMaskedUint64x2", argLength: 3, commutative: true}, - {name: "NotEqualMaskedUint64x4", argLength: 3, commutative: true}, - {name: "NotEqualMaskedUint64x8", argLength: 3, commutative: true}, {name: "NotEqualUint8x64", argLength: 2, commutative: true}, {name: "NotEqualUint16x32", argLength: 2, commutative: true}, {name: "NotEqualUint32x16", argLength: 2, commutative: true}, @@ -980,30 +542,6 @@ func simdGenericOps() []opData { {name: "OnesCountInt64x2", argLength: 1, commutative: false}, {name: "OnesCountInt64x4", argLength: 1, commutative: false}, {name: "OnesCountInt64x8", argLength: 1, commutative: false}, - {name: "OnesCountMaskedInt8x16", argLength: 2, commutative: false}, - {name: "OnesCountMaskedInt8x32", argLength: 2, commutative: false}, - {name: "OnesCountMaskedInt8x64", argLength: 2, commutative: false}, - {name: "OnesCountMaskedInt16x8", argLength: 2, commutative: false}, - {name: "OnesCountMaskedInt16x16", argLength: 2, commutative: false}, - {name: "OnesCountMaskedInt16x32", argLength: 2, commutative: false}, - {name: "OnesCountMaskedInt32x4", argLength: 2, commutative: false}, - {name: "OnesCountMaskedInt32x8", argLength: 2, commutative: false}, - {name: "OnesCountMaskedInt32x16", argLength: 2, commutative: false}, - {name: "OnesCountMaskedInt64x2", argLength: 2, commutative: false}, - {name: "OnesCountMaskedInt64x4", argLength: 2, commutative: false}, - {name: "OnesCountMaskedInt64x8", argLength: 2, commutative: false}, - {name: "OnesCountMaskedUint8x16", argLength: 2, commutative: false}, - {name: "OnesCountMaskedUint8x32", argLength: 2, commutative: false}, - {name: "OnesCountMaskedUint8x64", argLength: 2, commutative: false}, - {name: "OnesCountMaskedUint16x8", argLength: 2, commutative: false}, - {name: "OnesCountMaskedUint16x16", argLength: 2, commutative: false}, - {name: "OnesCountMaskedUint16x32", argLength: 2, commutative: false}, - {name: "OnesCountMaskedUint32x4", argLength: 2, commutative: false}, - {name: "OnesCountMaskedUint32x8", argLength: 2, commutative: false}, - {name: "OnesCountMaskedUint32x16", argLength: 2, commutative: false}, - {name: "OnesCountMaskedUint64x2", argLength: 2, commutative: false}, - {name: "OnesCountMaskedUint64x4", argLength: 2, commutative: false}, - {name: "OnesCountMaskedUint64x8", argLength: 2, commutative: false}, {name: "OnesCountUint8x16", argLength: 1, commutative: false}, {name: "OnesCountUint8x32", argLength: 1, commutative: false}, {name: "OnesCountUint8x64", argLength: 1, commutative: false}, @@ -1028,18 +566,6 @@ func simdGenericOps() []opData { {name: "OrInt64x2", argLength: 2, commutative: true}, {name: "OrInt64x4", argLength: 2, commutative: true}, {name: "OrInt64x8", argLength: 2, commutative: true}, - {name: "OrMaskedInt32x4", argLength: 3, commutative: true}, - {name: "OrMaskedInt32x8", argLength: 3, commutative: true}, - {name: "OrMaskedInt32x16", argLength: 3, commutative: true}, - {name: "OrMaskedInt64x2", argLength: 3, commutative: true}, - {name: "OrMaskedInt64x4", argLength: 3, commutative: true}, - {name: "OrMaskedInt64x8", argLength: 3, commutative: true}, - {name: "OrMaskedUint32x4", 
argLength: 3, commutative: true}, - {name: "OrMaskedUint32x8", argLength: 3, commutative: true}, - {name: "OrMaskedUint32x16", argLength: 3, commutative: true}, - {name: "OrMaskedUint64x2", argLength: 3, commutative: true}, - {name: "OrMaskedUint64x4", argLength: 3, commutative: true}, - {name: "OrMaskedUint64x8", argLength: 3, commutative: true}, {name: "OrUint8x16", argLength: 2, commutative: true}, {name: "OrUint8x32", argLength: 2, commutative: true}, {name: "OrUint8x64", argLength: 2, commutative: true}, @@ -1070,36 +596,6 @@ func simdGenericOps() []opData { {name: "Permute2Int64x2", argLength: 3, commutative: false}, {name: "Permute2Int64x4", argLength: 3, commutative: false}, {name: "Permute2Int64x8", argLength: 3, commutative: false}, - {name: "Permute2MaskedFloat32x4", argLength: 4, commutative: false}, - {name: "Permute2MaskedFloat32x8", argLength: 4, commutative: false}, - {name: "Permute2MaskedFloat32x16", argLength: 4, commutative: false}, - {name: "Permute2MaskedFloat64x2", argLength: 4, commutative: false}, - {name: "Permute2MaskedFloat64x4", argLength: 4, commutative: false}, - {name: "Permute2MaskedFloat64x8", argLength: 4, commutative: false}, - {name: "Permute2MaskedInt8x16", argLength: 4, commutative: false}, - {name: "Permute2MaskedInt8x32", argLength: 4, commutative: false}, - {name: "Permute2MaskedInt8x64", argLength: 4, commutative: false}, - {name: "Permute2MaskedInt16x8", argLength: 4, commutative: false}, - {name: "Permute2MaskedInt16x16", argLength: 4, commutative: false}, - {name: "Permute2MaskedInt16x32", argLength: 4, commutative: false}, - {name: "Permute2MaskedInt32x4", argLength: 4, commutative: false}, - {name: "Permute2MaskedInt32x8", argLength: 4, commutative: false}, - {name: "Permute2MaskedInt32x16", argLength: 4, commutative: false}, - {name: "Permute2MaskedInt64x2", argLength: 4, commutative: false}, - {name: "Permute2MaskedInt64x4", argLength: 4, commutative: false}, - {name: "Permute2MaskedInt64x8", argLength: 4, commutative: false}, - {name: "Permute2MaskedUint8x16", argLength: 4, commutative: false}, - {name: "Permute2MaskedUint8x32", argLength: 4, commutative: false}, - {name: "Permute2MaskedUint8x64", argLength: 4, commutative: false}, - {name: "Permute2MaskedUint16x8", argLength: 4, commutative: false}, - {name: "Permute2MaskedUint16x16", argLength: 4, commutative: false}, - {name: "Permute2MaskedUint16x32", argLength: 4, commutative: false}, - {name: "Permute2MaskedUint32x4", argLength: 4, commutative: false}, - {name: "Permute2MaskedUint32x8", argLength: 4, commutative: false}, - {name: "Permute2MaskedUint32x16", argLength: 4, commutative: false}, - {name: "Permute2MaskedUint64x2", argLength: 4, commutative: false}, - {name: "Permute2MaskedUint64x4", argLength: 4, commutative: false}, - {name: "Permute2MaskedUint64x8", argLength: 4, commutative: false}, {name: "Permute2Uint8x16", argLength: 3, commutative: false}, {name: "Permute2Uint8x32", argLength: 3, commutative: false}, {name: "Permute2Uint8x64", argLength: 3, commutative: false}, @@ -1126,30 +622,6 @@ func simdGenericOps() []opData { {name: "PermuteInt32x16", argLength: 2, commutative: false}, {name: "PermuteInt64x4", argLength: 2, commutative: false}, {name: "PermuteInt64x8", argLength: 2, commutative: false}, - {name: "PermuteMaskedFloat32x8", argLength: 3, commutative: false}, - {name: "PermuteMaskedFloat32x16", argLength: 3, commutative: false}, - {name: "PermuteMaskedFloat64x4", argLength: 3, commutative: false}, - {name: "PermuteMaskedFloat64x8", argLength: 3, commutative: 
false}, - {name: "PermuteMaskedInt8x16", argLength: 3, commutative: false}, - {name: "PermuteMaskedInt8x32", argLength: 3, commutative: false}, - {name: "PermuteMaskedInt8x64", argLength: 3, commutative: false}, - {name: "PermuteMaskedInt16x8", argLength: 3, commutative: false}, - {name: "PermuteMaskedInt16x16", argLength: 3, commutative: false}, - {name: "PermuteMaskedInt16x32", argLength: 3, commutative: false}, - {name: "PermuteMaskedInt32x8", argLength: 3, commutative: false}, - {name: "PermuteMaskedInt32x16", argLength: 3, commutative: false}, - {name: "PermuteMaskedInt64x4", argLength: 3, commutative: false}, - {name: "PermuteMaskedInt64x8", argLength: 3, commutative: false}, - {name: "PermuteMaskedUint8x16", argLength: 3, commutative: false}, - {name: "PermuteMaskedUint8x32", argLength: 3, commutative: false}, - {name: "PermuteMaskedUint8x64", argLength: 3, commutative: false}, - {name: "PermuteMaskedUint16x8", argLength: 3, commutative: false}, - {name: "PermuteMaskedUint16x16", argLength: 3, commutative: false}, - {name: "PermuteMaskedUint16x32", argLength: 3, commutative: false}, - {name: "PermuteMaskedUint32x8", argLength: 3, commutative: false}, - {name: "PermuteMaskedUint32x16", argLength: 3, commutative: false}, - {name: "PermuteMaskedUint64x4", argLength: 3, commutative: false}, - {name: "PermuteMaskedUint64x8", argLength: 3, commutative: false}, {name: "PermuteUint8x16", argLength: 2, commutative: false}, {name: "PermuteUint8x32", argLength: 2, commutative: false}, {name: "PermuteUint8x64", argLength: 2, commutative: false}, @@ -1166,42 +638,18 @@ func simdGenericOps() []opData { {name: "ReciprocalFloat64x2", argLength: 1, commutative: false}, {name: "ReciprocalFloat64x4", argLength: 1, commutative: false}, {name: "ReciprocalFloat64x8", argLength: 1, commutative: false}, - {name: "ReciprocalMaskedFloat32x4", argLength: 2, commutative: false}, - {name: "ReciprocalMaskedFloat32x8", argLength: 2, commutative: false}, - {name: "ReciprocalMaskedFloat32x16", argLength: 2, commutative: false}, - {name: "ReciprocalMaskedFloat64x2", argLength: 2, commutative: false}, - {name: "ReciprocalMaskedFloat64x4", argLength: 2, commutative: false}, - {name: "ReciprocalMaskedFloat64x8", argLength: 2, commutative: false}, {name: "ReciprocalSqrtFloat32x4", argLength: 1, commutative: false}, {name: "ReciprocalSqrtFloat32x8", argLength: 1, commutative: false}, {name: "ReciprocalSqrtFloat32x16", argLength: 1, commutative: false}, {name: "ReciprocalSqrtFloat64x2", argLength: 1, commutative: false}, {name: "ReciprocalSqrtFloat64x4", argLength: 1, commutative: false}, {name: "ReciprocalSqrtFloat64x8", argLength: 1, commutative: false}, - {name: "ReciprocalSqrtMaskedFloat32x4", argLength: 2, commutative: false}, - {name: "ReciprocalSqrtMaskedFloat32x8", argLength: 2, commutative: false}, - {name: "ReciprocalSqrtMaskedFloat32x16", argLength: 2, commutative: false}, - {name: "ReciprocalSqrtMaskedFloat64x2", argLength: 2, commutative: false}, - {name: "ReciprocalSqrtMaskedFloat64x4", argLength: 2, commutative: false}, - {name: "ReciprocalSqrtMaskedFloat64x8", argLength: 2, commutative: false}, {name: "RotateLeftInt32x4", argLength: 2, commutative: false}, {name: "RotateLeftInt32x8", argLength: 2, commutative: false}, {name: "RotateLeftInt32x16", argLength: 2, commutative: false}, {name: "RotateLeftInt64x2", argLength: 2, commutative: false}, {name: "RotateLeftInt64x4", argLength: 2, commutative: false}, {name: "RotateLeftInt64x8", argLength: 2, commutative: false}, - {name: "RotateLeftMaskedInt32x4", 
argLength: 3, commutative: false}, - {name: "RotateLeftMaskedInt32x8", argLength: 3, commutative: false}, - {name: "RotateLeftMaskedInt32x16", argLength: 3, commutative: false}, - {name: "RotateLeftMaskedInt64x2", argLength: 3, commutative: false}, - {name: "RotateLeftMaskedInt64x4", argLength: 3, commutative: false}, - {name: "RotateLeftMaskedInt64x8", argLength: 3, commutative: false}, - {name: "RotateLeftMaskedUint32x4", argLength: 3, commutative: false}, - {name: "RotateLeftMaskedUint32x8", argLength: 3, commutative: false}, - {name: "RotateLeftMaskedUint32x16", argLength: 3, commutative: false}, - {name: "RotateLeftMaskedUint64x2", argLength: 3, commutative: false}, - {name: "RotateLeftMaskedUint64x4", argLength: 3, commutative: false}, - {name: "RotateLeftMaskedUint64x8", argLength: 3, commutative: false}, {name: "RotateLeftUint32x4", argLength: 2, commutative: false}, {name: "RotateLeftUint32x8", argLength: 2, commutative: false}, {name: "RotateLeftUint32x16", argLength: 2, commutative: false}, @@ -1214,18 +662,6 @@ func simdGenericOps() []opData { {name: "RotateRightInt64x2", argLength: 2, commutative: false}, {name: "RotateRightInt64x4", argLength: 2, commutative: false}, {name: "RotateRightInt64x8", argLength: 2, commutative: false}, - {name: "RotateRightMaskedInt32x4", argLength: 3, commutative: false}, - {name: "RotateRightMaskedInt32x8", argLength: 3, commutative: false}, - {name: "RotateRightMaskedInt32x16", argLength: 3, commutative: false}, - {name: "RotateRightMaskedInt64x2", argLength: 3, commutative: false}, - {name: "RotateRightMaskedInt64x4", argLength: 3, commutative: false}, - {name: "RotateRightMaskedInt64x8", argLength: 3, commutative: false}, - {name: "RotateRightMaskedUint32x4", argLength: 3, commutative: false}, - {name: "RotateRightMaskedUint32x8", argLength: 3, commutative: false}, - {name: "RotateRightMaskedUint32x16", argLength: 3, commutative: false}, - {name: "RotateRightMaskedUint64x2", argLength: 3, commutative: false}, - {name: "RotateRightMaskedUint64x4", argLength: 3, commutative: false}, - {name: "RotateRightMaskedUint64x8", argLength: 3, commutative: false}, {name: "RotateRightUint32x4", argLength: 2, commutative: false}, {name: "RotateRightUint32x8", argLength: 2, commutative: false}, {name: "RotateRightUint32x16", argLength: 2, commutative: false}, @@ -1242,12 +678,6 @@ func simdGenericOps() []opData { {name: "ScaleFloat64x2", argLength: 2, commutative: false}, {name: "ScaleFloat64x4", argLength: 2, commutative: false}, {name: "ScaleFloat64x8", argLength: 2, commutative: false}, - {name: "ScaleMaskedFloat32x4", argLength: 3, commutative: false}, - {name: "ScaleMaskedFloat32x8", argLength: 3, commutative: false}, - {name: "ScaleMaskedFloat32x16", argLength: 3, commutative: false}, - {name: "ScaleMaskedFloat64x2", argLength: 3, commutative: false}, - {name: "ScaleMaskedFloat64x4", argLength: 3, commutative: false}, - {name: "ScaleMaskedFloat64x8", argLength: 3, commutative: false}, {name: "SetHiFloat32x8", argLength: 2, commutative: false}, {name: "SetHiFloat32x16", argLength: 2, commutative: false}, {name: "SetHiFloat64x4", argLength: 2, commutative: false}, @@ -1297,24 +727,6 @@ func simdGenericOps() []opData { {name: "ShiftAllLeftInt64x2", argLength: 2, commutative: false}, {name: "ShiftAllLeftInt64x4", argLength: 2, commutative: false}, {name: "ShiftAllLeftInt64x8", argLength: 2, commutative: false}, - {name: "ShiftAllLeftMaskedInt16x8", argLength: 3, commutative: false}, - {name: "ShiftAllLeftMaskedInt16x16", argLength: 3, commutative: false}, 
- {name: "ShiftAllLeftMaskedInt16x32", argLength: 3, commutative: false}, - {name: "ShiftAllLeftMaskedInt32x4", argLength: 3, commutative: false}, - {name: "ShiftAllLeftMaskedInt32x8", argLength: 3, commutative: false}, - {name: "ShiftAllLeftMaskedInt32x16", argLength: 3, commutative: false}, - {name: "ShiftAllLeftMaskedInt64x2", argLength: 3, commutative: false}, - {name: "ShiftAllLeftMaskedInt64x4", argLength: 3, commutative: false}, - {name: "ShiftAllLeftMaskedInt64x8", argLength: 3, commutative: false}, - {name: "ShiftAllLeftMaskedUint16x8", argLength: 3, commutative: false}, - {name: "ShiftAllLeftMaskedUint16x16", argLength: 3, commutative: false}, - {name: "ShiftAllLeftMaskedUint16x32", argLength: 3, commutative: false}, - {name: "ShiftAllLeftMaskedUint32x4", argLength: 3, commutative: false}, - {name: "ShiftAllLeftMaskedUint32x8", argLength: 3, commutative: false}, - {name: "ShiftAllLeftMaskedUint32x16", argLength: 3, commutative: false}, - {name: "ShiftAllLeftMaskedUint64x2", argLength: 3, commutative: false}, - {name: "ShiftAllLeftMaskedUint64x4", argLength: 3, commutative: false}, - {name: "ShiftAllLeftMaskedUint64x8", argLength: 3, commutative: false}, {name: "ShiftAllLeftUint16x8", argLength: 2, commutative: false}, {name: "ShiftAllLeftUint16x16", argLength: 2, commutative: false}, {name: "ShiftAllLeftUint16x32", argLength: 2, commutative: false}, @@ -1333,24 +745,6 @@ func simdGenericOps() []opData { {name: "ShiftAllRightInt64x2", argLength: 2, commutative: false}, {name: "ShiftAllRightInt64x4", argLength: 2, commutative: false}, {name: "ShiftAllRightInt64x8", argLength: 2, commutative: false}, - {name: "ShiftAllRightMaskedInt16x8", argLength: 3, commutative: false}, - {name: "ShiftAllRightMaskedInt16x16", argLength: 3, commutative: false}, - {name: "ShiftAllRightMaskedInt16x32", argLength: 3, commutative: false}, - {name: "ShiftAllRightMaskedInt32x4", argLength: 3, commutative: false}, - {name: "ShiftAllRightMaskedInt32x8", argLength: 3, commutative: false}, - {name: "ShiftAllRightMaskedInt32x16", argLength: 3, commutative: false}, - {name: "ShiftAllRightMaskedInt64x2", argLength: 3, commutative: false}, - {name: "ShiftAllRightMaskedInt64x4", argLength: 3, commutative: false}, - {name: "ShiftAllRightMaskedInt64x8", argLength: 3, commutative: false}, - {name: "ShiftAllRightMaskedUint16x8", argLength: 3, commutative: false}, - {name: "ShiftAllRightMaskedUint16x16", argLength: 3, commutative: false}, - {name: "ShiftAllRightMaskedUint16x32", argLength: 3, commutative: false}, - {name: "ShiftAllRightMaskedUint32x4", argLength: 3, commutative: false}, - {name: "ShiftAllRightMaskedUint32x8", argLength: 3, commutative: false}, - {name: "ShiftAllRightMaskedUint32x16", argLength: 3, commutative: false}, - {name: "ShiftAllRightMaskedUint64x2", argLength: 3, commutative: false}, - {name: "ShiftAllRightMaskedUint64x4", argLength: 3, commutative: false}, - {name: "ShiftAllRightMaskedUint64x8", argLength: 3, commutative: false}, {name: "ShiftAllRightUint16x8", argLength: 2, commutative: false}, {name: "ShiftAllRightUint16x16", argLength: 2, commutative: false}, {name: "ShiftAllRightUint16x32", argLength: 2, commutative: false}, @@ -1369,24 +763,6 @@ func simdGenericOps() []opData { {name: "ShiftLeftConcatInt64x2", argLength: 3, commutative: false}, {name: "ShiftLeftConcatInt64x4", argLength: 3, commutative: false}, {name: "ShiftLeftConcatInt64x8", argLength: 3, commutative: false}, - {name: "ShiftLeftConcatMaskedInt16x8", argLength: 4, commutative: false}, - {name: 
"ShiftLeftConcatMaskedInt16x16", argLength: 4, commutative: false}, - {name: "ShiftLeftConcatMaskedInt16x32", argLength: 4, commutative: false}, - {name: "ShiftLeftConcatMaskedInt32x4", argLength: 4, commutative: false}, - {name: "ShiftLeftConcatMaskedInt32x8", argLength: 4, commutative: false}, - {name: "ShiftLeftConcatMaskedInt32x16", argLength: 4, commutative: false}, - {name: "ShiftLeftConcatMaskedInt64x2", argLength: 4, commutative: false}, - {name: "ShiftLeftConcatMaskedInt64x4", argLength: 4, commutative: false}, - {name: "ShiftLeftConcatMaskedInt64x8", argLength: 4, commutative: false}, - {name: "ShiftLeftConcatMaskedUint16x8", argLength: 4, commutative: false}, - {name: "ShiftLeftConcatMaskedUint16x16", argLength: 4, commutative: false}, - {name: "ShiftLeftConcatMaskedUint16x32", argLength: 4, commutative: false}, - {name: "ShiftLeftConcatMaskedUint32x4", argLength: 4, commutative: false}, - {name: "ShiftLeftConcatMaskedUint32x8", argLength: 4, commutative: false}, - {name: "ShiftLeftConcatMaskedUint32x16", argLength: 4, commutative: false}, - {name: "ShiftLeftConcatMaskedUint64x2", argLength: 4, commutative: false}, - {name: "ShiftLeftConcatMaskedUint64x4", argLength: 4, commutative: false}, - {name: "ShiftLeftConcatMaskedUint64x8", argLength: 4, commutative: false}, {name: "ShiftLeftConcatUint16x8", argLength: 3, commutative: false}, {name: "ShiftLeftConcatUint16x16", argLength: 3, commutative: false}, {name: "ShiftLeftConcatUint16x32", argLength: 3, commutative: false}, @@ -1405,24 +781,6 @@ func simdGenericOps() []opData { {name: "ShiftLeftInt64x2", argLength: 2, commutative: false}, {name: "ShiftLeftInt64x4", argLength: 2, commutative: false}, {name: "ShiftLeftInt64x8", argLength: 2, commutative: false}, - {name: "ShiftLeftMaskedInt16x8", argLength: 3, commutative: false}, - {name: "ShiftLeftMaskedInt16x16", argLength: 3, commutative: false}, - {name: "ShiftLeftMaskedInt16x32", argLength: 3, commutative: false}, - {name: "ShiftLeftMaskedInt32x4", argLength: 3, commutative: false}, - {name: "ShiftLeftMaskedInt32x8", argLength: 3, commutative: false}, - {name: "ShiftLeftMaskedInt32x16", argLength: 3, commutative: false}, - {name: "ShiftLeftMaskedInt64x2", argLength: 3, commutative: false}, - {name: "ShiftLeftMaskedInt64x4", argLength: 3, commutative: false}, - {name: "ShiftLeftMaskedInt64x8", argLength: 3, commutative: false}, - {name: "ShiftLeftMaskedUint16x8", argLength: 3, commutative: false}, - {name: "ShiftLeftMaskedUint16x16", argLength: 3, commutative: false}, - {name: "ShiftLeftMaskedUint16x32", argLength: 3, commutative: false}, - {name: "ShiftLeftMaskedUint32x4", argLength: 3, commutative: false}, - {name: "ShiftLeftMaskedUint32x8", argLength: 3, commutative: false}, - {name: "ShiftLeftMaskedUint32x16", argLength: 3, commutative: false}, - {name: "ShiftLeftMaskedUint64x2", argLength: 3, commutative: false}, - {name: "ShiftLeftMaskedUint64x4", argLength: 3, commutative: false}, - {name: "ShiftLeftMaskedUint64x8", argLength: 3, commutative: false}, {name: "ShiftLeftUint16x8", argLength: 2, commutative: false}, {name: "ShiftLeftUint16x16", argLength: 2, commutative: false}, {name: "ShiftLeftUint16x32", argLength: 2, commutative: false}, @@ -1441,24 +799,6 @@ func simdGenericOps() []opData { {name: "ShiftRightConcatInt64x2", argLength: 3, commutative: false}, {name: "ShiftRightConcatInt64x4", argLength: 3, commutative: false}, {name: "ShiftRightConcatInt64x8", argLength: 3, commutative: false}, - {name: "ShiftRightConcatMaskedInt16x8", argLength: 4, commutative: false}, - 
{name: "ShiftRightConcatMaskedInt16x16", argLength: 4, commutative: false}, - {name: "ShiftRightConcatMaskedInt16x32", argLength: 4, commutative: false}, - {name: "ShiftRightConcatMaskedInt32x4", argLength: 4, commutative: false}, - {name: "ShiftRightConcatMaskedInt32x8", argLength: 4, commutative: false}, - {name: "ShiftRightConcatMaskedInt32x16", argLength: 4, commutative: false}, - {name: "ShiftRightConcatMaskedInt64x2", argLength: 4, commutative: false}, - {name: "ShiftRightConcatMaskedInt64x4", argLength: 4, commutative: false}, - {name: "ShiftRightConcatMaskedInt64x8", argLength: 4, commutative: false}, - {name: "ShiftRightConcatMaskedUint16x8", argLength: 4, commutative: false}, - {name: "ShiftRightConcatMaskedUint16x16", argLength: 4, commutative: false}, - {name: "ShiftRightConcatMaskedUint16x32", argLength: 4, commutative: false}, - {name: "ShiftRightConcatMaskedUint32x4", argLength: 4, commutative: false}, - {name: "ShiftRightConcatMaskedUint32x8", argLength: 4, commutative: false}, - {name: "ShiftRightConcatMaskedUint32x16", argLength: 4, commutative: false}, - {name: "ShiftRightConcatMaskedUint64x2", argLength: 4, commutative: false}, - {name: "ShiftRightConcatMaskedUint64x4", argLength: 4, commutative: false}, - {name: "ShiftRightConcatMaskedUint64x8", argLength: 4, commutative: false}, {name: "ShiftRightConcatUint16x8", argLength: 3, commutative: false}, {name: "ShiftRightConcatUint16x16", argLength: 3, commutative: false}, {name: "ShiftRightConcatUint16x32", argLength: 3, commutative: false}, @@ -1477,24 +817,6 @@ func simdGenericOps() []opData { {name: "ShiftRightInt64x2", argLength: 2, commutative: false}, {name: "ShiftRightInt64x4", argLength: 2, commutative: false}, {name: "ShiftRightInt64x8", argLength: 2, commutative: false}, - {name: "ShiftRightMaskedInt16x8", argLength: 3, commutative: false}, - {name: "ShiftRightMaskedInt16x16", argLength: 3, commutative: false}, - {name: "ShiftRightMaskedInt16x32", argLength: 3, commutative: false}, - {name: "ShiftRightMaskedInt32x4", argLength: 3, commutative: false}, - {name: "ShiftRightMaskedInt32x8", argLength: 3, commutative: false}, - {name: "ShiftRightMaskedInt32x16", argLength: 3, commutative: false}, - {name: "ShiftRightMaskedInt64x2", argLength: 3, commutative: false}, - {name: "ShiftRightMaskedInt64x4", argLength: 3, commutative: false}, - {name: "ShiftRightMaskedInt64x8", argLength: 3, commutative: false}, - {name: "ShiftRightMaskedUint16x8", argLength: 3, commutative: false}, - {name: "ShiftRightMaskedUint16x16", argLength: 3, commutative: false}, - {name: "ShiftRightMaskedUint16x32", argLength: 3, commutative: false}, - {name: "ShiftRightMaskedUint32x4", argLength: 3, commutative: false}, - {name: "ShiftRightMaskedUint32x8", argLength: 3, commutative: false}, - {name: "ShiftRightMaskedUint32x16", argLength: 3, commutative: false}, - {name: "ShiftRightMaskedUint64x2", argLength: 3, commutative: false}, - {name: "ShiftRightMaskedUint64x4", argLength: 3, commutative: false}, - {name: "ShiftRightMaskedUint64x8", argLength: 3, commutative: false}, {name: "ShiftRightUint16x8", argLength: 2, commutative: false}, {name: "ShiftRightUint16x16", argLength: 2, commutative: false}, {name: "ShiftRightUint16x32", argLength: 2, commutative: false}, @@ -1510,12 +832,6 @@ func simdGenericOps() []opData { {name: "SqrtFloat64x2", argLength: 1, commutative: false}, {name: "SqrtFloat64x4", argLength: 1, commutative: false}, {name: "SqrtFloat64x8", argLength: 1, commutative: false}, - {name: "SqrtMaskedFloat32x4", argLength: 2, 
commutative: false}, - {name: "SqrtMaskedFloat32x8", argLength: 2, commutative: false}, - {name: "SqrtMaskedFloat32x16", argLength: 2, commutative: false}, - {name: "SqrtMaskedFloat64x2", argLength: 2, commutative: false}, - {name: "SqrtMaskedFloat64x4", argLength: 2, commutative: false}, - {name: "SqrtMaskedFloat64x8", argLength: 2, commutative: false}, {name: "SubFloat32x4", argLength: 2, commutative: false}, {name: "SubFloat32x8", argLength: 2, commutative: false}, {name: "SubFloat32x16", argLength: 2, commutative: false}, @@ -1534,36 +850,6 @@ func simdGenericOps() []opData { {name: "SubInt64x2", argLength: 2, commutative: false}, {name: "SubInt64x4", argLength: 2, commutative: false}, {name: "SubInt64x8", argLength: 2, commutative: false}, - {name: "SubMaskedFloat32x4", argLength: 3, commutative: false}, - {name: "SubMaskedFloat32x8", argLength: 3, commutative: false}, - {name: "SubMaskedFloat32x16", argLength: 3, commutative: false}, - {name: "SubMaskedFloat64x2", argLength: 3, commutative: false}, - {name: "SubMaskedFloat64x4", argLength: 3, commutative: false}, - {name: "SubMaskedFloat64x8", argLength: 3, commutative: false}, - {name: "SubMaskedInt8x16", argLength: 3, commutative: false}, - {name: "SubMaskedInt8x32", argLength: 3, commutative: false}, - {name: "SubMaskedInt8x64", argLength: 3, commutative: false}, - {name: "SubMaskedInt16x8", argLength: 3, commutative: false}, - {name: "SubMaskedInt16x16", argLength: 3, commutative: false}, - {name: "SubMaskedInt16x32", argLength: 3, commutative: false}, - {name: "SubMaskedInt32x4", argLength: 3, commutative: false}, - {name: "SubMaskedInt32x8", argLength: 3, commutative: false}, - {name: "SubMaskedInt32x16", argLength: 3, commutative: false}, - {name: "SubMaskedInt64x2", argLength: 3, commutative: false}, - {name: "SubMaskedInt64x4", argLength: 3, commutative: false}, - {name: "SubMaskedInt64x8", argLength: 3, commutative: false}, - {name: "SubMaskedUint8x16", argLength: 3, commutative: false}, - {name: "SubMaskedUint8x32", argLength: 3, commutative: false}, - {name: "SubMaskedUint8x64", argLength: 3, commutative: false}, - {name: "SubMaskedUint16x8", argLength: 3, commutative: false}, - {name: "SubMaskedUint16x16", argLength: 3, commutative: false}, - {name: "SubMaskedUint16x32", argLength: 3, commutative: false}, - {name: "SubMaskedUint32x4", argLength: 3, commutative: false}, - {name: "SubMaskedUint32x8", argLength: 3, commutative: false}, - {name: "SubMaskedUint32x16", argLength: 3, commutative: false}, - {name: "SubMaskedUint64x2", argLength: 3, commutative: false}, - {name: "SubMaskedUint64x4", argLength: 3, commutative: false}, - {name: "SubMaskedUint64x8", argLength: 3, commutative: false}, {name: "SubPairsFloat32x4", argLength: 2, commutative: false}, {name: "SubPairsFloat32x8", argLength: 2, commutative: false}, {name: "SubPairsFloat64x2", argLength: 2, commutative: false}, @@ -1584,18 +870,6 @@ func simdGenericOps() []opData { {name: "SubSaturatedInt16x8", argLength: 2, commutative: false}, {name: "SubSaturatedInt16x16", argLength: 2, commutative: false}, {name: "SubSaturatedInt16x32", argLength: 2, commutative: false}, - {name: "SubSaturatedMaskedInt8x16", argLength: 3, commutative: false}, - {name: "SubSaturatedMaskedInt8x32", argLength: 3, commutative: false}, - {name: "SubSaturatedMaskedInt8x64", argLength: 3, commutative: false}, - {name: "SubSaturatedMaskedInt16x8", argLength: 3, commutative: false}, - {name: "SubSaturatedMaskedInt16x16", argLength: 3, commutative: false}, - {name: "SubSaturatedMaskedInt16x32", 
argLength: 3, commutative: false}, - {name: "SubSaturatedMaskedUint8x16", argLength: 3, commutative: false}, - {name: "SubSaturatedMaskedUint8x32", argLength: 3, commutative: false}, - {name: "SubSaturatedMaskedUint8x64", argLength: 3, commutative: false}, - {name: "SubSaturatedMaskedUint16x8", argLength: 3, commutative: false}, - {name: "SubSaturatedMaskedUint16x16", argLength: 3, commutative: false}, - {name: "SubSaturatedMaskedUint16x32", argLength: 3, commutative: false}, {name: "SubSaturatedUint8x16", argLength: 2, commutative: false}, {name: "SubSaturatedUint8x32", argLength: 2, commutative: false}, {name: "SubSaturatedUint8x64", argLength: 2, commutative: false}, @@ -1630,18 +904,6 @@ func simdGenericOps() []opData { {name: "XorInt64x2", argLength: 2, commutative: true}, {name: "XorInt64x4", argLength: 2, commutative: true}, {name: "XorInt64x8", argLength: 2, commutative: true}, - {name: "XorMaskedInt32x4", argLength: 3, commutative: true}, - {name: "XorMaskedInt32x8", argLength: 3, commutative: true}, - {name: "XorMaskedInt32x16", argLength: 3, commutative: true}, - {name: "XorMaskedInt64x2", argLength: 3, commutative: true}, - {name: "XorMaskedInt64x4", argLength: 3, commutative: true}, - {name: "XorMaskedInt64x8", argLength: 3, commutative: true}, - {name: "XorMaskedUint32x4", argLength: 3, commutative: true}, - {name: "XorMaskedUint32x8", argLength: 3, commutative: true}, - {name: "XorMaskedUint32x16", argLength: 3, commutative: true}, - {name: "XorMaskedUint64x2", argLength: 3, commutative: true}, - {name: "XorMaskedUint64x4", argLength: 3, commutative: true}, - {name: "XorMaskedUint64x8", argLength: 3, commutative: true}, {name: "XorUint8x16", argLength: 2, commutative: true}, {name: "XorUint8x32", argLength: 2, commutative: true}, {name: "XorUint8x64", argLength: 2, commutative: true}, @@ -1666,57 +928,27 @@ func simdGenericOps() []opData { {name: "CeilScaledFloat64x2", argLength: 1, commutative: false, aux: "UInt8"}, {name: "CeilScaledFloat64x4", argLength: 1, commutative: false, aux: "UInt8"}, {name: "CeilScaledFloat64x8", argLength: 1, commutative: false, aux: "UInt8"}, - {name: "CeilScaledMaskedFloat32x4", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "CeilScaledMaskedFloat32x8", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "CeilScaledMaskedFloat32x16", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "CeilScaledMaskedFloat64x2", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "CeilScaledMaskedFloat64x4", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "CeilScaledMaskedFloat64x8", argLength: 2, commutative: false, aux: "UInt8"}, {name: "CeilScaledResidueFloat32x4", argLength: 1, commutative: false, aux: "UInt8"}, {name: "CeilScaledResidueFloat32x8", argLength: 1, commutative: false, aux: "UInt8"}, {name: "CeilScaledResidueFloat32x16", argLength: 1, commutative: false, aux: "UInt8"}, {name: "CeilScaledResidueFloat64x2", argLength: 1, commutative: false, aux: "UInt8"}, {name: "CeilScaledResidueFloat64x4", argLength: 1, commutative: false, aux: "UInt8"}, {name: "CeilScaledResidueFloat64x8", argLength: 1, commutative: false, aux: "UInt8"}, - {name: "CeilScaledResidueMaskedFloat32x4", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "CeilScaledResidueMaskedFloat32x8", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "CeilScaledResidueMaskedFloat32x16", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "CeilScaledResidueMaskedFloat64x2", argLength: 2, commutative: false, aux: "UInt8"}, - {name: 
"CeilScaledResidueMaskedFloat64x4", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "CeilScaledResidueMaskedFloat64x8", argLength: 2, commutative: false, aux: "UInt8"}, {name: "FloorScaledFloat32x4", argLength: 1, commutative: false, aux: "UInt8"}, {name: "FloorScaledFloat32x8", argLength: 1, commutative: false, aux: "UInt8"}, {name: "FloorScaledFloat32x16", argLength: 1, commutative: false, aux: "UInt8"}, {name: "FloorScaledFloat64x2", argLength: 1, commutative: false, aux: "UInt8"}, {name: "FloorScaledFloat64x4", argLength: 1, commutative: false, aux: "UInt8"}, {name: "FloorScaledFloat64x8", argLength: 1, commutative: false, aux: "UInt8"}, - {name: "FloorScaledMaskedFloat32x4", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "FloorScaledMaskedFloat32x8", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "FloorScaledMaskedFloat32x16", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "FloorScaledMaskedFloat64x2", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "FloorScaledMaskedFloat64x4", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "FloorScaledMaskedFloat64x8", argLength: 2, commutative: false, aux: "UInt8"}, {name: "FloorScaledResidueFloat32x4", argLength: 1, commutative: false, aux: "UInt8"}, {name: "FloorScaledResidueFloat32x8", argLength: 1, commutative: false, aux: "UInt8"}, {name: "FloorScaledResidueFloat32x16", argLength: 1, commutative: false, aux: "UInt8"}, {name: "FloorScaledResidueFloat64x2", argLength: 1, commutative: false, aux: "UInt8"}, {name: "FloorScaledResidueFloat64x4", argLength: 1, commutative: false, aux: "UInt8"}, {name: "FloorScaledResidueFloat64x8", argLength: 1, commutative: false, aux: "UInt8"}, - {name: "FloorScaledResidueMaskedFloat32x4", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "FloorScaledResidueMaskedFloat32x8", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "FloorScaledResidueMaskedFloat32x16", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "FloorScaledResidueMaskedFloat64x2", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "FloorScaledResidueMaskedFloat64x4", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "FloorScaledResidueMaskedFloat64x8", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "GaloisFieldAffineTransformInverseMaskedUint8x16", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "GaloisFieldAffineTransformInverseMaskedUint8x32", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "GaloisFieldAffineTransformInverseMaskedUint8x64", argLength: 3, commutative: false, aux: "UInt8"}, {name: "GaloisFieldAffineTransformInverseUint8x16", argLength: 2, commutative: false, aux: "UInt8"}, {name: "GaloisFieldAffineTransformInverseUint8x32", argLength: 2, commutative: false, aux: "UInt8"}, {name: "GaloisFieldAffineTransformInverseUint8x64", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "GaloisFieldAffineTransformMaskedUint8x16", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "GaloisFieldAffineTransformMaskedUint8x32", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "GaloisFieldAffineTransformMaskedUint8x64", argLength: 3, commutative: false, aux: "UInt8"}, {name: "GaloisFieldAffineTransformUint8x16", argLength: 2, commutative: false, aux: "UInt8"}, {name: "GaloisFieldAffineTransformUint8x32", argLength: 2, commutative: false, aux: "UInt8"}, {name: "GaloisFieldAffineTransformUint8x64", argLength: 2, commutative: false, aux: "UInt8"}, @@ -1736,18 +968,6 @@ func simdGenericOps() []opData 
{ {name: "RotateAllLeftInt64x2", argLength: 1, commutative: false, aux: "UInt8"}, {name: "RotateAllLeftInt64x4", argLength: 1, commutative: false, aux: "UInt8"}, {name: "RotateAllLeftInt64x8", argLength: 1, commutative: false, aux: "UInt8"}, - {name: "RotateAllLeftMaskedInt32x4", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RotateAllLeftMaskedInt32x8", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RotateAllLeftMaskedInt32x16", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RotateAllLeftMaskedInt64x2", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RotateAllLeftMaskedInt64x4", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RotateAllLeftMaskedInt64x8", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RotateAllLeftMaskedUint32x4", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RotateAllLeftMaskedUint32x8", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RotateAllLeftMaskedUint32x16", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RotateAllLeftMaskedUint64x2", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RotateAllLeftMaskedUint64x4", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RotateAllLeftMaskedUint64x8", argLength: 2, commutative: false, aux: "UInt8"}, {name: "RotateAllLeftUint32x4", argLength: 1, commutative: false, aux: "UInt8"}, {name: "RotateAllLeftUint32x8", argLength: 1, commutative: false, aux: "UInt8"}, {name: "RotateAllLeftUint32x16", argLength: 1, commutative: false, aux: "UInt8"}, @@ -1760,18 +980,6 @@ func simdGenericOps() []opData { {name: "RotateAllRightInt64x2", argLength: 1, commutative: false, aux: "UInt8"}, {name: "RotateAllRightInt64x4", argLength: 1, commutative: false, aux: "UInt8"}, {name: "RotateAllRightInt64x8", argLength: 1, commutative: false, aux: "UInt8"}, - {name: "RotateAllRightMaskedInt32x4", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RotateAllRightMaskedInt32x8", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RotateAllRightMaskedInt32x16", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RotateAllRightMaskedInt64x2", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RotateAllRightMaskedInt64x4", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RotateAllRightMaskedInt64x8", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RotateAllRightMaskedUint32x4", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RotateAllRightMaskedUint32x8", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RotateAllRightMaskedUint32x16", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RotateAllRightMaskedUint64x2", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RotateAllRightMaskedUint64x4", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RotateAllRightMaskedUint64x8", argLength: 2, commutative: false, aux: "UInt8"}, {name: "RotateAllRightUint32x4", argLength: 1, commutative: false, aux: "UInt8"}, {name: "RotateAllRightUint32x8", argLength: 1, commutative: false, aux: "UInt8"}, {name: "RotateAllRightUint32x16", argLength: 1, commutative: false, aux: "UInt8"}, @@ -1784,24 +992,12 @@ func simdGenericOps() []opData { {name: "RoundToEvenScaledFloat64x2", argLength: 1, commutative: false, aux: "UInt8"}, {name: "RoundToEvenScaledFloat64x4", argLength: 1, commutative: false, aux: "UInt8"}, {name: "RoundToEvenScaledFloat64x8", argLength: 1, commutative: false, aux: "UInt8"}, - {name: "RoundToEvenScaledMaskedFloat32x4", argLength: 2, 
commutative: false, aux: "UInt8"}, - {name: "RoundToEvenScaledMaskedFloat32x8", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RoundToEvenScaledMaskedFloat32x16", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RoundToEvenScaledMaskedFloat64x2", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RoundToEvenScaledMaskedFloat64x4", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RoundToEvenScaledMaskedFloat64x8", argLength: 2, commutative: false, aux: "UInt8"}, {name: "RoundToEvenScaledResidueFloat32x4", argLength: 1, commutative: false, aux: "UInt8"}, {name: "RoundToEvenScaledResidueFloat32x8", argLength: 1, commutative: false, aux: "UInt8"}, {name: "RoundToEvenScaledResidueFloat32x16", argLength: 1, commutative: false, aux: "UInt8"}, {name: "RoundToEvenScaledResidueFloat64x2", argLength: 1, commutative: false, aux: "UInt8"}, {name: "RoundToEvenScaledResidueFloat64x4", argLength: 1, commutative: false, aux: "UInt8"}, {name: "RoundToEvenScaledResidueFloat64x8", argLength: 1, commutative: false, aux: "UInt8"}, - {name: "RoundToEvenScaledResidueMaskedFloat32x4", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RoundToEvenScaledResidueMaskedFloat32x8", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RoundToEvenScaledResidueMaskedFloat32x16", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RoundToEvenScaledResidueMaskedFloat64x2", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RoundToEvenScaledResidueMaskedFloat64x4", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "RoundToEvenScaledResidueMaskedFloat64x8", argLength: 2, commutative: false, aux: "UInt8"}, {name: "SetElemFloat32x4", argLength: 2, commutative: false, aux: "UInt8"}, {name: "SetElemFloat64x2", argLength: 2, commutative: false, aux: "UInt8"}, {name: "SetElemInt8x16", argLength: 2, commutative: false, aux: "UInt8"}, @@ -1821,24 +1017,6 @@ func simdGenericOps() []opData { {name: "ShiftAllLeftConcatInt64x2", argLength: 2, commutative: false, aux: "UInt8"}, {name: "ShiftAllLeftConcatInt64x4", argLength: 2, commutative: false, aux: "UInt8"}, {name: "ShiftAllLeftConcatInt64x8", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "ShiftAllLeftConcatMaskedInt16x8", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllLeftConcatMaskedInt16x16", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllLeftConcatMaskedInt16x32", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllLeftConcatMaskedInt32x4", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllLeftConcatMaskedInt32x8", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllLeftConcatMaskedInt32x16", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllLeftConcatMaskedInt64x2", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllLeftConcatMaskedInt64x4", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllLeftConcatMaskedInt64x8", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllLeftConcatMaskedUint16x8", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllLeftConcatMaskedUint16x16", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllLeftConcatMaskedUint16x32", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllLeftConcatMaskedUint32x4", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllLeftConcatMaskedUint32x8", argLength: 3, commutative: false, aux: "UInt8"}, - {name: 
"ShiftAllLeftConcatMaskedUint32x16", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllLeftConcatMaskedUint64x2", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllLeftConcatMaskedUint64x4", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllLeftConcatMaskedUint64x8", argLength: 3, commutative: false, aux: "UInt8"}, {name: "ShiftAllLeftConcatUint16x8", argLength: 2, commutative: false, aux: "UInt8"}, {name: "ShiftAllLeftConcatUint16x16", argLength: 2, commutative: false, aux: "UInt8"}, {name: "ShiftAllLeftConcatUint16x32", argLength: 2, commutative: false, aux: "UInt8"}, @@ -1857,24 +1035,6 @@ func simdGenericOps() []opData { {name: "ShiftAllRightConcatInt64x2", argLength: 2, commutative: false, aux: "UInt8"}, {name: "ShiftAllRightConcatInt64x4", argLength: 2, commutative: false, aux: "UInt8"}, {name: "ShiftAllRightConcatInt64x8", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "ShiftAllRightConcatMaskedInt16x8", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllRightConcatMaskedInt16x16", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllRightConcatMaskedInt16x32", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllRightConcatMaskedInt32x4", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllRightConcatMaskedInt32x8", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllRightConcatMaskedInt32x16", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllRightConcatMaskedInt64x2", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllRightConcatMaskedInt64x4", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllRightConcatMaskedInt64x8", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllRightConcatMaskedUint16x8", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllRightConcatMaskedUint16x16", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllRightConcatMaskedUint16x32", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllRightConcatMaskedUint32x4", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllRightConcatMaskedUint32x8", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllRightConcatMaskedUint32x16", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllRightConcatMaskedUint64x2", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllRightConcatMaskedUint64x4", argLength: 3, commutative: false, aux: "UInt8"}, - {name: "ShiftAllRightConcatMaskedUint64x8", argLength: 3, commutative: false, aux: "UInt8"}, {name: "ShiftAllRightConcatUint16x8", argLength: 2, commutative: false, aux: "UInt8"}, {name: "ShiftAllRightConcatUint16x16", argLength: 2, commutative: false, aux: "UInt8"}, {name: "ShiftAllRightConcatUint16x32", argLength: 2, commutative: false, aux: "UInt8"}, @@ -1890,23 +1050,11 @@ func simdGenericOps() []opData { {name: "TruncScaledFloat64x2", argLength: 1, commutative: false, aux: "UInt8"}, {name: "TruncScaledFloat64x4", argLength: 1, commutative: false, aux: "UInt8"}, {name: "TruncScaledFloat64x8", argLength: 1, commutative: false, aux: "UInt8"}, - {name: "TruncScaledMaskedFloat32x4", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "TruncScaledMaskedFloat32x8", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "TruncScaledMaskedFloat32x16", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "TruncScaledMaskedFloat64x2", argLength: 2, commutative: false, aux: 
"UInt8"}, - {name: "TruncScaledMaskedFloat64x4", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "TruncScaledMaskedFloat64x8", argLength: 2, commutative: false, aux: "UInt8"}, {name: "TruncScaledResidueFloat32x4", argLength: 1, commutative: false, aux: "UInt8"}, {name: "TruncScaledResidueFloat32x8", argLength: 1, commutative: false, aux: "UInt8"}, {name: "TruncScaledResidueFloat32x16", argLength: 1, commutative: false, aux: "UInt8"}, {name: "TruncScaledResidueFloat64x2", argLength: 1, commutative: false, aux: "UInt8"}, {name: "TruncScaledResidueFloat64x4", argLength: 1, commutative: false, aux: "UInt8"}, {name: "TruncScaledResidueFloat64x8", argLength: 1, commutative: false, aux: "UInt8"}, - {name: "TruncScaledResidueMaskedFloat32x4", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "TruncScaledResidueMaskedFloat32x8", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "TruncScaledResidueMaskedFloat32x16", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "TruncScaledResidueMaskedFloat64x2", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "TruncScaledResidueMaskedFloat64x4", argLength: 2, commutative: false, aux: "UInt8"}, - {name: "TruncScaledResidueMaskedFloat64x8", argLength: 2, commutative: false, aux: "UInt8"}, } } diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index b45cccd96bb..9f6e10c95cb 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -4648,36 +4648,15 @@ const ( OpAbsInt64x2 OpAbsInt64x4 OpAbsInt64x8 - OpAbsMaskedInt8x16 - OpAbsMaskedInt8x32 - OpAbsMaskedInt8x64 - OpAbsMaskedInt16x8 - OpAbsMaskedInt16x16 - OpAbsMaskedInt16x32 - OpAbsMaskedInt32x4 - OpAbsMaskedInt32x8 - OpAbsMaskedInt32x16 - OpAbsMaskedInt64x2 - OpAbsMaskedInt64x4 - OpAbsMaskedInt64x8 OpAddDotProdPairsSaturatedInt32x4 OpAddDotProdPairsSaturatedInt32x8 OpAddDotProdPairsSaturatedInt32x16 - OpAddDotProdPairsSaturatedMaskedInt32x4 - OpAddDotProdPairsSaturatedMaskedInt32x8 - OpAddDotProdPairsSaturatedMaskedInt32x16 OpAddDotProdQuadrupleInt32x4 OpAddDotProdQuadrupleInt32x8 OpAddDotProdQuadrupleInt32x16 - OpAddDotProdQuadrupleMaskedInt32x4 - OpAddDotProdQuadrupleMaskedInt32x8 - OpAddDotProdQuadrupleMaskedInt32x16 OpAddDotProdQuadrupleSaturatedInt32x4 OpAddDotProdQuadrupleSaturatedInt32x8 OpAddDotProdQuadrupleSaturatedInt32x16 - OpAddDotProdQuadrupleSaturatedMaskedInt32x4 - OpAddDotProdQuadrupleSaturatedMaskedInt32x8 - OpAddDotProdQuadrupleSaturatedMaskedInt32x16 OpAddFloat32x4 OpAddFloat32x8 OpAddFloat32x16 @@ -4696,36 +4675,6 @@ const ( OpAddInt64x2 OpAddInt64x4 OpAddInt64x8 - OpAddMaskedFloat32x4 - OpAddMaskedFloat32x8 - OpAddMaskedFloat32x16 - OpAddMaskedFloat64x2 - OpAddMaskedFloat64x4 - OpAddMaskedFloat64x8 - OpAddMaskedInt8x16 - OpAddMaskedInt8x32 - OpAddMaskedInt8x64 - OpAddMaskedInt16x8 - OpAddMaskedInt16x16 - OpAddMaskedInt16x32 - OpAddMaskedInt32x4 - OpAddMaskedInt32x8 - OpAddMaskedInt32x16 - OpAddMaskedInt64x2 - OpAddMaskedInt64x4 - OpAddMaskedInt64x8 - OpAddMaskedUint8x16 - OpAddMaskedUint8x32 - OpAddMaskedUint8x64 - OpAddMaskedUint16x8 - OpAddMaskedUint16x16 - OpAddMaskedUint16x32 - OpAddMaskedUint32x4 - OpAddMaskedUint32x8 - OpAddMaskedUint32x16 - OpAddMaskedUint64x2 - OpAddMaskedUint64x4 - OpAddMaskedUint64x8 OpAddPairsFloat32x4 OpAddPairsFloat32x8 OpAddPairsFloat64x2 @@ -4746,18 +4695,6 @@ const ( OpAddSaturatedInt16x8 OpAddSaturatedInt16x16 OpAddSaturatedInt16x32 - OpAddSaturatedMaskedInt8x16 - OpAddSaturatedMaskedInt8x32 - OpAddSaturatedMaskedInt8x64 - 
OpAddSaturatedMaskedInt16x8 - OpAddSaturatedMaskedInt16x16 - OpAddSaturatedMaskedInt16x32 - OpAddSaturatedMaskedUint8x16 - OpAddSaturatedMaskedUint8x32 - OpAddSaturatedMaskedUint8x64 - OpAddSaturatedMaskedUint16x8 - OpAddSaturatedMaskedUint16x16 - OpAddSaturatedMaskedUint16x32 OpAddSaturatedUint8x16 OpAddSaturatedUint8x32 OpAddSaturatedUint8x64 @@ -4792,18 +4729,6 @@ const ( OpAndInt64x2 OpAndInt64x4 OpAndInt64x8 - OpAndMaskedInt32x4 - OpAndMaskedInt32x8 - OpAndMaskedInt32x16 - OpAndMaskedInt64x2 - OpAndMaskedInt64x4 - OpAndMaskedInt64x8 - OpAndMaskedUint32x4 - OpAndMaskedUint32x8 - OpAndMaskedUint32x16 - OpAndMaskedUint64x2 - OpAndMaskedUint64x4 - OpAndMaskedUint64x8 OpAndNotInt8x16 OpAndNotInt8x32 OpAndNotInt8x64 @@ -4816,18 +4741,6 @@ const ( OpAndNotInt64x2 OpAndNotInt64x4 OpAndNotInt64x8 - OpAndNotMaskedInt32x4 - OpAndNotMaskedInt32x8 - OpAndNotMaskedInt32x16 - OpAndNotMaskedInt64x2 - OpAndNotMaskedInt64x4 - OpAndNotMaskedInt64x8 - OpAndNotMaskedUint32x4 - OpAndNotMaskedUint32x8 - OpAndNotMaskedUint32x16 - OpAndNotMaskedUint64x2 - OpAndNotMaskedUint64x4 - OpAndNotMaskedUint64x8 OpAndNotUint8x16 OpAndNotUint8x32 OpAndNotUint8x64 @@ -4852,12 +4765,6 @@ const ( OpAndUint64x2 OpAndUint64x4 OpAndUint64x8 - OpAverageMaskedUint8x16 - OpAverageMaskedUint8x32 - OpAverageMaskedUint8x64 - OpAverageMaskedUint16x8 - OpAverageMaskedUint16x16 - OpAverageMaskedUint16x32 OpAverageUint8x16 OpAverageUint8x32 OpAverageUint8x64 @@ -4870,16 +4777,6 @@ const ( OpBroadcast128Int16x8 OpBroadcast128Int32x4 OpBroadcast128Int64x2 - OpBroadcast128MaskedFloat32x4 - OpBroadcast128MaskedFloat64x2 - OpBroadcast128MaskedInt8x16 - OpBroadcast128MaskedInt16x8 - OpBroadcast128MaskedInt32x4 - OpBroadcast128MaskedInt64x2 - OpBroadcast128MaskedUint8x16 - OpBroadcast128MaskedUint16x8 - OpBroadcast128MaskedUint32x4 - OpBroadcast128MaskedUint64x2 OpBroadcast128Uint8x16 OpBroadcast128Uint16x8 OpBroadcast128Uint32x4 @@ -4890,16 +4787,6 @@ const ( OpBroadcast256Int16x8 OpBroadcast256Int32x4 OpBroadcast256Int64x2 - OpBroadcast256MaskedFloat32x4 - OpBroadcast256MaskedFloat64x2 - OpBroadcast256MaskedInt8x16 - OpBroadcast256MaskedInt16x8 - OpBroadcast256MaskedInt32x4 - OpBroadcast256MaskedInt64x2 - OpBroadcast256MaskedUint8x16 - OpBroadcast256MaskedUint16x8 - OpBroadcast256MaskedUint32x4 - OpBroadcast256MaskedUint64x2 OpBroadcast256Uint8x16 OpBroadcast256Uint16x8 OpBroadcast256Uint32x4 @@ -4910,16 +4797,6 @@ const ( OpBroadcast512Int16x8 OpBroadcast512Int32x4 OpBroadcast512Int64x2 - OpBroadcast512MaskedFloat32x4 - OpBroadcast512MaskedFloat64x2 - OpBroadcast512MaskedInt8x16 - OpBroadcast512MaskedInt16x8 - OpBroadcast512MaskedInt32x4 - OpBroadcast512MaskedInt64x2 - OpBroadcast512MaskedUint8x16 - OpBroadcast512MaskedUint16x8 - OpBroadcast512MaskedUint32x4 - OpBroadcast512MaskedUint64x2 OpBroadcast512Uint8x16 OpBroadcast512Uint16x8 OpBroadcast512Uint32x4 @@ -4961,15 +4838,9 @@ const ( OpConvertToInt32Float32x4 OpConvertToInt32Float32x8 OpConvertToInt32Float32x16 - OpConvertToInt32MaskedFloat32x4 - OpConvertToInt32MaskedFloat32x8 - OpConvertToInt32MaskedFloat32x16 OpConvertToUint32Float32x4 OpConvertToUint32Float32x8 OpConvertToUint32Float32x16 - OpConvertToUint32MaskedFloat32x4 - OpConvertToUint32MaskedFloat32x8 - OpConvertToUint32MaskedFloat32x16 OpCopySignInt8x16 OpCopySignInt8x32 OpCopySignInt16x8 @@ -4982,21 +4853,9 @@ const ( OpDivFloat64x2 OpDivFloat64x4 OpDivFloat64x8 - OpDivMaskedFloat32x4 - OpDivMaskedFloat32x8 - OpDivMaskedFloat32x16 - OpDivMaskedFloat64x2 - OpDivMaskedFloat64x4 - OpDivMaskedFloat64x8 OpDotProdPairsInt16x8 
OpDotProdPairsInt16x16 OpDotProdPairsInt16x32 - OpDotProdPairsMaskedInt16x8 - OpDotProdPairsMaskedInt16x16 - OpDotProdPairsMaskedInt16x32 - OpDotProdPairsSaturatedMaskedUint8x16 - OpDotProdPairsSaturatedMaskedUint8x32 - OpDotProdPairsSaturatedMaskedUint8x64 OpDotProdPairsSaturatedUint8x16 OpDotProdPairsSaturatedUint8x32 OpDotProdPairsSaturatedUint8x64 @@ -5018,36 +4877,6 @@ const ( OpEqualInt64x2 OpEqualInt64x4 OpEqualInt64x8 - OpEqualMaskedFloat32x4 - OpEqualMaskedFloat32x8 - OpEqualMaskedFloat32x16 - OpEqualMaskedFloat64x2 - OpEqualMaskedFloat64x4 - OpEqualMaskedFloat64x8 - OpEqualMaskedInt8x16 - OpEqualMaskedInt8x32 - OpEqualMaskedInt8x64 - OpEqualMaskedInt16x8 - OpEqualMaskedInt16x16 - OpEqualMaskedInt16x32 - OpEqualMaskedInt32x4 - OpEqualMaskedInt32x8 - OpEqualMaskedInt32x16 - OpEqualMaskedInt64x2 - OpEqualMaskedInt64x4 - OpEqualMaskedInt64x8 - OpEqualMaskedUint8x16 - OpEqualMaskedUint8x32 - OpEqualMaskedUint8x64 - OpEqualMaskedUint16x8 - OpEqualMaskedUint16x16 - OpEqualMaskedUint16x32 - OpEqualMaskedUint32x4 - OpEqualMaskedUint32x8 - OpEqualMaskedUint32x16 - OpEqualMaskedUint64x2 - OpEqualMaskedUint64x4 - OpEqualMaskedUint64x8 OpEqualUint8x16 OpEqualUint8x32 OpEqualUint8x64 @@ -5094,9 +4923,6 @@ const ( OpFloorFloat32x8 OpFloorFloat64x2 OpFloorFloat64x4 - OpGaloisFieldMulMaskedUint8x16 - OpGaloisFieldMulMaskedUint8x32 - OpGaloisFieldMulMaskedUint8x64 OpGaloisFieldMulUint8x16 OpGaloisFieldMulUint8x32 OpGaloisFieldMulUint8x64 @@ -5150,36 +4976,6 @@ const ( OpGreaterEqualInt16x32 OpGreaterEqualInt32x16 OpGreaterEqualInt64x8 - OpGreaterEqualMaskedFloat32x4 - OpGreaterEqualMaskedFloat32x8 - OpGreaterEqualMaskedFloat32x16 - OpGreaterEqualMaskedFloat64x2 - OpGreaterEqualMaskedFloat64x4 - OpGreaterEqualMaskedFloat64x8 - OpGreaterEqualMaskedInt8x16 - OpGreaterEqualMaskedInt8x32 - OpGreaterEqualMaskedInt8x64 - OpGreaterEqualMaskedInt16x8 - OpGreaterEqualMaskedInt16x16 - OpGreaterEqualMaskedInt16x32 - OpGreaterEqualMaskedInt32x4 - OpGreaterEqualMaskedInt32x8 - OpGreaterEqualMaskedInt32x16 - OpGreaterEqualMaskedInt64x2 - OpGreaterEqualMaskedInt64x4 - OpGreaterEqualMaskedInt64x8 - OpGreaterEqualMaskedUint8x16 - OpGreaterEqualMaskedUint8x32 - OpGreaterEqualMaskedUint8x64 - OpGreaterEqualMaskedUint16x8 - OpGreaterEqualMaskedUint16x16 - OpGreaterEqualMaskedUint16x32 - OpGreaterEqualMaskedUint32x4 - OpGreaterEqualMaskedUint32x8 - OpGreaterEqualMaskedUint32x16 - OpGreaterEqualMaskedUint64x2 - OpGreaterEqualMaskedUint64x4 - OpGreaterEqualMaskedUint64x8 OpGreaterEqualUint8x64 OpGreaterEqualUint16x32 OpGreaterEqualUint32x16 @@ -5202,36 +4998,6 @@ const ( OpGreaterInt64x2 OpGreaterInt64x4 OpGreaterInt64x8 - OpGreaterMaskedFloat32x4 - OpGreaterMaskedFloat32x8 - OpGreaterMaskedFloat32x16 - OpGreaterMaskedFloat64x2 - OpGreaterMaskedFloat64x4 - OpGreaterMaskedFloat64x8 - OpGreaterMaskedInt8x16 - OpGreaterMaskedInt8x32 - OpGreaterMaskedInt8x64 - OpGreaterMaskedInt16x8 - OpGreaterMaskedInt16x16 - OpGreaterMaskedInt16x32 - OpGreaterMaskedInt32x4 - OpGreaterMaskedInt32x8 - OpGreaterMaskedInt32x16 - OpGreaterMaskedInt64x2 - OpGreaterMaskedInt64x4 - OpGreaterMaskedInt64x8 - OpGreaterMaskedUint8x16 - OpGreaterMaskedUint8x32 - OpGreaterMaskedUint8x64 - OpGreaterMaskedUint16x8 - OpGreaterMaskedUint16x16 - OpGreaterMaskedUint16x32 - OpGreaterMaskedUint32x4 - OpGreaterMaskedUint32x8 - OpGreaterMaskedUint32x16 - OpGreaterMaskedUint64x2 - OpGreaterMaskedUint64x4 - OpGreaterMaskedUint64x8 OpGreaterUint8x64 OpGreaterUint16x32 OpGreaterUint32x16 @@ -5242,12 +5008,6 @@ const ( OpIsNanFloat64x2 OpIsNanFloat64x4 
OpIsNanFloat64x8 - OpIsNanMaskedFloat32x4 - OpIsNanMaskedFloat32x8 - OpIsNanMaskedFloat32x16 - OpIsNanMaskedFloat64x2 - OpIsNanMaskedFloat64x4 - OpIsNanMaskedFloat64x8 OpLessEqualFloat32x4 OpLessEqualFloat32x8 OpLessEqualFloat32x16 @@ -5258,36 +5018,6 @@ const ( OpLessEqualInt16x32 OpLessEqualInt32x16 OpLessEqualInt64x8 - OpLessEqualMaskedFloat32x4 - OpLessEqualMaskedFloat32x8 - OpLessEqualMaskedFloat32x16 - OpLessEqualMaskedFloat64x2 - OpLessEqualMaskedFloat64x4 - OpLessEqualMaskedFloat64x8 - OpLessEqualMaskedInt8x16 - OpLessEqualMaskedInt8x32 - OpLessEqualMaskedInt8x64 - OpLessEqualMaskedInt16x8 - OpLessEqualMaskedInt16x16 - OpLessEqualMaskedInt16x32 - OpLessEqualMaskedInt32x4 - OpLessEqualMaskedInt32x8 - OpLessEqualMaskedInt32x16 - OpLessEqualMaskedInt64x2 - OpLessEqualMaskedInt64x4 - OpLessEqualMaskedInt64x8 - OpLessEqualMaskedUint8x16 - OpLessEqualMaskedUint8x32 - OpLessEqualMaskedUint8x64 - OpLessEqualMaskedUint16x8 - OpLessEqualMaskedUint16x16 - OpLessEqualMaskedUint16x32 - OpLessEqualMaskedUint32x4 - OpLessEqualMaskedUint32x8 - OpLessEqualMaskedUint32x16 - OpLessEqualMaskedUint64x2 - OpLessEqualMaskedUint64x4 - OpLessEqualMaskedUint64x8 OpLessEqualUint8x64 OpLessEqualUint16x32 OpLessEqualUint32x16 @@ -5302,36 +5032,6 @@ const ( OpLessInt16x32 OpLessInt32x16 OpLessInt64x8 - OpLessMaskedFloat32x4 - OpLessMaskedFloat32x8 - OpLessMaskedFloat32x16 - OpLessMaskedFloat64x2 - OpLessMaskedFloat64x4 - OpLessMaskedFloat64x8 - OpLessMaskedInt8x16 - OpLessMaskedInt8x32 - OpLessMaskedInt8x64 - OpLessMaskedInt16x8 - OpLessMaskedInt16x16 - OpLessMaskedInt16x32 - OpLessMaskedInt32x4 - OpLessMaskedInt32x8 - OpLessMaskedInt32x16 - OpLessMaskedInt64x2 - OpLessMaskedInt64x4 - OpLessMaskedInt64x8 - OpLessMaskedUint8x16 - OpLessMaskedUint8x32 - OpLessMaskedUint8x64 - OpLessMaskedUint16x8 - OpLessMaskedUint16x16 - OpLessMaskedUint16x32 - OpLessMaskedUint32x4 - OpLessMaskedUint32x8 - OpLessMaskedUint32x16 - OpLessMaskedUint64x2 - OpLessMaskedUint64x4 - OpLessMaskedUint64x8 OpLessUint8x64 OpLessUint16x32 OpLessUint32x16 @@ -5354,36 +5054,6 @@ const ( OpMaxInt64x2 OpMaxInt64x4 OpMaxInt64x8 - OpMaxMaskedFloat32x4 - OpMaxMaskedFloat32x8 - OpMaxMaskedFloat32x16 - OpMaxMaskedFloat64x2 - OpMaxMaskedFloat64x4 - OpMaxMaskedFloat64x8 - OpMaxMaskedInt8x16 - OpMaxMaskedInt8x32 - OpMaxMaskedInt8x64 - OpMaxMaskedInt16x8 - OpMaxMaskedInt16x16 - OpMaxMaskedInt16x32 - OpMaxMaskedInt32x4 - OpMaxMaskedInt32x8 - OpMaxMaskedInt32x16 - OpMaxMaskedInt64x2 - OpMaxMaskedInt64x4 - OpMaxMaskedInt64x8 - OpMaxMaskedUint8x16 - OpMaxMaskedUint8x32 - OpMaxMaskedUint8x64 - OpMaxMaskedUint16x8 - OpMaxMaskedUint16x16 - OpMaxMaskedUint16x32 - OpMaxMaskedUint32x4 - OpMaxMaskedUint32x8 - OpMaxMaskedUint32x16 - OpMaxMaskedUint64x2 - OpMaxMaskedUint64x4 - OpMaxMaskedUint64x8 OpMaxUint8x16 OpMaxUint8x32 OpMaxUint8x64 @@ -5414,36 +5084,6 @@ const ( OpMinInt64x2 OpMinInt64x4 OpMinInt64x8 - OpMinMaskedFloat32x4 - OpMinMaskedFloat32x8 - OpMinMaskedFloat32x16 - OpMinMaskedFloat64x2 - OpMinMaskedFloat64x4 - OpMinMaskedFloat64x8 - OpMinMaskedInt8x16 - OpMinMaskedInt8x32 - OpMinMaskedInt8x64 - OpMinMaskedInt16x8 - OpMinMaskedInt16x16 - OpMinMaskedInt16x32 - OpMinMaskedInt32x4 - OpMinMaskedInt32x8 - OpMinMaskedInt32x16 - OpMinMaskedInt64x2 - OpMinMaskedInt64x4 - OpMinMaskedInt64x8 - OpMinMaskedUint8x16 - OpMinMaskedUint8x32 - OpMinMaskedUint8x64 - OpMinMaskedUint16x8 - OpMinMaskedUint16x16 - OpMinMaskedUint16x32 - OpMinMaskedUint32x4 - OpMinMaskedUint32x8 - OpMinMaskedUint32x16 - OpMinMaskedUint64x2 - OpMinMaskedUint64x4 - OpMinMaskedUint64x8 
OpMinUint8x16 OpMinUint8x32 OpMinUint8x64 @@ -5462,24 +5102,12 @@ const ( OpMulAddFloat64x2 OpMulAddFloat64x4 OpMulAddFloat64x8 - OpMulAddMaskedFloat32x4 - OpMulAddMaskedFloat32x8 - OpMulAddMaskedFloat32x16 - OpMulAddMaskedFloat64x2 - OpMulAddMaskedFloat64x4 - OpMulAddMaskedFloat64x8 OpMulAddSubFloat32x4 OpMulAddSubFloat32x8 OpMulAddSubFloat32x16 OpMulAddSubFloat64x2 OpMulAddSubFloat64x4 OpMulAddSubFloat64x8 - OpMulAddSubMaskedFloat32x4 - OpMulAddSubMaskedFloat32x8 - OpMulAddSubMaskedFloat32x16 - OpMulAddSubMaskedFloat64x2 - OpMulAddSubMaskedFloat64x4 - OpMulAddSubMaskedFloat64x8 OpMulEvenWidenInt32x4 OpMulEvenWidenInt32x8 OpMulEvenWidenUint32x4 @@ -5493,12 +5121,6 @@ const ( OpMulHighInt16x8 OpMulHighInt16x16 OpMulHighInt16x32 - OpMulHighMaskedInt16x8 - OpMulHighMaskedInt16x16 - OpMulHighMaskedInt16x32 - OpMulHighMaskedUint16x8 - OpMulHighMaskedUint16x16 - OpMulHighMaskedUint16x32 OpMulHighUint16x8 OpMulHighUint16x16 OpMulHighUint16x32 @@ -5511,42 +5133,12 @@ const ( OpMulInt64x2 OpMulInt64x4 OpMulInt64x8 - OpMulMaskedFloat32x4 - OpMulMaskedFloat32x8 - OpMulMaskedFloat32x16 - OpMulMaskedFloat64x2 - OpMulMaskedFloat64x4 - OpMulMaskedFloat64x8 - OpMulMaskedInt16x8 - OpMulMaskedInt16x16 - OpMulMaskedInt16x32 - OpMulMaskedInt32x4 - OpMulMaskedInt32x8 - OpMulMaskedInt32x16 - OpMulMaskedInt64x2 - OpMulMaskedInt64x4 - OpMulMaskedInt64x8 - OpMulMaskedUint16x8 - OpMulMaskedUint16x16 - OpMulMaskedUint16x32 - OpMulMaskedUint32x4 - OpMulMaskedUint32x8 - OpMulMaskedUint32x16 - OpMulMaskedUint64x2 - OpMulMaskedUint64x4 - OpMulMaskedUint64x8 OpMulSubAddFloat32x4 OpMulSubAddFloat32x8 OpMulSubAddFloat32x16 OpMulSubAddFloat64x2 OpMulSubAddFloat64x4 OpMulSubAddFloat64x8 - OpMulSubAddMaskedFloat32x4 - OpMulSubAddMaskedFloat32x8 - OpMulSubAddMaskedFloat32x16 - OpMulSubAddMaskedFloat64x2 - OpMulSubAddMaskedFloat64x4 - OpMulSubAddMaskedFloat64x8 OpMulUint16x8 OpMulUint16x16 OpMulUint16x32 @@ -5566,36 +5158,6 @@ const ( OpNotEqualInt16x32 OpNotEqualInt32x16 OpNotEqualInt64x8 - OpNotEqualMaskedFloat32x4 - OpNotEqualMaskedFloat32x8 - OpNotEqualMaskedFloat32x16 - OpNotEqualMaskedFloat64x2 - OpNotEqualMaskedFloat64x4 - OpNotEqualMaskedFloat64x8 - OpNotEqualMaskedInt8x16 - OpNotEqualMaskedInt8x32 - OpNotEqualMaskedInt8x64 - OpNotEqualMaskedInt16x8 - OpNotEqualMaskedInt16x16 - OpNotEqualMaskedInt16x32 - OpNotEqualMaskedInt32x4 - OpNotEqualMaskedInt32x8 - OpNotEqualMaskedInt32x16 - OpNotEqualMaskedInt64x2 - OpNotEqualMaskedInt64x4 - OpNotEqualMaskedInt64x8 - OpNotEqualMaskedUint8x16 - OpNotEqualMaskedUint8x32 - OpNotEqualMaskedUint8x64 - OpNotEqualMaskedUint16x8 - OpNotEqualMaskedUint16x16 - OpNotEqualMaskedUint16x32 - OpNotEqualMaskedUint32x4 - OpNotEqualMaskedUint32x8 - OpNotEqualMaskedUint32x16 - OpNotEqualMaskedUint64x2 - OpNotEqualMaskedUint64x4 - OpNotEqualMaskedUint64x8 OpNotEqualUint8x64 OpNotEqualUint16x32 OpNotEqualUint32x16 @@ -5612,30 +5174,6 @@ const ( OpOnesCountInt64x2 OpOnesCountInt64x4 OpOnesCountInt64x8 - OpOnesCountMaskedInt8x16 - OpOnesCountMaskedInt8x32 - OpOnesCountMaskedInt8x64 - OpOnesCountMaskedInt16x8 - OpOnesCountMaskedInt16x16 - OpOnesCountMaskedInt16x32 - OpOnesCountMaskedInt32x4 - OpOnesCountMaskedInt32x8 - OpOnesCountMaskedInt32x16 - OpOnesCountMaskedInt64x2 - OpOnesCountMaskedInt64x4 - OpOnesCountMaskedInt64x8 - OpOnesCountMaskedUint8x16 - OpOnesCountMaskedUint8x32 - OpOnesCountMaskedUint8x64 - OpOnesCountMaskedUint16x8 - OpOnesCountMaskedUint16x16 - OpOnesCountMaskedUint16x32 - OpOnesCountMaskedUint32x4 - OpOnesCountMaskedUint32x8 - OpOnesCountMaskedUint32x16 - OpOnesCountMaskedUint64x2 
- OpOnesCountMaskedUint64x4 - OpOnesCountMaskedUint64x8 OpOnesCountUint8x16 OpOnesCountUint8x32 OpOnesCountUint8x64 @@ -5660,18 +5198,6 @@ const ( OpOrInt64x2 OpOrInt64x4 OpOrInt64x8 - OpOrMaskedInt32x4 - OpOrMaskedInt32x8 - OpOrMaskedInt32x16 - OpOrMaskedInt64x2 - OpOrMaskedInt64x4 - OpOrMaskedInt64x8 - OpOrMaskedUint32x4 - OpOrMaskedUint32x8 - OpOrMaskedUint32x16 - OpOrMaskedUint64x2 - OpOrMaskedUint64x4 - OpOrMaskedUint64x8 OpOrUint8x16 OpOrUint8x32 OpOrUint8x64 @@ -5702,36 +5228,6 @@ const ( OpPermute2Int64x2 OpPermute2Int64x4 OpPermute2Int64x8 - OpPermute2MaskedFloat32x4 - OpPermute2MaskedFloat32x8 - OpPermute2MaskedFloat32x16 - OpPermute2MaskedFloat64x2 - OpPermute2MaskedFloat64x4 - OpPermute2MaskedFloat64x8 - OpPermute2MaskedInt8x16 - OpPermute2MaskedInt8x32 - OpPermute2MaskedInt8x64 - OpPermute2MaskedInt16x8 - OpPermute2MaskedInt16x16 - OpPermute2MaskedInt16x32 - OpPermute2MaskedInt32x4 - OpPermute2MaskedInt32x8 - OpPermute2MaskedInt32x16 - OpPermute2MaskedInt64x2 - OpPermute2MaskedInt64x4 - OpPermute2MaskedInt64x8 - OpPermute2MaskedUint8x16 - OpPermute2MaskedUint8x32 - OpPermute2MaskedUint8x64 - OpPermute2MaskedUint16x8 - OpPermute2MaskedUint16x16 - OpPermute2MaskedUint16x32 - OpPermute2MaskedUint32x4 - OpPermute2MaskedUint32x8 - OpPermute2MaskedUint32x16 - OpPermute2MaskedUint64x2 - OpPermute2MaskedUint64x4 - OpPermute2MaskedUint64x8 OpPermute2Uint8x16 OpPermute2Uint8x32 OpPermute2Uint8x64 @@ -5758,30 +5254,6 @@ const ( OpPermuteInt32x16 OpPermuteInt64x4 OpPermuteInt64x8 - OpPermuteMaskedFloat32x8 - OpPermuteMaskedFloat32x16 - OpPermuteMaskedFloat64x4 - OpPermuteMaskedFloat64x8 - OpPermuteMaskedInt8x16 - OpPermuteMaskedInt8x32 - OpPermuteMaskedInt8x64 - OpPermuteMaskedInt16x8 - OpPermuteMaskedInt16x16 - OpPermuteMaskedInt16x32 - OpPermuteMaskedInt32x8 - OpPermuteMaskedInt32x16 - OpPermuteMaskedInt64x4 - OpPermuteMaskedInt64x8 - OpPermuteMaskedUint8x16 - OpPermuteMaskedUint8x32 - OpPermuteMaskedUint8x64 - OpPermuteMaskedUint16x8 - OpPermuteMaskedUint16x16 - OpPermuteMaskedUint16x32 - OpPermuteMaskedUint32x8 - OpPermuteMaskedUint32x16 - OpPermuteMaskedUint64x4 - OpPermuteMaskedUint64x8 OpPermuteUint8x16 OpPermuteUint8x32 OpPermuteUint8x64 @@ -5798,42 +5270,18 @@ const ( OpReciprocalFloat64x2 OpReciprocalFloat64x4 OpReciprocalFloat64x8 - OpReciprocalMaskedFloat32x4 - OpReciprocalMaskedFloat32x8 - OpReciprocalMaskedFloat32x16 - OpReciprocalMaskedFloat64x2 - OpReciprocalMaskedFloat64x4 - OpReciprocalMaskedFloat64x8 OpReciprocalSqrtFloat32x4 OpReciprocalSqrtFloat32x8 OpReciprocalSqrtFloat32x16 OpReciprocalSqrtFloat64x2 OpReciprocalSqrtFloat64x4 OpReciprocalSqrtFloat64x8 - OpReciprocalSqrtMaskedFloat32x4 - OpReciprocalSqrtMaskedFloat32x8 - OpReciprocalSqrtMaskedFloat32x16 - OpReciprocalSqrtMaskedFloat64x2 - OpReciprocalSqrtMaskedFloat64x4 - OpReciprocalSqrtMaskedFloat64x8 OpRotateLeftInt32x4 OpRotateLeftInt32x8 OpRotateLeftInt32x16 OpRotateLeftInt64x2 OpRotateLeftInt64x4 OpRotateLeftInt64x8 - OpRotateLeftMaskedInt32x4 - OpRotateLeftMaskedInt32x8 - OpRotateLeftMaskedInt32x16 - OpRotateLeftMaskedInt64x2 - OpRotateLeftMaskedInt64x4 - OpRotateLeftMaskedInt64x8 - OpRotateLeftMaskedUint32x4 - OpRotateLeftMaskedUint32x8 - OpRotateLeftMaskedUint32x16 - OpRotateLeftMaskedUint64x2 - OpRotateLeftMaskedUint64x4 - OpRotateLeftMaskedUint64x8 OpRotateLeftUint32x4 OpRotateLeftUint32x8 OpRotateLeftUint32x16 @@ -5846,18 +5294,6 @@ const ( OpRotateRightInt64x2 OpRotateRightInt64x4 OpRotateRightInt64x8 - OpRotateRightMaskedInt32x4 - OpRotateRightMaskedInt32x8 - OpRotateRightMaskedInt32x16 - 
OpRotateRightMaskedInt64x2 - OpRotateRightMaskedInt64x4 - OpRotateRightMaskedInt64x8 - OpRotateRightMaskedUint32x4 - OpRotateRightMaskedUint32x8 - OpRotateRightMaskedUint32x16 - OpRotateRightMaskedUint64x2 - OpRotateRightMaskedUint64x4 - OpRotateRightMaskedUint64x8 OpRotateRightUint32x4 OpRotateRightUint32x8 OpRotateRightUint32x16 @@ -5874,12 +5310,6 @@ const ( OpScaleFloat64x2 OpScaleFloat64x4 OpScaleFloat64x8 - OpScaleMaskedFloat32x4 - OpScaleMaskedFloat32x8 - OpScaleMaskedFloat32x16 - OpScaleMaskedFloat64x2 - OpScaleMaskedFloat64x4 - OpScaleMaskedFloat64x8 OpSetHiFloat32x8 OpSetHiFloat32x16 OpSetHiFloat64x4 @@ -5929,24 +5359,6 @@ const ( OpShiftAllLeftInt64x2 OpShiftAllLeftInt64x4 OpShiftAllLeftInt64x8 - OpShiftAllLeftMaskedInt16x8 - OpShiftAllLeftMaskedInt16x16 - OpShiftAllLeftMaskedInt16x32 - OpShiftAllLeftMaskedInt32x4 - OpShiftAllLeftMaskedInt32x8 - OpShiftAllLeftMaskedInt32x16 - OpShiftAllLeftMaskedInt64x2 - OpShiftAllLeftMaskedInt64x4 - OpShiftAllLeftMaskedInt64x8 - OpShiftAllLeftMaskedUint16x8 - OpShiftAllLeftMaskedUint16x16 - OpShiftAllLeftMaskedUint16x32 - OpShiftAllLeftMaskedUint32x4 - OpShiftAllLeftMaskedUint32x8 - OpShiftAllLeftMaskedUint32x16 - OpShiftAllLeftMaskedUint64x2 - OpShiftAllLeftMaskedUint64x4 - OpShiftAllLeftMaskedUint64x8 OpShiftAllLeftUint16x8 OpShiftAllLeftUint16x16 OpShiftAllLeftUint16x32 @@ -5965,24 +5377,6 @@ const ( OpShiftAllRightInt64x2 OpShiftAllRightInt64x4 OpShiftAllRightInt64x8 - OpShiftAllRightMaskedInt16x8 - OpShiftAllRightMaskedInt16x16 - OpShiftAllRightMaskedInt16x32 - OpShiftAllRightMaskedInt32x4 - OpShiftAllRightMaskedInt32x8 - OpShiftAllRightMaskedInt32x16 - OpShiftAllRightMaskedInt64x2 - OpShiftAllRightMaskedInt64x4 - OpShiftAllRightMaskedInt64x8 - OpShiftAllRightMaskedUint16x8 - OpShiftAllRightMaskedUint16x16 - OpShiftAllRightMaskedUint16x32 - OpShiftAllRightMaskedUint32x4 - OpShiftAllRightMaskedUint32x8 - OpShiftAllRightMaskedUint32x16 - OpShiftAllRightMaskedUint64x2 - OpShiftAllRightMaskedUint64x4 - OpShiftAllRightMaskedUint64x8 OpShiftAllRightUint16x8 OpShiftAllRightUint16x16 OpShiftAllRightUint16x32 @@ -6001,24 +5395,6 @@ const ( OpShiftLeftConcatInt64x2 OpShiftLeftConcatInt64x4 OpShiftLeftConcatInt64x8 - OpShiftLeftConcatMaskedInt16x8 - OpShiftLeftConcatMaskedInt16x16 - OpShiftLeftConcatMaskedInt16x32 - OpShiftLeftConcatMaskedInt32x4 - OpShiftLeftConcatMaskedInt32x8 - OpShiftLeftConcatMaskedInt32x16 - OpShiftLeftConcatMaskedInt64x2 - OpShiftLeftConcatMaskedInt64x4 - OpShiftLeftConcatMaskedInt64x8 - OpShiftLeftConcatMaskedUint16x8 - OpShiftLeftConcatMaskedUint16x16 - OpShiftLeftConcatMaskedUint16x32 - OpShiftLeftConcatMaskedUint32x4 - OpShiftLeftConcatMaskedUint32x8 - OpShiftLeftConcatMaskedUint32x16 - OpShiftLeftConcatMaskedUint64x2 - OpShiftLeftConcatMaskedUint64x4 - OpShiftLeftConcatMaskedUint64x8 OpShiftLeftConcatUint16x8 OpShiftLeftConcatUint16x16 OpShiftLeftConcatUint16x32 @@ -6037,24 +5413,6 @@ const ( OpShiftLeftInt64x2 OpShiftLeftInt64x4 OpShiftLeftInt64x8 - OpShiftLeftMaskedInt16x8 - OpShiftLeftMaskedInt16x16 - OpShiftLeftMaskedInt16x32 - OpShiftLeftMaskedInt32x4 - OpShiftLeftMaskedInt32x8 - OpShiftLeftMaskedInt32x16 - OpShiftLeftMaskedInt64x2 - OpShiftLeftMaskedInt64x4 - OpShiftLeftMaskedInt64x8 - OpShiftLeftMaskedUint16x8 - OpShiftLeftMaskedUint16x16 - OpShiftLeftMaskedUint16x32 - OpShiftLeftMaskedUint32x4 - OpShiftLeftMaskedUint32x8 - OpShiftLeftMaskedUint32x16 - OpShiftLeftMaskedUint64x2 - OpShiftLeftMaskedUint64x4 - OpShiftLeftMaskedUint64x8 OpShiftLeftUint16x8 OpShiftLeftUint16x16 OpShiftLeftUint16x32 @@ -6073,24 +5431,6 
@@ const ( OpShiftRightConcatInt64x2 OpShiftRightConcatInt64x4 OpShiftRightConcatInt64x8 - OpShiftRightConcatMaskedInt16x8 - OpShiftRightConcatMaskedInt16x16 - OpShiftRightConcatMaskedInt16x32 - OpShiftRightConcatMaskedInt32x4 - OpShiftRightConcatMaskedInt32x8 - OpShiftRightConcatMaskedInt32x16 - OpShiftRightConcatMaskedInt64x2 - OpShiftRightConcatMaskedInt64x4 - OpShiftRightConcatMaskedInt64x8 - OpShiftRightConcatMaskedUint16x8 - OpShiftRightConcatMaskedUint16x16 - OpShiftRightConcatMaskedUint16x32 - OpShiftRightConcatMaskedUint32x4 - OpShiftRightConcatMaskedUint32x8 - OpShiftRightConcatMaskedUint32x16 - OpShiftRightConcatMaskedUint64x2 - OpShiftRightConcatMaskedUint64x4 - OpShiftRightConcatMaskedUint64x8 OpShiftRightConcatUint16x8 OpShiftRightConcatUint16x16 OpShiftRightConcatUint16x32 @@ -6109,24 +5449,6 @@ const ( OpShiftRightInt64x2 OpShiftRightInt64x4 OpShiftRightInt64x8 - OpShiftRightMaskedInt16x8 - OpShiftRightMaskedInt16x16 - OpShiftRightMaskedInt16x32 - OpShiftRightMaskedInt32x4 - OpShiftRightMaskedInt32x8 - OpShiftRightMaskedInt32x16 - OpShiftRightMaskedInt64x2 - OpShiftRightMaskedInt64x4 - OpShiftRightMaskedInt64x8 - OpShiftRightMaskedUint16x8 - OpShiftRightMaskedUint16x16 - OpShiftRightMaskedUint16x32 - OpShiftRightMaskedUint32x4 - OpShiftRightMaskedUint32x8 - OpShiftRightMaskedUint32x16 - OpShiftRightMaskedUint64x2 - OpShiftRightMaskedUint64x4 - OpShiftRightMaskedUint64x8 OpShiftRightUint16x8 OpShiftRightUint16x16 OpShiftRightUint16x32 @@ -6142,12 +5464,6 @@ const ( OpSqrtFloat64x2 OpSqrtFloat64x4 OpSqrtFloat64x8 - OpSqrtMaskedFloat32x4 - OpSqrtMaskedFloat32x8 - OpSqrtMaskedFloat32x16 - OpSqrtMaskedFloat64x2 - OpSqrtMaskedFloat64x4 - OpSqrtMaskedFloat64x8 OpSubFloat32x4 OpSubFloat32x8 OpSubFloat32x16 @@ -6166,36 +5482,6 @@ const ( OpSubInt64x2 OpSubInt64x4 OpSubInt64x8 - OpSubMaskedFloat32x4 - OpSubMaskedFloat32x8 - OpSubMaskedFloat32x16 - OpSubMaskedFloat64x2 - OpSubMaskedFloat64x4 - OpSubMaskedFloat64x8 - OpSubMaskedInt8x16 - OpSubMaskedInt8x32 - OpSubMaskedInt8x64 - OpSubMaskedInt16x8 - OpSubMaskedInt16x16 - OpSubMaskedInt16x32 - OpSubMaskedInt32x4 - OpSubMaskedInt32x8 - OpSubMaskedInt32x16 - OpSubMaskedInt64x2 - OpSubMaskedInt64x4 - OpSubMaskedInt64x8 - OpSubMaskedUint8x16 - OpSubMaskedUint8x32 - OpSubMaskedUint8x64 - OpSubMaskedUint16x8 - OpSubMaskedUint16x16 - OpSubMaskedUint16x32 - OpSubMaskedUint32x4 - OpSubMaskedUint32x8 - OpSubMaskedUint32x16 - OpSubMaskedUint64x2 - OpSubMaskedUint64x4 - OpSubMaskedUint64x8 OpSubPairsFloat32x4 OpSubPairsFloat32x8 OpSubPairsFloat64x2 @@ -6216,18 +5502,6 @@ const ( OpSubSaturatedInt16x8 OpSubSaturatedInt16x16 OpSubSaturatedInt16x32 - OpSubSaturatedMaskedInt8x16 - OpSubSaturatedMaskedInt8x32 - OpSubSaturatedMaskedInt8x64 - OpSubSaturatedMaskedInt16x8 - OpSubSaturatedMaskedInt16x16 - OpSubSaturatedMaskedInt16x32 - OpSubSaturatedMaskedUint8x16 - OpSubSaturatedMaskedUint8x32 - OpSubSaturatedMaskedUint8x64 - OpSubSaturatedMaskedUint16x8 - OpSubSaturatedMaskedUint16x16 - OpSubSaturatedMaskedUint16x32 OpSubSaturatedUint8x16 OpSubSaturatedUint8x32 OpSubSaturatedUint8x64 @@ -6262,18 +5536,6 @@ const ( OpXorInt64x2 OpXorInt64x4 OpXorInt64x8 - OpXorMaskedInt32x4 - OpXorMaskedInt32x8 - OpXorMaskedInt32x16 - OpXorMaskedInt64x2 - OpXorMaskedInt64x4 - OpXorMaskedInt64x8 - OpXorMaskedUint32x4 - OpXorMaskedUint32x8 - OpXorMaskedUint32x16 - OpXorMaskedUint64x2 - OpXorMaskedUint64x4 - OpXorMaskedUint64x8 OpXorUint8x16 OpXorUint8x32 OpXorUint8x64 @@ -6298,57 +5560,27 @@ const ( OpCeilScaledFloat64x2 OpCeilScaledFloat64x4 OpCeilScaledFloat64x8 - 
OpCeilScaledMaskedFloat32x4 - OpCeilScaledMaskedFloat32x8 - OpCeilScaledMaskedFloat32x16 - OpCeilScaledMaskedFloat64x2 - OpCeilScaledMaskedFloat64x4 - OpCeilScaledMaskedFloat64x8 OpCeilScaledResidueFloat32x4 OpCeilScaledResidueFloat32x8 OpCeilScaledResidueFloat32x16 OpCeilScaledResidueFloat64x2 OpCeilScaledResidueFloat64x4 OpCeilScaledResidueFloat64x8 - OpCeilScaledResidueMaskedFloat32x4 - OpCeilScaledResidueMaskedFloat32x8 - OpCeilScaledResidueMaskedFloat32x16 - OpCeilScaledResidueMaskedFloat64x2 - OpCeilScaledResidueMaskedFloat64x4 - OpCeilScaledResidueMaskedFloat64x8 OpFloorScaledFloat32x4 OpFloorScaledFloat32x8 OpFloorScaledFloat32x16 OpFloorScaledFloat64x2 OpFloorScaledFloat64x4 OpFloorScaledFloat64x8 - OpFloorScaledMaskedFloat32x4 - OpFloorScaledMaskedFloat32x8 - OpFloorScaledMaskedFloat32x16 - OpFloorScaledMaskedFloat64x2 - OpFloorScaledMaskedFloat64x4 - OpFloorScaledMaskedFloat64x8 OpFloorScaledResidueFloat32x4 OpFloorScaledResidueFloat32x8 OpFloorScaledResidueFloat32x16 OpFloorScaledResidueFloat64x2 OpFloorScaledResidueFloat64x4 OpFloorScaledResidueFloat64x8 - OpFloorScaledResidueMaskedFloat32x4 - OpFloorScaledResidueMaskedFloat32x8 - OpFloorScaledResidueMaskedFloat32x16 - OpFloorScaledResidueMaskedFloat64x2 - OpFloorScaledResidueMaskedFloat64x4 - OpFloorScaledResidueMaskedFloat64x8 - OpGaloisFieldAffineTransformInverseMaskedUint8x16 - OpGaloisFieldAffineTransformInverseMaskedUint8x32 - OpGaloisFieldAffineTransformInverseMaskedUint8x64 OpGaloisFieldAffineTransformInverseUint8x16 OpGaloisFieldAffineTransformInverseUint8x32 OpGaloisFieldAffineTransformInverseUint8x64 - OpGaloisFieldAffineTransformMaskedUint8x16 - OpGaloisFieldAffineTransformMaskedUint8x32 - OpGaloisFieldAffineTransformMaskedUint8x64 OpGaloisFieldAffineTransformUint8x16 OpGaloisFieldAffineTransformUint8x32 OpGaloisFieldAffineTransformUint8x64 @@ -6368,18 +5600,6 @@ const ( OpRotateAllLeftInt64x2 OpRotateAllLeftInt64x4 OpRotateAllLeftInt64x8 - OpRotateAllLeftMaskedInt32x4 - OpRotateAllLeftMaskedInt32x8 - OpRotateAllLeftMaskedInt32x16 - OpRotateAllLeftMaskedInt64x2 - OpRotateAllLeftMaskedInt64x4 - OpRotateAllLeftMaskedInt64x8 - OpRotateAllLeftMaskedUint32x4 - OpRotateAllLeftMaskedUint32x8 - OpRotateAllLeftMaskedUint32x16 - OpRotateAllLeftMaskedUint64x2 - OpRotateAllLeftMaskedUint64x4 - OpRotateAllLeftMaskedUint64x8 OpRotateAllLeftUint32x4 OpRotateAllLeftUint32x8 OpRotateAllLeftUint32x16 @@ -6392,18 +5612,6 @@ const ( OpRotateAllRightInt64x2 OpRotateAllRightInt64x4 OpRotateAllRightInt64x8 - OpRotateAllRightMaskedInt32x4 - OpRotateAllRightMaskedInt32x8 - OpRotateAllRightMaskedInt32x16 - OpRotateAllRightMaskedInt64x2 - OpRotateAllRightMaskedInt64x4 - OpRotateAllRightMaskedInt64x8 - OpRotateAllRightMaskedUint32x4 - OpRotateAllRightMaskedUint32x8 - OpRotateAllRightMaskedUint32x16 - OpRotateAllRightMaskedUint64x2 - OpRotateAllRightMaskedUint64x4 - OpRotateAllRightMaskedUint64x8 OpRotateAllRightUint32x4 OpRotateAllRightUint32x8 OpRotateAllRightUint32x16 @@ -6416,24 +5624,12 @@ const ( OpRoundToEvenScaledFloat64x2 OpRoundToEvenScaledFloat64x4 OpRoundToEvenScaledFloat64x8 - OpRoundToEvenScaledMaskedFloat32x4 - OpRoundToEvenScaledMaskedFloat32x8 - OpRoundToEvenScaledMaskedFloat32x16 - OpRoundToEvenScaledMaskedFloat64x2 - OpRoundToEvenScaledMaskedFloat64x4 - OpRoundToEvenScaledMaskedFloat64x8 OpRoundToEvenScaledResidueFloat32x4 OpRoundToEvenScaledResidueFloat32x8 OpRoundToEvenScaledResidueFloat32x16 OpRoundToEvenScaledResidueFloat64x2 OpRoundToEvenScaledResidueFloat64x4 OpRoundToEvenScaledResidueFloat64x8 - 
OpRoundToEvenScaledResidueMaskedFloat32x4 - OpRoundToEvenScaledResidueMaskedFloat32x8 - OpRoundToEvenScaledResidueMaskedFloat32x16 - OpRoundToEvenScaledResidueMaskedFloat64x2 - OpRoundToEvenScaledResidueMaskedFloat64x4 - OpRoundToEvenScaledResidueMaskedFloat64x8 OpSetElemFloat32x4 OpSetElemFloat64x2 OpSetElemInt8x16 @@ -6453,24 +5649,6 @@ const ( OpShiftAllLeftConcatInt64x2 OpShiftAllLeftConcatInt64x4 OpShiftAllLeftConcatInt64x8 - OpShiftAllLeftConcatMaskedInt16x8 - OpShiftAllLeftConcatMaskedInt16x16 - OpShiftAllLeftConcatMaskedInt16x32 - OpShiftAllLeftConcatMaskedInt32x4 - OpShiftAllLeftConcatMaskedInt32x8 - OpShiftAllLeftConcatMaskedInt32x16 - OpShiftAllLeftConcatMaskedInt64x2 - OpShiftAllLeftConcatMaskedInt64x4 - OpShiftAllLeftConcatMaskedInt64x8 - OpShiftAllLeftConcatMaskedUint16x8 - OpShiftAllLeftConcatMaskedUint16x16 - OpShiftAllLeftConcatMaskedUint16x32 - OpShiftAllLeftConcatMaskedUint32x4 - OpShiftAllLeftConcatMaskedUint32x8 - OpShiftAllLeftConcatMaskedUint32x16 - OpShiftAllLeftConcatMaskedUint64x2 - OpShiftAllLeftConcatMaskedUint64x4 - OpShiftAllLeftConcatMaskedUint64x8 OpShiftAllLeftConcatUint16x8 OpShiftAllLeftConcatUint16x16 OpShiftAllLeftConcatUint16x32 @@ -6489,24 +5667,6 @@ const ( OpShiftAllRightConcatInt64x2 OpShiftAllRightConcatInt64x4 OpShiftAllRightConcatInt64x8 - OpShiftAllRightConcatMaskedInt16x8 - OpShiftAllRightConcatMaskedInt16x16 - OpShiftAllRightConcatMaskedInt16x32 - OpShiftAllRightConcatMaskedInt32x4 - OpShiftAllRightConcatMaskedInt32x8 - OpShiftAllRightConcatMaskedInt32x16 - OpShiftAllRightConcatMaskedInt64x2 - OpShiftAllRightConcatMaskedInt64x4 - OpShiftAllRightConcatMaskedInt64x8 - OpShiftAllRightConcatMaskedUint16x8 - OpShiftAllRightConcatMaskedUint16x16 - OpShiftAllRightConcatMaskedUint16x32 - OpShiftAllRightConcatMaskedUint32x4 - OpShiftAllRightConcatMaskedUint32x8 - OpShiftAllRightConcatMaskedUint32x16 - OpShiftAllRightConcatMaskedUint64x2 - OpShiftAllRightConcatMaskedUint64x4 - OpShiftAllRightConcatMaskedUint64x8 OpShiftAllRightConcatUint16x8 OpShiftAllRightConcatUint16x16 OpShiftAllRightConcatUint16x32 @@ -6522,24 +5682,12 @@ const ( OpTruncScaledFloat64x2 OpTruncScaledFloat64x4 OpTruncScaledFloat64x8 - OpTruncScaledMaskedFloat32x4 - OpTruncScaledMaskedFloat32x8 - OpTruncScaledMaskedFloat32x16 - OpTruncScaledMaskedFloat64x2 - OpTruncScaledMaskedFloat64x4 - OpTruncScaledMaskedFloat64x8 OpTruncScaledResidueFloat32x4 OpTruncScaledResidueFloat32x8 OpTruncScaledResidueFloat32x16 OpTruncScaledResidueFloat64x2 OpTruncScaledResidueFloat64x4 OpTruncScaledResidueFloat64x8 - OpTruncScaledResidueMaskedFloat32x4 - OpTruncScaledResidueMaskedFloat32x8 - OpTruncScaledResidueMaskedFloat32x16 - OpTruncScaledResidueMaskedFloat64x2 - OpTruncScaledResidueMaskedFloat64x4 - OpTruncScaledResidueMaskedFloat64x8 ) var opcodeTable = [...]opInfo{ @@ -63838,66 +62986,6 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, - { - name: "AbsMaskedInt8x16", - argLen: 2, - generic: true, - }, - { - name: "AbsMaskedInt8x32", - argLen: 2, - generic: true, - }, - { - name: "AbsMaskedInt8x64", - argLen: 2, - generic: true, - }, - { - name: "AbsMaskedInt16x8", - argLen: 2, - generic: true, - }, - { - name: "AbsMaskedInt16x16", - argLen: 2, - generic: true, - }, - { - name: "AbsMaskedInt16x32", - argLen: 2, - generic: true, - }, - { - name: "AbsMaskedInt32x4", - argLen: 2, - generic: true, - }, - { - name: "AbsMaskedInt32x8", - argLen: 2, - generic: true, - }, - { - name: "AbsMaskedInt32x16", - argLen: 2, - generic: true, - }, - { - name: "AbsMaskedInt64x2", - argLen: 2, - generic: 
true, - }, - { - name: "AbsMaskedInt64x4", - argLen: 2, - generic: true, - }, - { - name: "AbsMaskedInt64x8", - argLen: 2, - generic: true, - }, { name: "AddDotProdPairsSaturatedInt32x4", argLen: 3, @@ -63913,21 +63001,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "AddDotProdPairsSaturatedMaskedInt32x4", - argLen: 4, - generic: true, - }, - { - name: "AddDotProdPairsSaturatedMaskedInt32x8", - argLen: 4, - generic: true, - }, - { - name: "AddDotProdPairsSaturatedMaskedInt32x16", - argLen: 4, - generic: true, - }, { name: "AddDotProdQuadrupleInt32x4", argLen: 3, @@ -63943,21 +63016,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "AddDotProdQuadrupleMaskedInt32x4", - argLen: 4, - generic: true, - }, - { - name: "AddDotProdQuadrupleMaskedInt32x8", - argLen: 4, - generic: true, - }, - { - name: "AddDotProdQuadrupleMaskedInt32x16", - argLen: 4, - generic: true, - }, { name: "AddDotProdQuadrupleSaturatedInt32x4", argLen: 3, @@ -63973,21 +63031,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "AddDotProdQuadrupleSaturatedMaskedInt32x4", - argLen: 4, - generic: true, - }, - { - name: "AddDotProdQuadrupleSaturatedMaskedInt32x8", - argLen: 4, - generic: true, - }, - { - name: "AddDotProdQuadrupleSaturatedMaskedInt32x16", - argLen: 4, - generic: true, - }, { name: "AddFloat32x4", argLen: 2, @@ -64096,186 +63139,6 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, - { - name: "AddMaskedFloat32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedFloat32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedFloat32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedFloat64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedFloat64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedFloat64x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedInt8x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedInt8x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedInt8x64", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedInt16x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedInt16x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedInt16x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedInt32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedInt32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedInt32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedInt64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedInt64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedInt64x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedUint8x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedUint8x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedUint8x64", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedUint16x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedUint16x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: 
"AddMaskedUint16x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedUint32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedUint32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedUint32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedUint64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedUint64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddMaskedUint64x8", - argLen: 3, - commutative: true, - generic: true, - }, { name: "AddPairsFloat32x4", argLen: 2, @@ -64382,78 +63245,6 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, - { - name: "AddSaturatedMaskedInt8x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddSaturatedMaskedInt8x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddSaturatedMaskedInt8x64", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddSaturatedMaskedInt16x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddSaturatedMaskedInt16x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddSaturatedMaskedInt16x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddSaturatedMaskedUint8x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddSaturatedMaskedUint8x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddSaturatedMaskedUint8x64", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddSaturatedMaskedUint16x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddSaturatedMaskedUint16x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AddSaturatedMaskedUint16x32", - argLen: 3, - commutative: true, - generic: true, - }, { name: "AddSaturatedUint8x16", argLen: 2, @@ -64654,78 +63445,6 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, - { - name: "AndMaskedInt32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AndMaskedInt32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AndMaskedInt32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AndMaskedInt64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AndMaskedInt64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AndMaskedInt64x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AndMaskedUint32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AndMaskedUint32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AndMaskedUint32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AndMaskedUint64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AndMaskedUint64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AndMaskedUint64x8", - argLen: 3, - commutative: true, - generic: true, - }, { name: "AndNotInt8x16", argLen: 2, @@ -64786,66 +63505,6 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "AndNotMaskedInt32x4", - argLen: 3, - generic: true, - }, - { - name: "AndNotMaskedInt32x8", - argLen: 3, - generic: true, - }, - { - name: "AndNotMaskedInt32x16", - argLen: 3, - generic: true, - }, - { - name: "AndNotMaskedInt64x2", - argLen: 3, - generic: true, - }, - { - name: 
"AndNotMaskedInt64x4", - argLen: 3, - generic: true, - }, - { - name: "AndNotMaskedInt64x8", - argLen: 3, - generic: true, - }, - { - name: "AndNotMaskedUint32x4", - argLen: 3, - generic: true, - }, - { - name: "AndNotMaskedUint32x8", - argLen: 3, - generic: true, - }, - { - name: "AndNotMaskedUint32x16", - argLen: 3, - generic: true, - }, - { - name: "AndNotMaskedUint64x2", - argLen: 3, - generic: true, - }, - { - name: "AndNotMaskedUint64x4", - argLen: 3, - generic: true, - }, - { - name: "AndNotMaskedUint64x8", - argLen: 3, - generic: true, - }, { name: "AndNotUint8x16", argLen: 2, @@ -64978,42 +63637,6 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, - { - name: "AverageMaskedUint8x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AverageMaskedUint8x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AverageMaskedUint8x64", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AverageMaskedUint16x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AverageMaskedUint16x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "AverageMaskedUint16x32", - argLen: 3, - commutative: true, - generic: true, - }, { name: "AverageUint8x16", argLen: 2, @@ -65080,56 +63703,6 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, - { - name: "Broadcast128MaskedFloat32x4", - argLen: 2, - generic: true, - }, - { - name: "Broadcast128MaskedFloat64x2", - argLen: 2, - generic: true, - }, - { - name: "Broadcast128MaskedInt8x16", - argLen: 2, - generic: true, - }, - { - name: "Broadcast128MaskedInt16x8", - argLen: 2, - generic: true, - }, - { - name: "Broadcast128MaskedInt32x4", - argLen: 2, - generic: true, - }, - { - name: "Broadcast128MaskedInt64x2", - argLen: 2, - generic: true, - }, - { - name: "Broadcast128MaskedUint8x16", - argLen: 2, - generic: true, - }, - { - name: "Broadcast128MaskedUint16x8", - argLen: 2, - generic: true, - }, - { - name: "Broadcast128MaskedUint32x4", - argLen: 2, - generic: true, - }, - { - name: "Broadcast128MaskedUint64x2", - argLen: 2, - generic: true, - }, { name: "Broadcast128Uint8x16", argLen: 1, @@ -65180,56 +63753,6 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, - { - name: "Broadcast256MaskedFloat32x4", - argLen: 2, - generic: true, - }, - { - name: "Broadcast256MaskedFloat64x2", - argLen: 2, - generic: true, - }, - { - name: "Broadcast256MaskedInt8x16", - argLen: 2, - generic: true, - }, - { - name: "Broadcast256MaskedInt16x8", - argLen: 2, - generic: true, - }, - { - name: "Broadcast256MaskedInt32x4", - argLen: 2, - generic: true, - }, - { - name: "Broadcast256MaskedInt64x2", - argLen: 2, - generic: true, - }, - { - name: "Broadcast256MaskedUint8x16", - argLen: 2, - generic: true, - }, - { - name: "Broadcast256MaskedUint16x8", - argLen: 2, - generic: true, - }, - { - name: "Broadcast256MaskedUint32x4", - argLen: 2, - generic: true, - }, - { - name: "Broadcast256MaskedUint64x2", - argLen: 2, - generic: true, - }, { name: "Broadcast256Uint8x16", argLen: 1, @@ -65280,56 +63803,6 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, - { - name: "Broadcast512MaskedFloat32x4", - argLen: 2, - generic: true, - }, - { - name: "Broadcast512MaskedFloat64x2", - argLen: 2, - generic: true, - }, - { - name: "Broadcast512MaskedInt8x16", - argLen: 2, - generic: true, - }, - { - name: "Broadcast512MaskedInt16x8", - argLen: 2, - generic: true, - }, - { - name: "Broadcast512MaskedInt32x4", - argLen: 2, - generic: true, - }, - { - 
name: "Broadcast512MaskedInt64x2", - argLen: 2, - generic: true, - }, - { - name: "Broadcast512MaskedUint8x16", - argLen: 2, - generic: true, - }, - { - name: "Broadcast512MaskedUint16x8", - argLen: 2, - generic: true, - }, - { - name: "Broadcast512MaskedUint32x4", - argLen: 2, - generic: true, - }, - { - name: "Broadcast512MaskedUint64x2", - argLen: 2, - generic: true, - }, { name: "Broadcast512Uint8x16", argLen: 1, @@ -65535,21 +64008,6 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, - { - name: "ConvertToInt32MaskedFloat32x4", - argLen: 2, - generic: true, - }, - { - name: "ConvertToInt32MaskedFloat32x8", - argLen: 2, - generic: true, - }, - { - name: "ConvertToInt32MaskedFloat32x16", - argLen: 2, - generic: true, - }, { name: "ConvertToUint32Float32x4", argLen: 1, @@ -65565,21 +64023,6 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, - { - name: "ConvertToUint32MaskedFloat32x4", - argLen: 2, - generic: true, - }, - { - name: "ConvertToUint32MaskedFloat32x8", - argLen: 2, - generic: true, - }, - { - name: "ConvertToUint32MaskedFloat32x16", - argLen: 2, - generic: true, - }, { name: "CopySignInt8x16", argLen: 2, @@ -65640,36 +64083,6 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "DivMaskedFloat32x4", - argLen: 3, - generic: true, - }, - { - name: "DivMaskedFloat32x8", - argLen: 3, - generic: true, - }, - { - name: "DivMaskedFloat32x16", - argLen: 3, - generic: true, - }, - { - name: "DivMaskedFloat64x2", - argLen: 3, - generic: true, - }, - { - name: "DivMaskedFloat64x4", - argLen: 3, - generic: true, - }, - { - name: "DivMaskedFloat64x8", - argLen: 3, - generic: true, - }, { name: "DotProdPairsInt16x8", argLen: 2, @@ -65685,36 +64098,6 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "DotProdPairsMaskedInt16x8", - argLen: 3, - generic: true, - }, - { - name: "DotProdPairsMaskedInt16x16", - argLen: 3, - generic: true, - }, - { - name: "DotProdPairsMaskedInt16x32", - argLen: 3, - generic: true, - }, - { - name: "DotProdPairsSaturatedMaskedUint8x16", - argLen: 3, - generic: true, - }, - { - name: "DotProdPairsSaturatedMaskedUint8x32", - argLen: 3, - generic: true, - }, - { - name: "DotProdPairsSaturatedMaskedUint8x64", - argLen: 3, - generic: true, - }, { name: "DotProdPairsSaturatedUint8x16", argLen: 2, @@ -65838,186 +64221,6 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, - { - name: "EqualMaskedFloat32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedFloat32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedFloat32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedFloat64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedFloat64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedFloat64x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedInt8x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedInt8x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedInt8x64", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedInt16x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedInt16x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedInt16x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedInt32x4", - 
argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedInt32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedInt32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedInt64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedInt64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedInt64x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedUint8x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedUint8x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedUint8x64", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedUint16x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedUint16x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedUint16x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedUint32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedUint32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedUint32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedUint64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedUint64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "EqualMaskedUint64x8", - argLen: 3, - commutative: true, - generic: true, - }, { name: "EqualUint8x16", argLen: 2, @@ -66260,21 +64463,6 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, - { - name: "GaloisFieldMulMaskedUint8x16", - argLen: 3, - generic: true, - }, - { - name: "GaloisFieldMulMaskedUint8x32", - argLen: 3, - generic: true, - }, - { - name: "GaloisFieldMulMaskedUint8x64", - argLen: 3, - generic: true, - }, { name: "GaloisFieldMulUint8x16", argLen: 2, @@ -66540,156 +64728,6 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "GreaterEqualMaskedFloat32x4", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedFloat32x8", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedFloat32x16", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedFloat64x2", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedFloat64x4", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedFloat64x8", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedInt8x16", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedInt8x32", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedInt8x64", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedInt16x8", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedInt16x16", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedInt16x32", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedInt32x4", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedInt32x8", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedInt32x16", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedInt64x2", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedInt64x4", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedInt64x8", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedUint8x16", - argLen: 3, - generic: true, - }, - { 
- name: "GreaterEqualMaskedUint8x32", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedUint8x64", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedUint16x8", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedUint16x16", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedUint16x32", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedUint32x4", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedUint32x8", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedUint32x16", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedUint64x2", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedUint64x4", - argLen: 3, - generic: true, - }, - { - name: "GreaterEqualMaskedUint64x8", - argLen: 3, - generic: true, - }, { name: "GreaterEqualUint8x64", argLen: 2, @@ -66800,156 +64838,6 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "GreaterMaskedFloat32x4", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedFloat32x8", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedFloat32x16", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedFloat64x2", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedFloat64x4", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedFloat64x8", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedInt8x16", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedInt8x32", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedInt8x64", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedInt16x8", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedInt16x16", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedInt16x32", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedInt32x4", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedInt32x8", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedInt32x16", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedInt64x2", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedInt64x4", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedInt64x8", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedUint8x16", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedUint8x32", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedUint8x64", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedUint16x8", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedUint16x16", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedUint16x32", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedUint32x4", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedUint32x8", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedUint32x16", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedUint64x2", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedUint64x4", - argLen: 3, - generic: true, - }, - { - name: "GreaterMaskedUint64x8", - argLen: 3, - generic: true, - }, { name: "GreaterUint8x64", argLen: 2, @@ -67006,42 +64894,6 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, - { - name: "IsNanMaskedFloat32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "IsNanMaskedFloat32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "IsNanMaskedFloat32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - 
name: "IsNanMaskedFloat64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "IsNanMaskedFloat64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "IsNanMaskedFloat64x8", - argLen: 3, - commutative: true, - generic: true, - }, { name: "LessEqualFloat32x4", argLen: 2, @@ -67092,156 +64944,6 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "LessEqualMaskedFloat32x4", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedFloat32x8", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedFloat32x16", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedFloat64x2", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedFloat64x4", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedFloat64x8", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedInt8x16", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedInt8x32", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedInt8x64", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedInt16x8", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedInt16x16", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedInt16x32", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedInt32x4", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedInt32x8", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedInt32x16", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedInt64x2", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedInt64x4", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedInt64x8", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedUint8x16", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedUint8x32", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedUint8x64", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedUint16x8", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedUint16x16", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedUint16x32", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedUint32x4", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedUint32x8", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedUint32x16", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedUint64x2", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedUint64x4", - argLen: 3, - generic: true, - }, - { - name: "LessEqualMaskedUint64x8", - argLen: 3, - generic: true, - }, { name: "LessEqualUint8x64", argLen: 2, @@ -67312,156 +65014,6 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "LessMaskedFloat32x4", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedFloat32x8", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedFloat32x16", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedFloat64x2", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedFloat64x4", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedFloat64x8", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedInt8x16", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedInt8x32", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedInt8x64", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedInt16x8", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedInt16x16", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedInt16x32", - argLen: 3, - 
generic: true, - }, - { - name: "LessMaskedInt32x4", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedInt32x8", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedInt32x16", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedInt64x2", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedInt64x4", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedInt64x8", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedUint8x16", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedUint8x32", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedUint8x64", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedUint16x8", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedUint16x16", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedUint16x32", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedUint32x4", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedUint32x8", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedUint32x16", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedUint64x2", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedUint64x4", - argLen: 3, - generic: true, - }, - { - name: "LessMaskedUint64x8", - argLen: 3, - generic: true, - }, { name: "LessUint8x64", argLen: 2, @@ -67590,186 +65142,6 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, - { - name: "MaxMaskedFloat32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedFloat32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedFloat32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedFloat64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedFloat64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedFloat64x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedInt8x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedInt8x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedInt8x64", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedInt16x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedInt16x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedInt16x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedInt32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedInt32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedInt32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedInt64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedInt64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedInt64x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedUint8x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedUint8x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedUint8x64", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedUint16x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedUint16x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedUint16x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: 
"MaxMaskedUint32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedUint32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedUint32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedUint64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedUint64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaxMaskedUint64x8", - argLen: 3, - commutative: true, - generic: true, - }, { name: "MaxUint8x16", argLen: 2, @@ -67950,186 +65322,6 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, - { - name: "MinMaskedFloat32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedFloat32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedFloat32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedFloat64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedFloat64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedFloat64x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedInt8x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedInt8x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedInt8x64", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedInt16x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedInt16x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedInt16x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedInt32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedInt32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedInt32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedInt64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedInt64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedInt64x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedUint8x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedUint8x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedUint8x64", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedUint16x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedUint16x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedUint16x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedUint32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedUint32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedUint32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedUint64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedUint64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MinMaskedUint64x8", - argLen: 3, - commutative: true, - generic: true, - }, { name: "MinUint8x16", argLen: 2, @@ -68232,36 +65424,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "MulAddMaskedFloat32x4", - argLen: 4, - generic: true, - }, - { - name: "MulAddMaskedFloat32x8", - 
argLen: 4, - generic: true, - }, - { - name: "MulAddMaskedFloat32x16", - argLen: 4, - generic: true, - }, - { - name: "MulAddMaskedFloat64x2", - argLen: 4, - generic: true, - }, - { - name: "MulAddMaskedFloat64x4", - argLen: 4, - generic: true, - }, - { - name: "MulAddMaskedFloat64x8", - argLen: 4, - generic: true, - }, { name: "MulAddSubFloat32x4", argLen: 3, @@ -68292,36 +65454,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "MulAddSubMaskedFloat32x4", - argLen: 4, - generic: true, - }, - { - name: "MulAddSubMaskedFloat32x8", - argLen: 4, - generic: true, - }, - { - name: "MulAddSubMaskedFloat32x16", - argLen: 4, - generic: true, - }, - { - name: "MulAddSubMaskedFloat64x2", - argLen: 4, - generic: true, - }, - { - name: "MulAddSubMaskedFloat64x4", - argLen: 4, - generic: true, - }, - { - name: "MulAddSubMaskedFloat64x8", - argLen: 4, - generic: true, - }, { name: "MulEvenWidenInt32x4", argLen: 2, @@ -68400,42 +65532,6 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, - { - name: "MulHighMaskedInt16x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulHighMaskedInt16x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulHighMaskedInt16x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulHighMaskedUint16x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulHighMaskedUint16x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulHighMaskedUint16x32", - argLen: 3, - commutative: true, - generic: true, - }, { name: "MulHighUint16x8", argLen: 2, @@ -68508,150 +65604,6 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, - { - name: "MulMaskedFloat32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulMaskedFloat32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulMaskedFloat32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulMaskedFloat64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulMaskedFloat64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulMaskedFloat64x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulMaskedInt16x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulMaskedInt16x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulMaskedInt16x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulMaskedInt32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulMaskedInt32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulMaskedInt32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulMaskedInt64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulMaskedInt64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulMaskedInt64x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulMaskedUint16x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulMaskedUint16x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulMaskedUint16x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulMaskedUint32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulMaskedUint32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulMaskedUint32x16", - argLen: 3, 
- commutative: true, - generic: true, - }, - { - name: "MulMaskedUint64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulMaskedUint64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MulMaskedUint64x8", - argLen: 3, - commutative: true, - generic: true, - }, { name: "MulSubAddFloat32x4", argLen: 3, @@ -68682,36 +65634,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "MulSubAddMaskedFloat32x4", - argLen: 4, - generic: true, - }, - { - name: "MulSubAddMaskedFloat32x8", - argLen: 4, - generic: true, - }, - { - name: "MulSubAddMaskedFloat32x16", - argLen: 4, - generic: true, - }, - { - name: "MulSubAddMaskedFloat64x2", - argLen: 4, - generic: true, - }, - { - name: "MulSubAddMaskedFloat64x4", - argLen: 4, - generic: true, - }, - { - name: "MulSubAddMaskedFloat64x8", - argLen: 4, - generic: true, - }, { name: "MulUint16x8", argLen: 2, @@ -68826,186 +65748,6 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, - { - name: "NotEqualMaskedFloat32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedFloat32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedFloat32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedFloat64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedFloat64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedFloat64x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedInt8x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedInt8x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedInt8x64", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedInt16x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedInt16x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedInt16x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedInt32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedInt32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedInt32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedInt64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedInt64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedInt64x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedUint8x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedUint8x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedUint8x64", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedUint16x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedUint16x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedUint16x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedUint32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedUint32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedUint32x16", - argLen: 3, - commutative: true, - generic: true, - }, 
- { - name: "NotEqualMaskedUint64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedUint64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "NotEqualMaskedUint64x8", - argLen: 3, - commutative: true, - generic: true, - }, { name: "NotEqualUint8x64", argLen: 2, @@ -69090,126 +65832,6 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, - { - name: "OnesCountMaskedInt8x16", - argLen: 2, - generic: true, - }, - { - name: "OnesCountMaskedInt8x32", - argLen: 2, - generic: true, - }, - { - name: "OnesCountMaskedInt8x64", - argLen: 2, - generic: true, - }, - { - name: "OnesCountMaskedInt16x8", - argLen: 2, - generic: true, - }, - { - name: "OnesCountMaskedInt16x16", - argLen: 2, - generic: true, - }, - { - name: "OnesCountMaskedInt16x32", - argLen: 2, - generic: true, - }, - { - name: "OnesCountMaskedInt32x4", - argLen: 2, - generic: true, - }, - { - name: "OnesCountMaskedInt32x8", - argLen: 2, - generic: true, - }, - { - name: "OnesCountMaskedInt32x16", - argLen: 2, - generic: true, - }, - { - name: "OnesCountMaskedInt64x2", - argLen: 2, - generic: true, - }, - { - name: "OnesCountMaskedInt64x4", - argLen: 2, - generic: true, - }, - { - name: "OnesCountMaskedInt64x8", - argLen: 2, - generic: true, - }, - { - name: "OnesCountMaskedUint8x16", - argLen: 2, - generic: true, - }, - { - name: "OnesCountMaskedUint8x32", - argLen: 2, - generic: true, - }, - { - name: "OnesCountMaskedUint8x64", - argLen: 2, - generic: true, - }, - { - name: "OnesCountMaskedUint16x8", - argLen: 2, - generic: true, - }, - { - name: "OnesCountMaskedUint16x16", - argLen: 2, - generic: true, - }, - { - name: "OnesCountMaskedUint16x32", - argLen: 2, - generic: true, - }, - { - name: "OnesCountMaskedUint32x4", - argLen: 2, - generic: true, - }, - { - name: "OnesCountMaskedUint32x8", - argLen: 2, - generic: true, - }, - { - name: "OnesCountMaskedUint32x16", - argLen: 2, - generic: true, - }, - { - name: "OnesCountMaskedUint64x2", - argLen: 2, - generic: true, - }, - { - name: "OnesCountMaskedUint64x4", - argLen: 2, - generic: true, - }, - { - name: "OnesCountMaskedUint64x8", - argLen: 2, - generic: true, - }, { name: "OnesCountUint8x16", argLen: 1, @@ -69342,78 +65964,6 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, - { - name: "OrMaskedInt32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "OrMaskedInt32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "OrMaskedInt32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "OrMaskedInt64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "OrMaskedInt64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "OrMaskedInt64x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "OrMaskedUint32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "OrMaskedUint32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "OrMaskedUint32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "OrMaskedUint64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "OrMaskedUint64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "OrMaskedUint64x8", - argLen: 3, - commutative: true, - generic: true, - }, { name: "OrUint8x16", argLen: 2, @@ -69576,156 +66126,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "Permute2MaskedFloat32x4", - argLen: 4, - generic: true, - }, 
- { - name: "Permute2MaskedFloat32x8", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedFloat32x16", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedFloat64x2", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedFloat64x4", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedFloat64x8", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedInt8x16", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedInt8x32", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedInt8x64", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedInt16x8", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedInt16x16", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedInt16x32", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedInt32x4", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedInt32x8", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedInt32x16", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedInt64x2", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedInt64x4", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedInt64x8", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedUint8x16", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedUint8x32", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedUint8x64", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedUint16x8", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedUint16x16", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedUint16x32", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedUint32x4", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedUint32x8", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedUint32x16", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedUint64x2", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedUint64x4", - argLen: 4, - generic: true, - }, - { - name: "Permute2MaskedUint64x8", - argLen: 4, - generic: true, - }, { name: "Permute2Uint8x16", argLen: 3, @@ -69856,126 +66256,6 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "PermuteMaskedFloat32x8", - argLen: 3, - generic: true, - }, - { - name: "PermuteMaskedFloat32x16", - argLen: 3, - generic: true, - }, - { - name: "PermuteMaskedFloat64x4", - argLen: 3, - generic: true, - }, - { - name: "PermuteMaskedFloat64x8", - argLen: 3, - generic: true, - }, - { - name: "PermuteMaskedInt8x16", - argLen: 3, - generic: true, - }, - { - name: "PermuteMaskedInt8x32", - argLen: 3, - generic: true, - }, - { - name: "PermuteMaskedInt8x64", - argLen: 3, - generic: true, - }, - { - name: "PermuteMaskedInt16x8", - argLen: 3, - generic: true, - }, - { - name: "PermuteMaskedInt16x16", - argLen: 3, - generic: true, - }, - { - name: "PermuteMaskedInt16x32", - argLen: 3, - generic: true, - }, - { - name: "PermuteMaskedInt32x8", - argLen: 3, - generic: true, - }, - { - name: "PermuteMaskedInt32x16", - argLen: 3, - generic: true, - }, - { - name: "PermuteMaskedInt64x4", - argLen: 3, - generic: true, - }, - { - name: "PermuteMaskedInt64x8", - argLen: 3, - generic: true, - }, - { - name: "PermuteMaskedUint8x16", - argLen: 3, - generic: true, - }, - { - name: "PermuteMaskedUint8x32", - argLen: 3, - generic: true, - }, - { - name: "PermuteMaskedUint8x64", - argLen: 3, - generic: true, - }, - { - name: "PermuteMaskedUint16x8", - argLen: 3, - generic: true, - }, - { - name: 
"PermuteMaskedUint16x16", - argLen: 3, - generic: true, - }, - { - name: "PermuteMaskedUint16x32", - argLen: 3, - generic: true, - }, - { - name: "PermuteMaskedUint32x8", - argLen: 3, - generic: true, - }, - { - name: "PermuteMaskedUint32x16", - argLen: 3, - generic: true, - }, - { - name: "PermuteMaskedUint64x4", - argLen: 3, - generic: true, - }, - { - name: "PermuteMaskedUint64x8", - argLen: 3, - generic: true, - }, { name: "PermuteUint8x16", argLen: 2, @@ -70056,36 +66336,6 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, - { - name: "ReciprocalMaskedFloat32x4", - argLen: 2, - generic: true, - }, - { - name: "ReciprocalMaskedFloat32x8", - argLen: 2, - generic: true, - }, - { - name: "ReciprocalMaskedFloat32x16", - argLen: 2, - generic: true, - }, - { - name: "ReciprocalMaskedFloat64x2", - argLen: 2, - generic: true, - }, - { - name: "ReciprocalMaskedFloat64x4", - argLen: 2, - generic: true, - }, - { - name: "ReciprocalMaskedFloat64x8", - argLen: 2, - generic: true, - }, { name: "ReciprocalSqrtFloat32x4", argLen: 1, @@ -70116,36 +66366,6 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, - { - name: "ReciprocalSqrtMaskedFloat32x4", - argLen: 2, - generic: true, - }, - { - name: "ReciprocalSqrtMaskedFloat32x8", - argLen: 2, - generic: true, - }, - { - name: "ReciprocalSqrtMaskedFloat32x16", - argLen: 2, - generic: true, - }, - { - name: "ReciprocalSqrtMaskedFloat64x2", - argLen: 2, - generic: true, - }, - { - name: "ReciprocalSqrtMaskedFloat64x4", - argLen: 2, - generic: true, - }, - { - name: "ReciprocalSqrtMaskedFloat64x8", - argLen: 2, - generic: true, - }, { name: "RotateLeftInt32x4", argLen: 2, @@ -70176,66 +66396,6 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "RotateLeftMaskedInt32x4", - argLen: 3, - generic: true, - }, - { - name: "RotateLeftMaskedInt32x8", - argLen: 3, - generic: true, - }, - { - name: "RotateLeftMaskedInt32x16", - argLen: 3, - generic: true, - }, - { - name: "RotateLeftMaskedInt64x2", - argLen: 3, - generic: true, - }, - { - name: "RotateLeftMaskedInt64x4", - argLen: 3, - generic: true, - }, - { - name: "RotateLeftMaskedInt64x8", - argLen: 3, - generic: true, - }, - { - name: "RotateLeftMaskedUint32x4", - argLen: 3, - generic: true, - }, - { - name: "RotateLeftMaskedUint32x8", - argLen: 3, - generic: true, - }, - { - name: "RotateLeftMaskedUint32x16", - argLen: 3, - generic: true, - }, - { - name: "RotateLeftMaskedUint64x2", - argLen: 3, - generic: true, - }, - { - name: "RotateLeftMaskedUint64x4", - argLen: 3, - generic: true, - }, - { - name: "RotateLeftMaskedUint64x8", - argLen: 3, - generic: true, - }, { name: "RotateLeftUint32x4", argLen: 2, @@ -70296,66 +66456,6 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "RotateRightMaskedInt32x4", - argLen: 3, - generic: true, - }, - { - name: "RotateRightMaskedInt32x8", - argLen: 3, - generic: true, - }, - { - name: "RotateRightMaskedInt32x16", - argLen: 3, - generic: true, - }, - { - name: "RotateRightMaskedInt64x2", - argLen: 3, - generic: true, - }, - { - name: "RotateRightMaskedInt64x4", - argLen: 3, - generic: true, - }, - { - name: "RotateRightMaskedInt64x8", - argLen: 3, - generic: true, - }, - { - name: "RotateRightMaskedUint32x4", - argLen: 3, - generic: true, - }, - { - name: "RotateRightMaskedUint32x8", - argLen: 3, - generic: true, - }, - { - name: "RotateRightMaskedUint32x16", - argLen: 3, - generic: true, - }, - { - name: "RotateRightMaskedUint64x2", - argLen: 3, - generic: true, - }, - { - name: 
"RotateRightMaskedUint64x4", - argLen: 3, - generic: true, - }, - { - name: "RotateRightMaskedUint64x8", - argLen: 3, - generic: true, - }, { name: "RotateRightUint32x4", argLen: 2, @@ -70436,36 +66536,6 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "ScaleMaskedFloat32x4", - argLen: 3, - generic: true, - }, - { - name: "ScaleMaskedFloat32x8", - argLen: 3, - generic: true, - }, - { - name: "ScaleMaskedFloat32x16", - argLen: 3, - generic: true, - }, - { - name: "ScaleMaskedFloat64x2", - argLen: 3, - generic: true, - }, - { - name: "ScaleMaskedFloat64x4", - argLen: 3, - generic: true, - }, - { - name: "ScaleMaskedFloat64x8", - argLen: 3, - generic: true, - }, { name: "SetHiFloat32x8", argLen: 2, @@ -70711,96 +66781,6 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "ShiftAllLeftMaskedInt16x8", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftMaskedInt16x16", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftMaskedInt16x32", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftMaskedInt32x4", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftMaskedInt32x8", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftMaskedInt32x16", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftMaskedInt64x2", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftMaskedInt64x4", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftMaskedInt64x8", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftMaskedUint16x8", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftMaskedUint16x16", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftMaskedUint16x32", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftMaskedUint32x4", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftMaskedUint32x8", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftMaskedUint32x16", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftMaskedUint64x2", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftMaskedUint64x4", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftMaskedUint64x8", - argLen: 3, - generic: true, - }, { name: "ShiftAllLeftUint16x8", argLen: 2, @@ -70891,96 +66871,6 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "ShiftAllRightMaskedInt16x8", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightMaskedInt16x16", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightMaskedInt16x32", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightMaskedInt32x4", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightMaskedInt32x8", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightMaskedInt32x16", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightMaskedInt64x2", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightMaskedInt64x4", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightMaskedInt64x8", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightMaskedUint16x8", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightMaskedUint16x16", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightMaskedUint16x32", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightMaskedUint32x4", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightMaskedUint32x8", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightMaskedUint32x16", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightMaskedUint64x2", - argLen: 3, - 
generic: true, - }, - { - name: "ShiftAllRightMaskedUint64x4", - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightMaskedUint64x8", - argLen: 3, - generic: true, - }, { name: "ShiftAllRightUint16x8", argLen: 2, @@ -71071,96 +66961,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "ShiftLeftConcatMaskedInt16x8", - argLen: 4, - generic: true, - }, - { - name: "ShiftLeftConcatMaskedInt16x16", - argLen: 4, - generic: true, - }, - { - name: "ShiftLeftConcatMaskedInt16x32", - argLen: 4, - generic: true, - }, - { - name: "ShiftLeftConcatMaskedInt32x4", - argLen: 4, - generic: true, - }, - { - name: "ShiftLeftConcatMaskedInt32x8", - argLen: 4, - generic: true, - }, - { - name: "ShiftLeftConcatMaskedInt32x16", - argLen: 4, - generic: true, - }, - { - name: "ShiftLeftConcatMaskedInt64x2", - argLen: 4, - generic: true, - }, - { - name: "ShiftLeftConcatMaskedInt64x4", - argLen: 4, - generic: true, - }, - { - name: "ShiftLeftConcatMaskedInt64x8", - argLen: 4, - generic: true, - }, - { - name: "ShiftLeftConcatMaskedUint16x8", - argLen: 4, - generic: true, - }, - { - name: "ShiftLeftConcatMaskedUint16x16", - argLen: 4, - generic: true, - }, - { - name: "ShiftLeftConcatMaskedUint16x32", - argLen: 4, - generic: true, - }, - { - name: "ShiftLeftConcatMaskedUint32x4", - argLen: 4, - generic: true, - }, - { - name: "ShiftLeftConcatMaskedUint32x8", - argLen: 4, - generic: true, - }, - { - name: "ShiftLeftConcatMaskedUint32x16", - argLen: 4, - generic: true, - }, - { - name: "ShiftLeftConcatMaskedUint64x2", - argLen: 4, - generic: true, - }, - { - name: "ShiftLeftConcatMaskedUint64x4", - argLen: 4, - generic: true, - }, - { - name: "ShiftLeftConcatMaskedUint64x8", - argLen: 4, - generic: true, - }, { name: "ShiftLeftConcatUint16x8", argLen: 3, @@ -71251,96 +67051,6 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "ShiftLeftMaskedInt16x8", - argLen: 3, - generic: true, - }, - { - name: "ShiftLeftMaskedInt16x16", - argLen: 3, - generic: true, - }, - { - name: "ShiftLeftMaskedInt16x32", - argLen: 3, - generic: true, - }, - { - name: "ShiftLeftMaskedInt32x4", - argLen: 3, - generic: true, - }, - { - name: "ShiftLeftMaskedInt32x8", - argLen: 3, - generic: true, - }, - { - name: "ShiftLeftMaskedInt32x16", - argLen: 3, - generic: true, - }, - { - name: "ShiftLeftMaskedInt64x2", - argLen: 3, - generic: true, - }, - { - name: "ShiftLeftMaskedInt64x4", - argLen: 3, - generic: true, - }, - { - name: "ShiftLeftMaskedInt64x8", - argLen: 3, - generic: true, - }, - { - name: "ShiftLeftMaskedUint16x8", - argLen: 3, - generic: true, - }, - { - name: "ShiftLeftMaskedUint16x16", - argLen: 3, - generic: true, - }, - { - name: "ShiftLeftMaskedUint16x32", - argLen: 3, - generic: true, - }, - { - name: "ShiftLeftMaskedUint32x4", - argLen: 3, - generic: true, - }, - { - name: "ShiftLeftMaskedUint32x8", - argLen: 3, - generic: true, - }, - { - name: "ShiftLeftMaskedUint32x16", - argLen: 3, - generic: true, - }, - { - name: "ShiftLeftMaskedUint64x2", - argLen: 3, - generic: true, - }, - { - name: "ShiftLeftMaskedUint64x4", - argLen: 3, - generic: true, - }, - { - name: "ShiftLeftMaskedUint64x8", - argLen: 3, - generic: true, - }, { name: "ShiftLeftUint16x8", argLen: 2, @@ -71431,96 +67141,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "ShiftRightConcatMaskedInt16x8", - argLen: 4, - generic: true, - }, - { - name: "ShiftRightConcatMaskedInt16x16", - argLen: 4, - generic: true, - }, - { - name: "ShiftRightConcatMaskedInt16x32", - argLen: 4, - 
generic: true, - }, - { - name: "ShiftRightConcatMaskedInt32x4", - argLen: 4, - generic: true, - }, - { - name: "ShiftRightConcatMaskedInt32x8", - argLen: 4, - generic: true, - }, - { - name: "ShiftRightConcatMaskedInt32x16", - argLen: 4, - generic: true, - }, - { - name: "ShiftRightConcatMaskedInt64x2", - argLen: 4, - generic: true, - }, - { - name: "ShiftRightConcatMaskedInt64x4", - argLen: 4, - generic: true, - }, - { - name: "ShiftRightConcatMaskedInt64x8", - argLen: 4, - generic: true, - }, - { - name: "ShiftRightConcatMaskedUint16x8", - argLen: 4, - generic: true, - }, - { - name: "ShiftRightConcatMaskedUint16x16", - argLen: 4, - generic: true, - }, - { - name: "ShiftRightConcatMaskedUint16x32", - argLen: 4, - generic: true, - }, - { - name: "ShiftRightConcatMaskedUint32x4", - argLen: 4, - generic: true, - }, - { - name: "ShiftRightConcatMaskedUint32x8", - argLen: 4, - generic: true, - }, - { - name: "ShiftRightConcatMaskedUint32x16", - argLen: 4, - generic: true, - }, - { - name: "ShiftRightConcatMaskedUint64x2", - argLen: 4, - generic: true, - }, - { - name: "ShiftRightConcatMaskedUint64x4", - argLen: 4, - generic: true, - }, - { - name: "ShiftRightConcatMaskedUint64x8", - argLen: 4, - generic: true, - }, { name: "ShiftRightConcatUint16x8", argLen: 3, @@ -71611,96 +67231,6 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "ShiftRightMaskedInt16x8", - argLen: 3, - generic: true, - }, - { - name: "ShiftRightMaskedInt16x16", - argLen: 3, - generic: true, - }, - { - name: "ShiftRightMaskedInt16x32", - argLen: 3, - generic: true, - }, - { - name: "ShiftRightMaskedInt32x4", - argLen: 3, - generic: true, - }, - { - name: "ShiftRightMaskedInt32x8", - argLen: 3, - generic: true, - }, - { - name: "ShiftRightMaskedInt32x16", - argLen: 3, - generic: true, - }, - { - name: "ShiftRightMaskedInt64x2", - argLen: 3, - generic: true, - }, - { - name: "ShiftRightMaskedInt64x4", - argLen: 3, - generic: true, - }, - { - name: "ShiftRightMaskedInt64x8", - argLen: 3, - generic: true, - }, - { - name: "ShiftRightMaskedUint16x8", - argLen: 3, - generic: true, - }, - { - name: "ShiftRightMaskedUint16x16", - argLen: 3, - generic: true, - }, - { - name: "ShiftRightMaskedUint16x32", - argLen: 3, - generic: true, - }, - { - name: "ShiftRightMaskedUint32x4", - argLen: 3, - generic: true, - }, - { - name: "ShiftRightMaskedUint32x8", - argLen: 3, - generic: true, - }, - { - name: "ShiftRightMaskedUint32x16", - argLen: 3, - generic: true, - }, - { - name: "ShiftRightMaskedUint64x2", - argLen: 3, - generic: true, - }, - { - name: "ShiftRightMaskedUint64x4", - argLen: 3, - generic: true, - }, - { - name: "ShiftRightMaskedUint64x8", - argLen: 3, - generic: true, - }, { name: "ShiftRightUint16x8", argLen: 2, @@ -71776,36 +67306,6 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, - { - name: "SqrtMaskedFloat32x4", - argLen: 2, - generic: true, - }, - { - name: "SqrtMaskedFloat32x8", - argLen: 2, - generic: true, - }, - { - name: "SqrtMaskedFloat32x16", - argLen: 2, - generic: true, - }, - { - name: "SqrtMaskedFloat64x2", - argLen: 2, - generic: true, - }, - { - name: "SqrtMaskedFloat64x4", - argLen: 2, - generic: true, - }, - { - name: "SqrtMaskedFloat64x8", - argLen: 2, - generic: true, - }, { name: "SubFloat32x4", argLen: 2, @@ -71896,156 +67396,6 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "SubMaskedFloat32x4", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedFloat32x8", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedFloat32x16", - 
argLen: 3, - generic: true, - }, - { - name: "SubMaskedFloat64x2", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedFloat64x4", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedFloat64x8", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedInt8x16", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedInt8x32", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedInt8x64", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedInt16x8", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedInt16x16", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedInt16x32", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedInt32x4", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedInt32x8", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedInt32x16", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedInt64x2", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedInt64x4", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedInt64x8", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedUint8x16", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedUint8x32", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedUint8x64", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedUint16x8", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedUint16x16", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedUint16x32", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedUint32x4", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedUint32x8", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedUint32x16", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedUint64x2", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedUint64x4", - argLen: 3, - generic: true, - }, - { - name: "SubMaskedUint64x8", - argLen: 3, - generic: true, - }, { name: "SubPairsFloat32x4", argLen: 2, @@ -72146,66 +67496,6 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "SubSaturatedMaskedInt8x16", - argLen: 3, - generic: true, - }, - { - name: "SubSaturatedMaskedInt8x32", - argLen: 3, - generic: true, - }, - { - name: "SubSaturatedMaskedInt8x64", - argLen: 3, - generic: true, - }, - { - name: "SubSaturatedMaskedInt16x8", - argLen: 3, - generic: true, - }, - { - name: "SubSaturatedMaskedInt16x16", - argLen: 3, - generic: true, - }, - { - name: "SubSaturatedMaskedInt16x32", - argLen: 3, - generic: true, - }, - { - name: "SubSaturatedMaskedUint8x16", - argLen: 3, - generic: true, - }, - { - name: "SubSaturatedMaskedUint8x32", - argLen: 3, - generic: true, - }, - { - name: "SubSaturatedMaskedUint8x64", - argLen: 3, - generic: true, - }, - { - name: "SubSaturatedMaskedUint16x8", - argLen: 3, - generic: true, - }, - { - name: "SubSaturatedMaskedUint16x16", - argLen: 3, - generic: true, - }, - { - name: "SubSaturatedMaskedUint16x32", - argLen: 3, - generic: true, - }, { name: "SubSaturatedUint8x16", argLen: 2, @@ -72388,78 +67678,6 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, - { - name: "XorMaskedInt32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "XorMaskedInt32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "XorMaskedInt32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "XorMaskedInt64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "XorMaskedInt64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "XorMaskedInt64x8", - argLen: 3, - commutative: 
true, - generic: true, - }, - { - name: "XorMaskedUint32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "XorMaskedUint32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "XorMaskedUint32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "XorMaskedUint64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "XorMaskedUint64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "XorMaskedUint64x8", - argLen: 3, - commutative: true, - generic: true, - }, { name: "XorUint8x16", argLen: 2, @@ -72598,42 +67816,6 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, - { - name: "CeilScaledMaskedFloat32x4", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "CeilScaledMaskedFloat32x8", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "CeilScaledMaskedFloat32x16", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "CeilScaledMaskedFloat64x2", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "CeilScaledMaskedFloat64x4", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "CeilScaledMaskedFloat64x8", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, { name: "CeilScaledResidueFloat32x4", auxType: auxUInt8, @@ -72670,42 +67852,6 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, - { - name: "CeilScaledResidueMaskedFloat32x4", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "CeilScaledResidueMaskedFloat32x8", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "CeilScaledResidueMaskedFloat32x16", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "CeilScaledResidueMaskedFloat64x2", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "CeilScaledResidueMaskedFloat64x4", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "CeilScaledResidueMaskedFloat64x8", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, { name: "FloorScaledFloat32x4", auxType: auxUInt8, @@ -72742,42 +67888,6 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, - { - name: "FloorScaledMaskedFloat32x4", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "FloorScaledMaskedFloat32x8", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "FloorScaledMaskedFloat32x16", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "FloorScaledMaskedFloat64x2", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "FloorScaledMaskedFloat64x4", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "FloorScaledMaskedFloat64x8", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, { name: "FloorScaledResidueFloat32x4", auxType: auxUInt8, @@ -72814,60 +67924,6 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, - { - name: "FloorScaledResidueMaskedFloat32x4", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "FloorScaledResidueMaskedFloat32x8", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "FloorScaledResidueMaskedFloat32x16", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "FloorScaledResidueMaskedFloat64x2", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "FloorScaledResidueMaskedFloat64x4", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "FloorScaledResidueMaskedFloat64x8", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: 
"GaloisFieldAffineTransformInverseMaskedUint8x16", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "GaloisFieldAffineTransformInverseMaskedUint8x32", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "GaloisFieldAffineTransformInverseMaskedUint8x64", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, { name: "GaloisFieldAffineTransformInverseUint8x16", auxType: auxUInt8, @@ -72886,24 +67942,6 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "GaloisFieldAffineTransformMaskedUint8x16", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "GaloisFieldAffineTransformMaskedUint8x32", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "GaloisFieldAffineTransformMaskedUint8x64", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, { name: "GaloisFieldAffineTransformUint8x16", auxType: auxUInt8, @@ -73018,78 +68056,6 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, - { - name: "RotateAllLeftMaskedInt32x4", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RotateAllLeftMaskedInt32x8", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RotateAllLeftMaskedInt32x16", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RotateAllLeftMaskedInt64x2", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RotateAllLeftMaskedInt64x4", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RotateAllLeftMaskedInt64x8", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RotateAllLeftMaskedUint32x4", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RotateAllLeftMaskedUint32x8", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RotateAllLeftMaskedUint32x16", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RotateAllLeftMaskedUint64x2", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RotateAllLeftMaskedUint64x4", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RotateAllLeftMaskedUint64x8", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, { name: "RotateAllLeftUint32x4", auxType: auxUInt8, @@ -73162,78 +68128,6 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, - { - name: "RotateAllRightMaskedInt32x4", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RotateAllRightMaskedInt32x8", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RotateAllRightMaskedInt32x16", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RotateAllRightMaskedInt64x2", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RotateAllRightMaskedInt64x4", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RotateAllRightMaskedInt64x8", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RotateAllRightMaskedUint32x4", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RotateAllRightMaskedUint32x8", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RotateAllRightMaskedUint32x16", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RotateAllRightMaskedUint64x2", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RotateAllRightMaskedUint64x4", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RotateAllRightMaskedUint64x8", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, { name: "RotateAllRightUint32x4", auxType: 
auxUInt8, @@ -73306,42 +68200,6 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, - { - name: "RoundToEvenScaledMaskedFloat32x4", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RoundToEvenScaledMaskedFloat32x8", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RoundToEvenScaledMaskedFloat32x16", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RoundToEvenScaledMaskedFloat64x2", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RoundToEvenScaledMaskedFloat64x4", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RoundToEvenScaledMaskedFloat64x8", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, { name: "RoundToEvenScaledResidueFloat32x4", auxType: auxUInt8, @@ -73378,42 +68236,6 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, - { - name: "RoundToEvenScaledResidueMaskedFloat32x4", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RoundToEvenScaledResidueMaskedFloat32x8", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RoundToEvenScaledResidueMaskedFloat32x16", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RoundToEvenScaledResidueMaskedFloat64x2", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RoundToEvenScaledResidueMaskedFloat64x4", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "RoundToEvenScaledResidueMaskedFloat64x8", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, { name: "SetElemFloat32x4", auxType: auxUInt8, @@ -73528,114 +68350,6 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "ShiftAllLeftConcatMaskedInt16x8", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftConcatMaskedInt16x16", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftConcatMaskedInt16x32", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftConcatMaskedInt32x4", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftConcatMaskedInt32x8", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftConcatMaskedInt32x16", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftConcatMaskedInt64x2", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftConcatMaskedInt64x4", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftConcatMaskedInt64x8", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftConcatMaskedUint16x8", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftConcatMaskedUint16x16", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftConcatMaskedUint16x32", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftConcatMaskedUint32x4", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftConcatMaskedUint32x8", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftConcatMaskedUint32x16", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftConcatMaskedUint64x2", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftConcatMaskedUint64x4", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllLeftConcatMaskedUint64x8", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, { name: 
"ShiftAllLeftConcatUint16x8", auxType: auxUInt8, @@ -73744,114 +68458,6 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "ShiftAllRightConcatMaskedInt16x8", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightConcatMaskedInt16x16", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightConcatMaskedInt16x32", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightConcatMaskedInt32x4", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightConcatMaskedInt32x8", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightConcatMaskedInt32x16", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightConcatMaskedInt64x2", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightConcatMaskedInt64x4", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightConcatMaskedInt64x8", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightConcatMaskedUint16x8", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightConcatMaskedUint16x16", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightConcatMaskedUint16x32", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightConcatMaskedUint32x4", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightConcatMaskedUint32x8", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightConcatMaskedUint32x16", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightConcatMaskedUint64x2", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightConcatMaskedUint64x4", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, - { - name: "ShiftAllRightConcatMaskedUint64x8", - auxType: auxUInt8, - argLen: 3, - generic: true, - }, { name: "ShiftAllRightConcatUint16x8", auxType: auxUInt8, @@ -73942,42 +68548,6 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, - { - name: "TruncScaledMaskedFloat32x4", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "TruncScaledMaskedFloat32x8", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "TruncScaledMaskedFloat32x16", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "TruncScaledMaskedFloat64x2", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "TruncScaledMaskedFloat64x4", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "TruncScaledMaskedFloat64x8", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, { name: "TruncScaledResidueFloat32x4", auxType: auxUInt8, @@ -74014,42 +68584,6 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, - { - name: "TruncScaledResidueMaskedFloat32x4", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "TruncScaledResidueMaskedFloat32x8", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "TruncScaledResidueMaskedFloat32x16", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "TruncScaledResidueMaskedFloat64x2", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "TruncScaledResidueMaskedFloat64x4", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, - { - name: "TruncScaledResidueMaskedFloat64x8", - auxType: auxUInt8, - argLen: 2, - generic: true, - }, } func (o Op) Asm() obj.As 
{ return opcodeTable[o].asm } diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index 69393014c78..87b1e0586d7 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -537,72 +537,36 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64VPSLLD256(v) case OpAMD64VPSLLD512: return rewriteValueAMD64_OpAMD64VPSLLD512(v) - case OpAMD64VPSLLDMasked128: - return rewriteValueAMD64_OpAMD64VPSLLDMasked128(v) - case OpAMD64VPSLLDMasked256: - return rewriteValueAMD64_OpAMD64VPSLLDMasked256(v) - case OpAMD64VPSLLDMasked512: - return rewriteValueAMD64_OpAMD64VPSLLDMasked512(v) case OpAMD64VPSLLQ128: return rewriteValueAMD64_OpAMD64VPSLLQ128(v) case OpAMD64VPSLLQ256: return rewriteValueAMD64_OpAMD64VPSLLQ256(v) case OpAMD64VPSLLQ512: return rewriteValueAMD64_OpAMD64VPSLLQ512(v) - case OpAMD64VPSLLQMasked128: - return rewriteValueAMD64_OpAMD64VPSLLQMasked128(v) - case OpAMD64VPSLLQMasked256: - return rewriteValueAMD64_OpAMD64VPSLLQMasked256(v) - case OpAMD64VPSLLQMasked512: - return rewriteValueAMD64_OpAMD64VPSLLQMasked512(v) case OpAMD64VPSLLW128: return rewriteValueAMD64_OpAMD64VPSLLW128(v) case OpAMD64VPSLLW256: return rewriteValueAMD64_OpAMD64VPSLLW256(v) case OpAMD64VPSLLW512: return rewriteValueAMD64_OpAMD64VPSLLW512(v) - case OpAMD64VPSLLWMasked128: - return rewriteValueAMD64_OpAMD64VPSLLWMasked128(v) - case OpAMD64VPSLLWMasked256: - return rewriteValueAMD64_OpAMD64VPSLLWMasked256(v) - case OpAMD64VPSLLWMasked512: - return rewriteValueAMD64_OpAMD64VPSLLWMasked512(v) case OpAMD64VPSRAD128: return rewriteValueAMD64_OpAMD64VPSRAD128(v) case OpAMD64VPSRAD256: return rewriteValueAMD64_OpAMD64VPSRAD256(v) case OpAMD64VPSRAD512: return rewriteValueAMD64_OpAMD64VPSRAD512(v) - case OpAMD64VPSRADMasked128: - return rewriteValueAMD64_OpAMD64VPSRADMasked128(v) - case OpAMD64VPSRADMasked256: - return rewriteValueAMD64_OpAMD64VPSRADMasked256(v) - case OpAMD64VPSRADMasked512: - return rewriteValueAMD64_OpAMD64VPSRADMasked512(v) case OpAMD64VPSRAQ128: return rewriteValueAMD64_OpAMD64VPSRAQ128(v) case OpAMD64VPSRAQ256: return rewriteValueAMD64_OpAMD64VPSRAQ256(v) case OpAMD64VPSRAQ512: return rewriteValueAMD64_OpAMD64VPSRAQ512(v) - case OpAMD64VPSRAQMasked128: - return rewriteValueAMD64_OpAMD64VPSRAQMasked128(v) - case OpAMD64VPSRAQMasked256: - return rewriteValueAMD64_OpAMD64VPSRAQMasked256(v) - case OpAMD64VPSRAQMasked512: - return rewriteValueAMD64_OpAMD64VPSRAQMasked512(v) case OpAMD64VPSRAW128: return rewriteValueAMD64_OpAMD64VPSRAW128(v) case OpAMD64VPSRAW256: return rewriteValueAMD64_OpAMD64VPSRAW256(v) case OpAMD64VPSRAW512: return rewriteValueAMD64_OpAMD64VPSRAW512(v) - case OpAMD64VPSRAWMasked128: - return rewriteValueAMD64_OpAMD64VPSRAWMasked128(v) - case OpAMD64VPSRAWMasked256: - return rewriteValueAMD64_OpAMD64VPSRAWMasked256(v) - case OpAMD64VPSRAWMasked512: - return rewriteValueAMD64_OpAMD64VPSRAWMasked512(v) case OpAMD64XADDLlock: return rewriteValueAMD64_OpAMD64XADDLlock(v) case OpAMD64XADDQlock: @@ -667,30 +631,6 @@ func rewriteValueAMD64(v *Value) bool { case OpAbsInt8x64: v.Op = OpAMD64VPABSB512 return true - case OpAbsMaskedInt16x16: - return rewriteValueAMD64_OpAbsMaskedInt16x16(v) - case OpAbsMaskedInt16x32: - return rewriteValueAMD64_OpAbsMaskedInt16x32(v) - case OpAbsMaskedInt16x8: - return rewriteValueAMD64_OpAbsMaskedInt16x8(v) - case OpAbsMaskedInt32x16: - return rewriteValueAMD64_OpAbsMaskedInt32x16(v) - case OpAbsMaskedInt32x4: - return 
rewriteValueAMD64_OpAbsMaskedInt32x4(v) - case OpAbsMaskedInt32x8: - return rewriteValueAMD64_OpAbsMaskedInt32x8(v) - case OpAbsMaskedInt64x2: - return rewriteValueAMD64_OpAbsMaskedInt64x2(v) - case OpAbsMaskedInt64x4: - return rewriteValueAMD64_OpAbsMaskedInt64x4(v) - case OpAbsMaskedInt64x8: - return rewriteValueAMD64_OpAbsMaskedInt64x8(v) - case OpAbsMaskedInt8x16: - return rewriteValueAMD64_OpAbsMaskedInt8x16(v) - case OpAbsMaskedInt8x32: - return rewriteValueAMD64_OpAbsMaskedInt8x32(v) - case OpAbsMaskedInt8x64: - return rewriteValueAMD64_OpAbsMaskedInt8x64(v) case OpAdd16: v.Op = OpAMD64ADDL return true @@ -718,12 +658,6 @@ func rewriteValueAMD64(v *Value) bool { case OpAddDotProdPairsSaturatedInt32x8: v.Op = OpAMD64VPDPWSSDS256 return true - case OpAddDotProdPairsSaturatedMaskedInt32x16: - return rewriteValueAMD64_OpAddDotProdPairsSaturatedMaskedInt32x16(v) - case OpAddDotProdPairsSaturatedMaskedInt32x4: - return rewriteValueAMD64_OpAddDotProdPairsSaturatedMaskedInt32x4(v) - case OpAddDotProdPairsSaturatedMaskedInt32x8: - return rewriteValueAMD64_OpAddDotProdPairsSaturatedMaskedInt32x8(v) case OpAddDotProdQuadrupleInt32x16: v.Op = OpAMD64VPDPBUSD512 return true @@ -733,12 +667,6 @@ func rewriteValueAMD64(v *Value) bool { case OpAddDotProdQuadrupleInt32x8: v.Op = OpAMD64VPDPBUSD256 return true - case OpAddDotProdQuadrupleMaskedInt32x16: - return rewriteValueAMD64_OpAddDotProdQuadrupleMaskedInt32x16(v) - case OpAddDotProdQuadrupleMaskedInt32x4: - return rewriteValueAMD64_OpAddDotProdQuadrupleMaskedInt32x4(v) - case OpAddDotProdQuadrupleMaskedInt32x8: - return rewriteValueAMD64_OpAddDotProdQuadrupleMaskedInt32x8(v) case OpAddDotProdQuadrupleSaturatedInt32x16: v.Op = OpAMD64VPDPBUSDS512 return true @@ -748,12 +676,6 @@ func rewriteValueAMD64(v *Value) bool { case OpAddDotProdQuadrupleSaturatedInt32x8: v.Op = OpAMD64VPDPBUSDS256 return true - case OpAddDotProdQuadrupleSaturatedMaskedInt32x16: - return rewriteValueAMD64_OpAddDotProdQuadrupleSaturatedMaskedInt32x16(v) - case OpAddDotProdQuadrupleSaturatedMaskedInt32x4: - return rewriteValueAMD64_OpAddDotProdQuadrupleSaturatedMaskedInt32x4(v) - case OpAddDotProdQuadrupleSaturatedMaskedInt32x8: - return rewriteValueAMD64_OpAddDotProdQuadrupleSaturatedMaskedInt32x8(v) case OpAddFloat32x16: v.Op = OpAMD64VADDPS512 return true @@ -808,66 +730,6 @@ func rewriteValueAMD64(v *Value) bool { case OpAddInt8x64: v.Op = OpAMD64VPADDB512 return true - case OpAddMaskedFloat32x16: - return rewriteValueAMD64_OpAddMaskedFloat32x16(v) - case OpAddMaskedFloat32x4: - return rewriteValueAMD64_OpAddMaskedFloat32x4(v) - case OpAddMaskedFloat32x8: - return rewriteValueAMD64_OpAddMaskedFloat32x8(v) - case OpAddMaskedFloat64x2: - return rewriteValueAMD64_OpAddMaskedFloat64x2(v) - case OpAddMaskedFloat64x4: - return rewriteValueAMD64_OpAddMaskedFloat64x4(v) - case OpAddMaskedFloat64x8: - return rewriteValueAMD64_OpAddMaskedFloat64x8(v) - case OpAddMaskedInt16x16: - return rewriteValueAMD64_OpAddMaskedInt16x16(v) - case OpAddMaskedInt16x32: - return rewriteValueAMD64_OpAddMaskedInt16x32(v) - case OpAddMaskedInt16x8: - return rewriteValueAMD64_OpAddMaskedInt16x8(v) - case OpAddMaskedInt32x16: - return rewriteValueAMD64_OpAddMaskedInt32x16(v) - case OpAddMaskedInt32x4: - return rewriteValueAMD64_OpAddMaskedInt32x4(v) - case OpAddMaskedInt32x8: - return rewriteValueAMD64_OpAddMaskedInt32x8(v) - case OpAddMaskedInt64x2: - return rewriteValueAMD64_OpAddMaskedInt64x2(v) - case OpAddMaskedInt64x4: - return rewriteValueAMD64_OpAddMaskedInt64x4(v) - case OpAddMaskedInt64x8: - 
return rewriteValueAMD64_OpAddMaskedInt64x8(v) - case OpAddMaskedInt8x16: - return rewriteValueAMD64_OpAddMaskedInt8x16(v) - case OpAddMaskedInt8x32: - return rewriteValueAMD64_OpAddMaskedInt8x32(v) - case OpAddMaskedInt8x64: - return rewriteValueAMD64_OpAddMaskedInt8x64(v) - case OpAddMaskedUint16x16: - return rewriteValueAMD64_OpAddMaskedUint16x16(v) - case OpAddMaskedUint16x32: - return rewriteValueAMD64_OpAddMaskedUint16x32(v) - case OpAddMaskedUint16x8: - return rewriteValueAMD64_OpAddMaskedUint16x8(v) - case OpAddMaskedUint32x16: - return rewriteValueAMD64_OpAddMaskedUint32x16(v) - case OpAddMaskedUint32x4: - return rewriteValueAMD64_OpAddMaskedUint32x4(v) - case OpAddMaskedUint32x8: - return rewriteValueAMD64_OpAddMaskedUint32x8(v) - case OpAddMaskedUint64x2: - return rewriteValueAMD64_OpAddMaskedUint64x2(v) - case OpAddMaskedUint64x4: - return rewriteValueAMD64_OpAddMaskedUint64x4(v) - case OpAddMaskedUint64x8: - return rewriteValueAMD64_OpAddMaskedUint64x8(v) - case OpAddMaskedUint8x16: - return rewriteValueAMD64_OpAddMaskedUint8x16(v) - case OpAddMaskedUint8x32: - return rewriteValueAMD64_OpAddMaskedUint8x32(v) - case OpAddMaskedUint8x64: - return rewriteValueAMD64_OpAddMaskedUint8x64(v) case OpAddPairsFloat32x4: v.Op = OpAMD64VHADDPS128 return true @@ -931,30 +793,6 @@ func rewriteValueAMD64(v *Value) bool { case OpAddSaturatedInt8x64: v.Op = OpAMD64VPADDSB512 return true - case OpAddSaturatedMaskedInt16x16: - return rewriteValueAMD64_OpAddSaturatedMaskedInt16x16(v) - case OpAddSaturatedMaskedInt16x32: - return rewriteValueAMD64_OpAddSaturatedMaskedInt16x32(v) - case OpAddSaturatedMaskedInt16x8: - return rewriteValueAMD64_OpAddSaturatedMaskedInt16x8(v) - case OpAddSaturatedMaskedInt8x16: - return rewriteValueAMD64_OpAddSaturatedMaskedInt8x16(v) - case OpAddSaturatedMaskedInt8x32: - return rewriteValueAMD64_OpAddSaturatedMaskedInt8x32(v) - case OpAddSaturatedMaskedInt8x64: - return rewriteValueAMD64_OpAddSaturatedMaskedInt8x64(v) - case OpAddSaturatedMaskedUint16x16: - return rewriteValueAMD64_OpAddSaturatedMaskedUint16x16(v) - case OpAddSaturatedMaskedUint16x32: - return rewriteValueAMD64_OpAddSaturatedMaskedUint16x32(v) - case OpAddSaturatedMaskedUint16x8: - return rewriteValueAMD64_OpAddSaturatedMaskedUint16x8(v) - case OpAddSaturatedMaskedUint8x16: - return rewriteValueAMD64_OpAddSaturatedMaskedUint8x16(v) - case OpAddSaturatedMaskedUint8x32: - return rewriteValueAMD64_OpAddSaturatedMaskedUint8x32(v) - case OpAddSaturatedMaskedUint8x64: - return rewriteValueAMD64_OpAddSaturatedMaskedUint8x64(v) case OpAddSaturatedUint16x16: v.Op = OpAMD64VPADDUSW256 return true @@ -1074,30 +912,6 @@ func rewriteValueAMD64(v *Value) bool { case OpAndInt8x64: v.Op = OpAMD64VPANDD512 return true - case OpAndMaskedInt32x16: - return rewriteValueAMD64_OpAndMaskedInt32x16(v) - case OpAndMaskedInt32x4: - return rewriteValueAMD64_OpAndMaskedInt32x4(v) - case OpAndMaskedInt32x8: - return rewriteValueAMD64_OpAndMaskedInt32x8(v) - case OpAndMaskedInt64x2: - return rewriteValueAMD64_OpAndMaskedInt64x2(v) - case OpAndMaskedInt64x4: - return rewriteValueAMD64_OpAndMaskedInt64x4(v) - case OpAndMaskedInt64x8: - return rewriteValueAMD64_OpAndMaskedInt64x8(v) - case OpAndMaskedUint32x16: - return rewriteValueAMD64_OpAndMaskedUint32x16(v) - case OpAndMaskedUint32x4: - return rewriteValueAMD64_OpAndMaskedUint32x4(v) - case OpAndMaskedUint32x8: - return rewriteValueAMD64_OpAndMaskedUint32x8(v) - case OpAndMaskedUint64x2: - return rewriteValueAMD64_OpAndMaskedUint64x2(v) - case OpAndMaskedUint64x4: - return 
rewriteValueAMD64_OpAndMaskedUint64x4(v) - case OpAndMaskedUint64x8: - return rewriteValueAMD64_OpAndMaskedUint64x8(v) case OpAndNotInt16x16: v.Op = OpAMD64VPANDN256 return true @@ -1134,30 +948,6 @@ func rewriteValueAMD64(v *Value) bool { case OpAndNotInt8x64: v.Op = OpAMD64VPANDND512 return true - case OpAndNotMaskedInt32x16: - return rewriteValueAMD64_OpAndNotMaskedInt32x16(v) - case OpAndNotMaskedInt32x4: - return rewriteValueAMD64_OpAndNotMaskedInt32x4(v) - case OpAndNotMaskedInt32x8: - return rewriteValueAMD64_OpAndNotMaskedInt32x8(v) - case OpAndNotMaskedInt64x2: - return rewriteValueAMD64_OpAndNotMaskedInt64x2(v) - case OpAndNotMaskedInt64x4: - return rewriteValueAMD64_OpAndNotMaskedInt64x4(v) - case OpAndNotMaskedInt64x8: - return rewriteValueAMD64_OpAndNotMaskedInt64x8(v) - case OpAndNotMaskedUint32x16: - return rewriteValueAMD64_OpAndNotMaskedUint32x16(v) - case OpAndNotMaskedUint32x4: - return rewriteValueAMD64_OpAndNotMaskedUint32x4(v) - case OpAndNotMaskedUint32x8: - return rewriteValueAMD64_OpAndNotMaskedUint32x8(v) - case OpAndNotMaskedUint64x2: - return rewriteValueAMD64_OpAndNotMaskedUint64x2(v) - case OpAndNotMaskedUint64x4: - return rewriteValueAMD64_OpAndNotMaskedUint64x4(v) - case OpAndNotMaskedUint64x8: - return rewriteValueAMD64_OpAndNotMaskedUint64x8(v) case OpAndNotUint16x16: v.Op = OpAMD64VPANDN256 return true @@ -1276,18 +1066,6 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAtomicStore8(v) case OpAtomicStorePtrNoWB: return rewriteValueAMD64_OpAtomicStorePtrNoWB(v) - case OpAverageMaskedUint16x16: - return rewriteValueAMD64_OpAverageMaskedUint16x16(v) - case OpAverageMaskedUint16x32: - return rewriteValueAMD64_OpAverageMaskedUint16x32(v) - case OpAverageMaskedUint16x8: - return rewriteValueAMD64_OpAverageMaskedUint16x8(v) - case OpAverageMaskedUint8x16: - return rewriteValueAMD64_OpAverageMaskedUint8x16(v) - case OpAverageMaskedUint8x32: - return rewriteValueAMD64_OpAverageMaskedUint8x32(v) - case OpAverageMaskedUint8x64: - return rewriteValueAMD64_OpAverageMaskedUint8x64(v) case OpAverageUint16x16: v.Op = OpAMD64VPAVGW256 return true @@ -1335,26 +1113,6 @@ func rewriteValueAMD64(v *Value) bool { case OpBroadcast128Int8x16: v.Op = OpAMD64VPBROADCASTB128 return true - case OpBroadcast128MaskedFloat32x4: - return rewriteValueAMD64_OpBroadcast128MaskedFloat32x4(v) - case OpBroadcast128MaskedFloat64x2: - return rewriteValueAMD64_OpBroadcast128MaskedFloat64x2(v) - case OpBroadcast128MaskedInt16x8: - return rewriteValueAMD64_OpBroadcast128MaskedInt16x8(v) - case OpBroadcast128MaskedInt32x4: - return rewriteValueAMD64_OpBroadcast128MaskedInt32x4(v) - case OpBroadcast128MaskedInt64x2: - return rewriteValueAMD64_OpBroadcast128MaskedInt64x2(v) - case OpBroadcast128MaskedInt8x16: - return rewriteValueAMD64_OpBroadcast128MaskedInt8x16(v) - case OpBroadcast128MaskedUint16x8: - return rewriteValueAMD64_OpBroadcast128MaskedUint16x8(v) - case OpBroadcast128MaskedUint32x4: - return rewriteValueAMD64_OpBroadcast128MaskedUint32x4(v) - case OpBroadcast128MaskedUint64x2: - return rewriteValueAMD64_OpBroadcast128MaskedUint64x2(v) - case OpBroadcast128MaskedUint8x16: - return rewriteValueAMD64_OpBroadcast128MaskedUint8x16(v) case OpBroadcast128Uint16x8: v.Op = OpAMD64VPBROADCASTW128 return true @@ -1385,26 +1143,6 @@ func rewriteValueAMD64(v *Value) bool { case OpBroadcast256Int8x16: v.Op = OpAMD64VPBROADCASTB256 return true - case OpBroadcast256MaskedFloat32x4: - return rewriteValueAMD64_OpBroadcast256MaskedFloat32x4(v) - case OpBroadcast256MaskedFloat64x2: - 
return rewriteValueAMD64_OpBroadcast256MaskedFloat64x2(v) - case OpBroadcast256MaskedInt16x8: - return rewriteValueAMD64_OpBroadcast256MaskedInt16x8(v) - case OpBroadcast256MaskedInt32x4: - return rewriteValueAMD64_OpBroadcast256MaskedInt32x4(v) - case OpBroadcast256MaskedInt64x2: - return rewriteValueAMD64_OpBroadcast256MaskedInt64x2(v) - case OpBroadcast256MaskedInt8x16: - return rewriteValueAMD64_OpBroadcast256MaskedInt8x16(v) - case OpBroadcast256MaskedUint16x8: - return rewriteValueAMD64_OpBroadcast256MaskedUint16x8(v) - case OpBroadcast256MaskedUint32x4: - return rewriteValueAMD64_OpBroadcast256MaskedUint32x4(v) - case OpBroadcast256MaskedUint64x2: - return rewriteValueAMD64_OpBroadcast256MaskedUint64x2(v) - case OpBroadcast256MaskedUint8x16: - return rewriteValueAMD64_OpBroadcast256MaskedUint8x16(v) case OpBroadcast256Uint16x8: v.Op = OpAMD64VPBROADCASTW256 return true @@ -1435,26 +1173,6 @@ func rewriteValueAMD64(v *Value) bool { case OpBroadcast512Int8x16: v.Op = OpAMD64VPBROADCASTB512 return true - case OpBroadcast512MaskedFloat32x4: - return rewriteValueAMD64_OpBroadcast512MaskedFloat32x4(v) - case OpBroadcast512MaskedFloat64x2: - return rewriteValueAMD64_OpBroadcast512MaskedFloat64x2(v) - case OpBroadcast512MaskedInt16x8: - return rewriteValueAMD64_OpBroadcast512MaskedInt16x8(v) - case OpBroadcast512MaskedInt32x4: - return rewriteValueAMD64_OpBroadcast512MaskedInt32x4(v) - case OpBroadcast512MaskedInt64x2: - return rewriteValueAMD64_OpBroadcast512MaskedInt64x2(v) - case OpBroadcast512MaskedInt8x16: - return rewriteValueAMD64_OpBroadcast512MaskedInt8x16(v) - case OpBroadcast512MaskedUint16x8: - return rewriteValueAMD64_OpBroadcast512MaskedUint16x8(v) - case OpBroadcast512MaskedUint32x4: - return rewriteValueAMD64_OpBroadcast512MaskedUint32x4(v) - case OpBroadcast512MaskedUint64x2: - return rewriteValueAMD64_OpBroadcast512MaskedUint64x2(v) - case OpBroadcast512MaskedUint8x16: - return rewriteValueAMD64_OpBroadcast512MaskedUint8x16(v) case OpBroadcast512Uint16x8: v.Op = OpAMD64VPBROADCASTW512 return true @@ -1497,18 +1215,6 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpCeilScaledFloat64x4(v) case OpCeilScaledFloat64x8: return rewriteValueAMD64_OpCeilScaledFloat64x8(v) - case OpCeilScaledMaskedFloat32x16: - return rewriteValueAMD64_OpCeilScaledMaskedFloat32x16(v) - case OpCeilScaledMaskedFloat32x4: - return rewriteValueAMD64_OpCeilScaledMaskedFloat32x4(v) - case OpCeilScaledMaskedFloat32x8: - return rewriteValueAMD64_OpCeilScaledMaskedFloat32x8(v) - case OpCeilScaledMaskedFloat64x2: - return rewriteValueAMD64_OpCeilScaledMaskedFloat64x2(v) - case OpCeilScaledMaskedFloat64x4: - return rewriteValueAMD64_OpCeilScaledMaskedFloat64x4(v) - case OpCeilScaledMaskedFloat64x8: - return rewriteValueAMD64_OpCeilScaledMaskedFloat64x8(v) case OpCeilScaledResidueFloat32x16: return rewriteValueAMD64_OpCeilScaledResidueFloat32x16(v) case OpCeilScaledResidueFloat32x4: @@ -1521,18 +1227,6 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpCeilScaledResidueFloat64x4(v) case OpCeilScaledResidueFloat64x8: return rewriteValueAMD64_OpCeilScaledResidueFloat64x8(v) - case OpCeilScaledResidueMaskedFloat32x16: - return rewriteValueAMD64_OpCeilScaledResidueMaskedFloat32x16(v) - case OpCeilScaledResidueMaskedFloat32x4: - return rewriteValueAMD64_OpCeilScaledResidueMaskedFloat32x4(v) - case OpCeilScaledResidueMaskedFloat32x8: - return rewriteValueAMD64_OpCeilScaledResidueMaskedFloat32x8(v) - case OpCeilScaledResidueMaskedFloat64x2: - return 
rewriteValueAMD64_OpCeilScaledResidueMaskedFloat64x2(v) - case OpCeilScaledResidueMaskedFloat64x4: - return rewriteValueAMD64_OpCeilScaledResidueMaskedFloat64x4(v) - case OpCeilScaledResidueMaskedFloat64x8: - return rewriteValueAMD64_OpCeilScaledResidueMaskedFloat64x8(v) case OpClosureCall: v.Op = OpAMD64CALLclosure return true @@ -1639,12 +1333,6 @@ func rewriteValueAMD64(v *Value) bool { case OpConvertToInt32Float32x8: v.Op = OpAMD64VCVTTPS2DQ256 return true - case OpConvertToInt32MaskedFloat32x16: - return rewriteValueAMD64_OpConvertToInt32MaskedFloat32x16(v) - case OpConvertToInt32MaskedFloat32x4: - return rewriteValueAMD64_OpConvertToInt32MaskedFloat32x4(v) - case OpConvertToInt32MaskedFloat32x8: - return rewriteValueAMD64_OpConvertToInt32MaskedFloat32x8(v) case OpConvertToUint32Float32x16: v.Op = OpAMD64VCVTPS2UDQ512 return true @@ -1654,12 +1342,6 @@ func rewriteValueAMD64(v *Value) bool { case OpConvertToUint32Float32x8: v.Op = OpAMD64VCVTPS2UDQ256 return true - case OpConvertToUint32MaskedFloat32x16: - return rewriteValueAMD64_OpConvertToUint32MaskedFloat32x16(v) - case OpConvertToUint32MaskedFloat32x4: - return rewriteValueAMD64_OpConvertToUint32MaskedFloat32x4(v) - case OpConvertToUint32MaskedFloat32x8: - return rewriteValueAMD64_OpConvertToUint32MaskedFloat32x8(v) case OpCopySignInt16x16: v.Op = OpAMD64VPSIGNW256 return true @@ -1818,18 +1500,6 @@ func rewriteValueAMD64(v *Value) bool { case OpDivFloat64x8: v.Op = OpAMD64VDIVPD512 return true - case OpDivMaskedFloat32x16: - return rewriteValueAMD64_OpDivMaskedFloat32x16(v) - case OpDivMaskedFloat32x4: - return rewriteValueAMD64_OpDivMaskedFloat32x4(v) - case OpDivMaskedFloat32x8: - return rewriteValueAMD64_OpDivMaskedFloat32x8(v) - case OpDivMaskedFloat64x2: - return rewriteValueAMD64_OpDivMaskedFloat64x2(v) - case OpDivMaskedFloat64x4: - return rewriteValueAMD64_OpDivMaskedFloat64x4(v) - case OpDivMaskedFloat64x8: - return rewriteValueAMD64_OpDivMaskedFloat64x8(v) case OpDotProdPairsInt16x16: v.Op = OpAMD64VPMADDWD256 return true @@ -1839,18 +1509,6 @@ func rewriteValueAMD64(v *Value) bool { case OpDotProdPairsInt16x8: v.Op = OpAMD64VPMADDWD128 return true - case OpDotProdPairsMaskedInt16x16: - return rewriteValueAMD64_OpDotProdPairsMaskedInt16x16(v) - case OpDotProdPairsMaskedInt16x32: - return rewriteValueAMD64_OpDotProdPairsMaskedInt16x32(v) - case OpDotProdPairsMaskedInt16x8: - return rewriteValueAMD64_OpDotProdPairsMaskedInt16x8(v) - case OpDotProdPairsSaturatedMaskedUint8x16: - return rewriteValueAMD64_OpDotProdPairsSaturatedMaskedUint8x16(v) - case OpDotProdPairsSaturatedMaskedUint8x32: - return rewriteValueAMD64_OpDotProdPairsSaturatedMaskedUint8x32(v) - case OpDotProdPairsSaturatedMaskedUint8x64: - return rewriteValueAMD64_OpDotProdPairsSaturatedMaskedUint8x64(v) case OpDotProdPairsSaturatedUint8x16: v.Op = OpAMD64VPMADDUBSW128 return true @@ -1920,66 +1578,6 @@ func rewriteValueAMD64(v *Value) bool { return true case OpEqualInt8x64: return rewriteValueAMD64_OpEqualInt8x64(v) - case OpEqualMaskedFloat32x16: - return rewriteValueAMD64_OpEqualMaskedFloat32x16(v) - case OpEqualMaskedFloat32x4: - return rewriteValueAMD64_OpEqualMaskedFloat32x4(v) - case OpEqualMaskedFloat32x8: - return rewriteValueAMD64_OpEqualMaskedFloat32x8(v) - case OpEqualMaskedFloat64x2: - return rewriteValueAMD64_OpEqualMaskedFloat64x2(v) - case OpEqualMaskedFloat64x4: - return rewriteValueAMD64_OpEqualMaskedFloat64x4(v) - case OpEqualMaskedFloat64x8: - return rewriteValueAMD64_OpEqualMaskedFloat64x8(v) - case OpEqualMaskedInt16x16: - return 
rewriteValueAMD64_OpEqualMaskedInt16x16(v) - case OpEqualMaskedInt16x32: - return rewriteValueAMD64_OpEqualMaskedInt16x32(v) - case OpEqualMaskedInt16x8: - return rewriteValueAMD64_OpEqualMaskedInt16x8(v) - case OpEqualMaskedInt32x16: - return rewriteValueAMD64_OpEqualMaskedInt32x16(v) - case OpEqualMaskedInt32x4: - return rewriteValueAMD64_OpEqualMaskedInt32x4(v) - case OpEqualMaskedInt32x8: - return rewriteValueAMD64_OpEqualMaskedInt32x8(v) - case OpEqualMaskedInt64x2: - return rewriteValueAMD64_OpEqualMaskedInt64x2(v) - case OpEqualMaskedInt64x4: - return rewriteValueAMD64_OpEqualMaskedInt64x4(v) - case OpEqualMaskedInt64x8: - return rewriteValueAMD64_OpEqualMaskedInt64x8(v) - case OpEqualMaskedInt8x16: - return rewriteValueAMD64_OpEqualMaskedInt8x16(v) - case OpEqualMaskedInt8x32: - return rewriteValueAMD64_OpEqualMaskedInt8x32(v) - case OpEqualMaskedInt8x64: - return rewriteValueAMD64_OpEqualMaskedInt8x64(v) - case OpEqualMaskedUint16x16: - return rewriteValueAMD64_OpEqualMaskedUint16x16(v) - case OpEqualMaskedUint16x32: - return rewriteValueAMD64_OpEqualMaskedUint16x32(v) - case OpEqualMaskedUint16x8: - return rewriteValueAMD64_OpEqualMaskedUint16x8(v) - case OpEqualMaskedUint32x16: - return rewriteValueAMD64_OpEqualMaskedUint32x16(v) - case OpEqualMaskedUint32x4: - return rewriteValueAMD64_OpEqualMaskedUint32x4(v) - case OpEqualMaskedUint32x8: - return rewriteValueAMD64_OpEqualMaskedUint32x8(v) - case OpEqualMaskedUint64x2: - return rewriteValueAMD64_OpEqualMaskedUint64x2(v) - case OpEqualMaskedUint64x4: - return rewriteValueAMD64_OpEqualMaskedUint64x4(v) - case OpEqualMaskedUint64x8: - return rewriteValueAMD64_OpEqualMaskedUint64x8(v) - case OpEqualMaskedUint8x16: - return rewriteValueAMD64_OpEqualMaskedUint8x16(v) - case OpEqualMaskedUint8x32: - return rewriteValueAMD64_OpEqualMaskedUint8x32(v) - case OpEqualMaskedUint8x64: - return rewriteValueAMD64_OpEqualMaskedUint8x64(v) case OpEqualUint16x16: v.Op = OpAMD64VPCMPEQW256 return true @@ -2096,18 +1694,6 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpFloorScaledFloat64x4(v) case OpFloorScaledFloat64x8: return rewriteValueAMD64_OpFloorScaledFloat64x8(v) - case OpFloorScaledMaskedFloat32x16: - return rewriteValueAMD64_OpFloorScaledMaskedFloat32x16(v) - case OpFloorScaledMaskedFloat32x4: - return rewriteValueAMD64_OpFloorScaledMaskedFloat32x4(v) - case OpFloorScaledMaskedFloat32x8: - return rewriteValueAMD64_OpFloorScaledMaskedFloat32x8(v) - case OpFloorScaledMaskedFloat64x2: - return rewriteValueAMD64_OpFloorScaledMaskedFloat64x2(v) - case OpFloorScaledMaskedFloat64x4: - return rewriteValueAMD64_OpFloorScaledMaskedFloat64x4(v) - case OpFloorScaledMaskedFloat64x8: - return rewriteValueAMD64_OpFloorScaledMaskedFloat64x8(v) case OpFloorScaledResidueFloat32x16: return rewriteValueAMD64_OpFloorScaledResidueFloat32x16(v) case OpFloorScaledResidueFloat32x4: @@ -2120,24 +1706,6 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpFloorScaledResidueFloat64x4(v) case OpFloorScaledResidueFloat64x8: return rewriteValueAMD64_OpFloorScaledResidueFloat64x8(v) - case OpFloorScaledResidueMaskedFloat32x16: - return rewriteValueAMD64_OpFloorScaledResidueMaskedFloat32x16(v) - case OpFloorScaledResidueMaskedFloat32x4: - return rewriteValueAMD64_OpFloorScaledResidueMaskedFloat32x4(v) - case OpFloorScaledResidueMaskedFloat32x8: - return rewriteValueAMD64_OpFloorScaledResidueMaskedFloat32x8(v) - case OpFloorScaledResidueMaskedFloat64x2: - return rewriteValueAMD64_OpFloorScaledResidueMaskedFloat64x2(v) - case 
OpFloorScaledResidueMaskedFloat64x4: - return rewriteValueAMD64_OpFloorScaledResidueMaskedFloat64x4(v) - case OpFloorScaledResidueMaskedFloat64x8: - return rewriteValueAMD64_OpFloorScaledResidueMaskedFloat64x8(v) - case OpGaloisFieldAffineTransformInverseMaskedUint8x16: - return rewriteValueAMD64_OpGaloisFieldAffineTransformInverseMaskedUint8x16(v) - case OpGaloisFieldAffineTransformInverseMaskedUint8x32: - return rewriteValueAMD64_OpGaloisFieldAffineTransformInverseMaskedUint8x32(v) - case OpGaloisFieldAffineTransformInverseMaskedUint8x64: - return rewriteValueAMD64_OpGaloisFieldAffineTransformInverseMaskedUint8x64(v) case OpGaloisFieldAffineTransformInverseUint8x16: v.Op = OpAMD64VGF2P8AFFINEINVQB128 return true @@ -2147,12 +1715,6 @@ func rewriteValueAMD64(v *Value) bool { case OpGaloisFieldAffineTransformInverseUint8x64: v.Op = OpAMD64VGF2P8AFFINEINVQB512 return true - case OpGaloisFieldAffineTransformMaskedUint8x16: - return rewriteValueAMD64_OpGaloisFieldAffineTransformMaskedUint8x16(v) - case OpGaloisFieldAffineTransformMaskedUint8x32: - return rewriteValueAMD64_OpGaloisFieldAffineTransformMaskedUint8x32(v) - case OpGaloisFieldAffineTransformMaskedUint8x64: - return rewriteValueAMD64_OpGaloisFieldAffineTransformMaskedUint8x64(v) case OpGaloisFieldAffineTransformUint8x16: v.Op = OpAMD64VGF2P8AFFINEQB128 return true @@ -2162,12 +1724,6 @@ func rewriteValueAMD64(v *Value) bool { case OpGaloisFieldAffineTransformUint8x64: v.Op = OpAMD64VGF2P8AFFINEQB512 return true - case OpGaloisFieldMulMaskedUint8x16: - return rewriteValueAMD64_OpGaloisFieldMulMaskedUint8x16(v) - case OpGaloisFieldMulMaskedUint8x32: - return rewriteValueAMD64_OpGaloisFieldMulMaskedUint8x32(v) - case OpGaloisFieldMulMaskedUint8x64: - return rewriteValueAMD64_OpGaloisFieldMulMaskedUint8x64(v) case OpGaloisFieldMulUint8x16: v.Op = OpAMD64VGF2P8MULB128 return true @@ -2318,66 +1874,6 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpGreaterEqualInt64x8(v) case OpGreaterEqualInt8x64: return rewriteValueAMD64_OpGreaterEqualInt8x64(v) - case OpGreaterEqualMaskedFloat32x16: - return rewriteValueAMD64_OpGreaterEqualMaskedFloat32x16(v) - case OpGreaterEqualMaskedFloat32x4: - return rewriteValueAMD64_OpGreaterEqualMaskedFloat32x4(v) - case OpGreaterEqualMaskedFloat32x8: - return rewriteValueAMD64_OpGreaterEqualMaskedFloat32x8(v) - case OpGreaterEqualMaskedFloat64x2: - return rewriteValueAMD64_OpGreaterEqualMaskedFloat64x2(v) - case OpGreaterEqualMaskedFloat64x4: - return rewriteValueAMD64_OpGreaterEqualMaskedFloat64x4(v) - case OpGreaterEqualMaskedFloat64x8: - return rewriteValueAMD64_OpGreaterEqualMaskedFloat64x8(v) - case OpGreaterEqualMaskedInt16x16: - return rewriteValueAMD64_OpGreaterEqualMaskedInt16x16(v) - case OpGreaterEqualMaskedInt16x32: - return rewriteValueAMD64_OpGreaterEqualMaskedInt16x32(v) - case OpGreaterEqualMaskedInt16x8: - return rewriteValueAMD64_OpGreaterEqualMaskedInt16x8(v) - case OpGreaterEqualMaskedInt32x16: - return rewriteValueAMD64_OpGreaterEqualMaskedInt32x16(v) - case OpGreaterEqualMaskedInt32x4: - return rewriteValueAMD64_OpGreaterEqualMaskedInt32x4(v) - case OpGreaterEqualMaskedInt32x8: - return rewriteValueAMD64_OpGreaterEqualMaskedInt32x8(v) - case OpGreaterEqualMaskedInt64x2: - return rewriteValueAMD64_OpGreaterEqualMaskedInt64x2(v) - case OpGreaterEqualMaskedInt64x4: - return rewriteValueAMD64_OpGreaterEqualMaskedInt64x4(v) - case OpGreaterEqualMaskedInt64x8: - return rewriteValueAMD64_OpGreaterEqualMaskedInt64x8(v) - case OpGreaterEqualMaskedInt8x16: - return 
rewriteValueAMD64_OpGreaterEqualMaskedInt8x16(v) - case OpGreaterEqualMaskedInt8x32: - return rewriteValueAMD64_OpGreaterEqualMaskedInt8x32(v) - case OpGreaterEqualMaskedInt8x64: - return rewriteValueAMD64_OpGreaterEqualMaskedInt8x64(v) - case OpGreaterEqualMaskedUint16x16: - return rewriteValueAMD64_OpGreaterEqualMaskedUint16x16(v) - case OpGreaterEqualMaskedUint16x32: - return rewriteValueAMD64_OpGreaterEqualMaskedUint16x32(v) - case OpGreaterEqualMaskedUint16x8: - return rewriteValueAMD64_OpGreaterEqualMaskedUint16x8(v) - case OpGreaterEqualMaskedUint32x16: - return rewriteValueAMD64_OpGreaterEqualMaskedUint32x16(v) - case OpGreaterEqualMaskedUint32x4: - return rewriteValueAMD64_OpGreaterEqualMaskedUint32x4(v) - case OpGreaterEqualMaskedUint32x8: - return rewriteValueAMD64_OpGreaterEqualMaskedUint32x8(v) - case OpGreaterEqualMaskedUint64x2: - return rewriteValueAMD64_OpGreaterEqualMaskedUint64x2(v) - case OpGreaterEqualMaskedUint64x4: - return rewriteValueAMD64_OpGreaterEqualMaskedUint64x4(v) - case OpGreaterEqualMaskedUint64x8: - return rewriteValueAMD64_OpGreaterEqualMaskedUint64x8(v) - case OpGreaterEqualMaskedUint8x16: - return rewriteValueAMD64_OpGreaterEqualMaskedUint8x16(v) - case OpGreaterEqualMaskedUint8x32: - return rewriteValueAMD64_OpGreaterEqualMaskedUint8x32(v) - case OpGreaterEqualMaskedUint8x64: - return rewriteValueAMD64_OpGreaterEqualMaskedUint8x64(v) case OpGreaterEqualUint16x32: return rewriteValueAMD64_OpGreaterEqualUint16x32(v) case OpGreaterEqualUint32x16: @@ -2430,66 +1926,6 @@ func rewriteValueAMD64(v *Value) bool { return true case OpGreaterInt8x64: return rewriteValueAMD64_OpGreaterInt8x64(v) - case OpGreaterMaskedFloat32x16: - return rewriteValueAMD64_OpGreaterMaskedFloat32x16(v) - case OpGreaterMaskedFloat32x4: - return rewriteValueAMD64_OpGreaterMaskedFloat32x4(v) - case OpGreaterMaskedFloat32x8: - return rewriteValueAMD64_OpGreaterMaskedFloat32x8(v) - case OpGreaterMaskedFloat64x2: - return rewriteValueAMD64_OpGreaterMaskedFloat64x2(v) - case OpGreaterMaskedFloat64x4: - return rewriteValueAMD64_OpGreaterMaskedFloat64x4(v) - case OpGreaterMaskedFloat64x8: - return rewriteValueAMD64_OpGreaterMaskedFloat64x8(v) - case OpGreaterMaskedInt16x16: - return rewriteValueAMD64_OpGreaterMaskedInt16x16(v) - case OpGreaterMaskedInt16x32: - return rewriteValueAMD64_OpGreaterMaskedInt16x32(v) - case OpGreaterMaskedInt16x8: - return rewriteValueAMD64_OpGreaterMaskedInt16x8(v) - case OpGreaterMaskedInt32x16: - return rewriteValueAMD64_OpGreaterMaskedInt32x16(v) - case OpGreaterMaskedInt32x4: - return rewriteValueAMD64_OpGreaterMaskedInt32x4(v) - case OpGreaterMaskedInt32x8: - return rewriteValueAMD64_OpGreaterMaskedInt32x8(v) - case OpGreaterMaskedInt64x2: - return rewriteValueAMD64_OpGreaterMaskedInt64x2(v) - case OpGreaterMaskedInt64x4: - return rewriteValueAMD64_OpGreaterMaskedInt64x4(v) - case OpGreaterMaskedInt64x8: - return rewriteValueAMD64_OpGreaterMaskedInt64x8(v) - case OpGreaterMaskedInt8x16: - return rewriteValueAMD64_OpGreaterMaskedInt8x16(v) - case OpGreaterMaskedInt8x32: - return rewriteValueAMD64_OpGreaterMaskedInt8x32(v) - case OpGreaterMaskedInt8x64: - return rewriteValueAMD64_OpGreaterMaskedInt8x64(v) - case OpGreaterMaskedUint16x16: - return rewriteValueAMD64_OpGreaterMaskedUint16x16(v) - case OpGreaterMaskedUint16x32: - return rewriteValueAMD64_OpGreaterMaskedUint16x32(v) - case OpGreaterMaskedUint16x8: - return rewriteValueAMD64_OpGreaterMaskedUint16x8(v) - case OpGreaterMaskedUint32x16: - return rewriteValueAMD64_OpGreaterMaskedUint32x16(v) - case 
OpGreaterMaskedUint32x4: - return rewriteValueAMD64_OpGreaterMaskedUint32x4(v) - case OpGreaterMaskedUint32x8: - return rewriteValueAMD64_OpGreaterMaskedUint32x8(v) - case OpGreaterMaskedUint64x2: - return rewriteValueAMD64_OpGreaterMaskedUint64x2(v) - case OpGreaterMaskedUint64x4: - return rewriteValueAMD64_OpGreaterMaskedUint64x4(v) - case OpGreaterMaskedUint64x8: - return rewriteValueAMD64_OpGreaterMaskedUint64x8(v) - case OpGreaterMaskedUint8x16: - return rewriteValueAMD64_OpGreaterMaskedUint8x16(v) - case OpGreaterMaskedUint8x32: - return rewriteValueAMD64_OpGreaterMaskedUint8x32(v) - case OpGreaterMaskedUint8x64: - return rewriteValueAMD64_OpGreaterMaskedUint8x64(v) case OpGreaterUint16x32: return rewriteValueAMD64_OpGreaterUint16x32(v) case OpGreaterUint32x16: @@ -2529,18 +1965,6 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpIsNanFloat64x4(v) case OpIsNanFloat64x8: return rewriteValueAMD64_OpIsNanFloat64x8(v) - case OpIsNanMaskedFloat32x16: - return rewriteValueAMD64_OpIsNanMaskedFloat32x16(v) - case OpIsNanMaskedFloat32x4: - return rewriteValueAMD64_OpIsNanMaskedFloat32x4(v) - case OpIsNanMaskedFloat32x8: - return rewriteValueAMD64_OpIsNanMaskedFloat32x8(v) - case OpIsNanMaskedFloat64x2: - return rewriteValueAMD64_OpIsNanMaskedFloat64x2(v) - case OpIsNanMaskedFloat64x4: - return rewriteValueAMD64_OpIsNanMaskedFloat64x4(v) - case OpIsNanMaskedFloat64x8: - return rewriteValueAMD64_OpIsNanMaskedFloat64x8(v) case OpIsNonNil: return rewriteValueAMD64_OpIsNonNil(v) case OpIsSliceInBounds: @@ -2605,66 +2029,6 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpLessEqualInt64x8(v) case OpLessEqualInt8x64: return rewriteValueAMD64_OpLessEqualInt8x64(v) - case OpLessEqualMaskedFloat32x16: - return rewriteValueAMD64_OpLessEqualMaskedFloat32x16(v) - case OpLessEqualMaskedFloat32x4: - return rewriteValueAMD64_OpLessEqualMaskedFloat32x4(v) - case OpLessEqualMaskedFloat32x8: - return rewriteValueAMD64_OpLessEqualMaskedFloat32x8(v) - case OpLessEqualMaskedFloat64x2: - return rewriteValueAMD64_OpLessEqualMaskedFloat64x2(v) - case OpLessEqualMaskedFloat64x4: - return rewriteValueAMD64_OpLessEqualMaskedFloat64x4(v) - case OpLessEqualMaskedFloat64x8: - return rewriteValueAMD64_OpLessEqualMaskedFloat64x8(v) - case OpLessEqualMaskedInt16x16: - return rewriteValueAMD64_OpLessEqualMaskedInt16x16(v) - case OpLessEqualMaskedInt16x32: - return rewriteValueAMD64_OpLessEqualMaskedInt16x32(v) - case OpLessEqualMaskedInt16x8: - return rewriteValueAMD64_OpLessEqualMaskedInt16x8(v) - case OpLessEqualMaskedInt32x16: - return rewriteValueAMD64_OpLessEqualMaskedInt32x16(v) - case OpLessEqualMaskedInt32x4: - return rewriteValueAMD64_OpLessEqualMaskedInt32x4(v) - case OpLessEqualMaskedInt32x8: - return rewriteValueAMD64_OpLessEqualMaskedInt32x8(v) - case OpLessEqualMaskedInt64x2: - return rewriteValueAMD64_OpLessEqualMaskedInt64x2(v) - case OpLessEqualMaskedInt64x4: - return rewriteValueAMD64_OpLessEqualMaskedInt64x4(v) - case OpLessEqualMaskedInt64x8: - return rewriteValueAMD64_OpLessEqualMaskedInt64x8(v) - case OpLessEqualMaskedInt8x16: - return rewriteValueAMD64_OpLessEqualMaskedInt8x16(v) - case OpLessEqualMaskedInt8x32: - return rewriteValueAMD64_OpLessEqualMaskedInt8x32(v) - case OpLessEqualMaskedInt8x64: - return rewriteValueAMD64_OpLessEqualMaskedInt8x64(v) - case OpLessEqualMaskedUint16x16: - return rewriteValueAMD64_OpLessEqualMaskedUint16x16(v) - case OpLessEqualMaskedUint16x32: - return rewriteValueAMD64_OpLessEqualMaskedUint16x32(v) - case 
OpLessEqualMaskedUint16x8: - return rewriteValueAMD64_OpLessEqualMaskedUint16x8(v) - case OpLessEqualMaskedUint32x16: - return rewriteValueAMD64_OpLessEqualMaskedUint32x16(v) - case OpLessEqualMaskedUint32x4: - return rewriteValueAMD64_OpLessEqualMaskedUint32x4(v) - case OpLessEqualMaskedUint32x8: - return rewriteValueAMD64_OpLessEqualMaskedUint32x8(v) - case OpLessEqualMaskedUint64x2: - return rewriteValueAMD64_OpLessEqualMaskedUint64x2(v) - case OpLessEqualMaskedUint64x4: - return rewriteValueAMD64_OpLessEqualMaskedUint64x4(v) - case OpLessEqualMaskedUint64x8: - return rewriteValueAMD64_OpLessEqualMaskedUint64x8(v) - case OpLessEqualMaskedUint8x16: - return rewriteValueAMD64_OpLessEqualMaskedUint8x16(v) - case OpLessEqualMaskedUint8x32: - return rewriteValueAMD64_OpLessEqualMaskedUint8x32(v) - case OpLessEqualMaskedUint8x64: - return rewriteValueAMD64_OpLessEqualMaskedUint8x64(v) case OpLessEqualUint16x32: return rewriteValueAMD64_OpLessEqualUint16x32(v) case OpLessEqualUint32x16: @@ -2693,66 +2057,6 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpLessInt64x8(v) case OpLessInt8x64: return rewriteValueAMD64_OpLessInt8x64(v) - case OpLessMaskedFloat32x16: - return rewriteValueAMD64_OpLessMaskedFloat32x16(v) - case OpLessMaskedFloat32x4: - return rewriteValueAMD64_OpLessMaskedFloat32x4(v) - case OpLessMaskedFloat32x8: - return rewriteValueAMD64_OpLessMaskedFloat32x8(v) - case OpLessMaskedFloat64x2: - return rewriteValueAMD64_OpLessMaskedFloat64x2(v) - case OpLessMaskedFloat64x4: - return rewriteValueAMD64_OpLessMaskedFloat64x4(v) - case OpLessMaskedFloat64x8: - return rewriteValueAMD64_OpLessMaskedFloat64x8(v) - case OpLessMaskedInt16x16: - return rewriteValueAMD64_OpLessMaskedInt16x16(v) - case OpLessMaskedInt16x32: - return rewriteValueAMD64_OpLessMaskedInt16x32(v) - case OpLessMaskedInt16x8: - return rewriteValueAMD64_OpLessMaskedInt16x8(v) - case OpLessMaskedInt32x16: - return rewriteValueAMD64_OpLessMaskedInt32x16(v) - case OpLessMaskedInt32x4: - return rewriteValueAMD64_OpLessMaskedInt32x4(v) - case OpLessMaskedInt32x8: - return rewriteValueAMD64_OpLessMaskedInt32x8(v) - case OpLessMaskedInt64x2: - return rewriteValueAMD64_OpLessMaskedInt64x2(v) - case OpLessMaskedInt64x4: - return rewriteValueAMD64_OpLessMaskedInt64x4(v) - case OpLessMaskedInt64x8: - return rewriteValueAMD64_OpLessMaskedInt64x8(v) - case OpLessMaskedInt8x16: - return rewriteValueAMD64_OpLessMaskedInt8x16(v) - case OpLessMaskedInt8x32: - return rewriteValueAMD64_OpLessMaskedInt8x32(v) - case OpLessMaskedInt8x64: - return rewriteValueAMD64_OpLessMaskedInt8x64(v) - case OpLessMaskedUint16x16: - return rewriteValueAMD64_OpLessMaskedUint16x16(v) - case OpLessMaskedUint16x32: - return rewriteValueAMD64_OpLessMaskedUint16x32(v) - case OpLessMaskedUint16x8: - return rewriteValueAMD64_OpLessMaskedUint16x8(v) - case OpLessMaskedUint32x16: - return rewriteValueAMD64_OpLessMaskedUint32x16(v) - case OpLessMaskedUint32x4: - return rewriteValueAMD64_OpLessMaskedUint32x4(v) - case OpLessMaskedUint32x8: - return rewriteValueAMD64_OpLessMaskedUint32x8(v) - case OpLessMaskedUint64x2: - return rewriteValueAMD64_OpLessMaskedUint64x2(v) - case OpLessMaskedUint64x4: - return rewriteValueAMD64_OpLessMaskedUint64x4(v) - case OpLessMaskedUint64x8: - return rewriteValueAMD64_OpLessMaskedUint64x8(v) - case OpLessMaskedUint8x16: - return rewriteValueAMD64_OpLessMaskedUint8x16(v) - case OpLessMaskedUint8x32: - return rewriteValueAMD64_OpLessMaskedUint8x32(v) - case OpLessMaskedUint8x64: - return 
rewriteValueAMD64_OpLessMaskedUint8x64(v) case OpLessUint16x32: return rewriteValueAMD64_OpLessUint16x32(v) case OpLessUint32x16: @@ -2887,66 +2191,6 @@ func rewriteValueAMD64(v *Value) bool { case OpMaxInt8x64: v.Op = OpAMD64VPMAXSB512 return true - case OpMaxMaskedFloat32x16: - return rewriteValueAMD64_OpMaxMaskedFloat32x16(v) - case OpMaxMaskedFloat32x4: - return rewriteValueAMD64_OpMaxMaskedFloat32x4(v) - case OpMaxMaskedFloat32x8: - return rewriteValueAMD64_OpMaxMaskedFloat32x8(v) - case OpMaxMaskedFloat64x2: - return rewriteValueAMD64_OpMaxMaskedFloat64x2(v) - case OpMaxMaskedFloat64x4: - return rewriteValueAMD64_OpMaxMaskedFloat64x4(v) - case OpMaxMaskedFloat64x8: - return rewriteValueAMD64_OpMaxMaskedFloat64x8(v) - case OpMaxMaskedInt16x16: - return rewriteValueAMD64_OpMaxMaskedInt16x16(v) - case OpMaxMaskedInt16x32: - return rewriteValueAMD64_OpMaxMaskedInt16x32(v) - case OpMaxMaskedInt16x8: - return rewriteValueAMD64_OpMaxMaskedInt16x8(v) - case OpMaxMaskedInt32x16: - return rewriteValueAMD64_OpMaxMaskedInt32x16(v) - case OpMaxMaskedInt32x4: - return rewriteValueAMD64_OpMaxMaskedInt32x4(v) - case OpMaxMaskedInt32x8: - return rewriteValueAMD64_OpMaxMaskedInt32x8(v) - case OpMaxMaskedInt64x2: - return rewriteValueAMD64_OpMaxMaskedInt64x2(v) - case OpMaxMaskedInt64x4: - return rewriteValueAMD64_OpMaxMaskedInt64x4(v) - case OpMaxMaskedInt64x8: - return rewriteValueAMD64_OpMaxMaskedInt64x8(v) - case OpMaxMaskedInt8x16: - return rewriteValueAMD64_OpMaxMaskedInt8x16(v) - case OpMaxMaskedInt8x32: - return rewriteValueAMD64_OpMaxMaskedInt8x32(v) - case OpMaxMaskedInt8x64: - return rewriteValueAMD64_OpMaxMaskedInt8x64(v) - case OpMaxMaskedUint16x16: - return rewriteValueAMD64_OpMaxMaskedUint16x16(v) - case OpMaxMaskedUint16x32: - return rewriteValueAMD64_OpMaxMaskedUint16x32(v) - case OpMaxMaskedUint16x8: - return rewriteValueAMD64_OpMaxMaskedUint16x8(v) - case OpMaxMaskedUint32x16: - return rewriteValueAMD64_OpMaxMaskedUint32x16(v) - case OpMaxMaskedUint32x4: - return rewriteValueAMD64_OpMaxMaskedUint32x4(v) - case OpMaxMaskedUint32x8: - return rewriteValueAMD64_OpMaxMaskedUint32x8(v) - case OpMaxMaskedUint64x2: - return rewriteValueAMD64_OpMaxMaskedUint64x2(v) - case OpMaxMaskedUint64x4: - return rewriteValueAMD64_OpMaxMaskedUint64x4(v) - case OpMaxMaskedUint64x8: - return rewriteValueAMD64_OpMaxMaskedUint64x8(v) - case OpMaxMaskedUint8x16: - return rewriteValueAMD64_OpMaxMaskedUint8x16(v) - case OpMaxMaskedUint8x32: - return rewriteValueAMD64_OpMaxMaskedUint8x32(v) - case OpMaxMaskedUint8x64: - return rewriteValueAMD64_OpMaxMaskedUint8x64(v) case OpMaxUint16x16: v.Op = OpAMD64VPMAXUW256 return true @@ -3041,66 +2285,6 @@ func rewriteValueAMD64(v *Value) bool { case OpMinInt8x64: v.Op = OpAMD64VPMINSB512 return true - case OpMinMaskedFloat32x16: - return rewriteValueAMD64_OpMinMaskedFloat32x16(v) - case OpMinMaskedFloat32x4: - return rewriteValueAMD64_OpMinMaskedFloat32x4(v) - case OpMinMaskedFloat32x8: - return rewriteValueAMD64_OpMinMaskedFloat32x8(v) - case OpMinMaskedFloat64x2: - return rewriteValueAMD64_OpMinMaskedFloat64x2(v) - case OpMinMaskedFloat64x4: - return rewriteValueAMD64_OpMinMaskedFloat64x4(v) - case OpMinMaskedFloat64x8: - return rewriteValueAMD64_OpMinMaskedFloat64x8(v) - case OpMinMaskedInt16x16: - return rewriteValueAMD64_OpMinMaskedInt16x16(v) - case OpMinMaskedInt16x32: - return rewriteValueAMD64_OpMinMaskedInt16x32(v) - case OpMinMaskedInt16x8: - return rewriteValueAMD64_OpMinMaskedInt16x8(v) - case OpMinMaskedInt32x16: - return 
rewriteValueAMD64_OpMinMaskedInt32x16(v) - case OpMinMaskedInt32x4: - return rewriteValueAMD64_OpMinMaskedInt32x4(v) - case OpMinMaskedInt32x8: - return rewriteValueAMD64_OpMinMaskedInt32x8(v) - case OpMinMaskedInt64x2: - return rewriteValueAMD64_OpMinMaskedInt64x2(v) - case OpMinMaskedInt64x4: - return rewriteValueAMD64_OpMinMaskedInt64x4(v) - case OpMinMaskedInt64x8: - return rewriteValueAMD64_OpMinMaskedInt64x8(v) - case OpMinMaskedInt8x16: - return rewriteValueAMD64_OpMinMaskedInt8x16(v) - case OpMinMaskedInt8x32: - return rewriteValueAMD64_OpMinMaskedInt8x32(v) - case OpMinMaskedInt8x64: - return rewriteValueAMD64_OpMinMaskedInt8x64(v) - case OpMinMaskedUint16x16: - return rewriteValueAMD64_OpMinMaskedUint16x16(v) - case OpMinMaskedUint16x32: - return rewriteValueAMD64_OpMinMaskedUint16x32(v) - case OpMinMaskedUint16x8: - return rewriteValueAMD64_OpMinMaskedUint16x8(v) - case OpMinMaskedUint32x16: - return rewriteValueAMD64_OpMinMaskedUint32x16(v) - case OpMinMaskedUint32x4: - return rewriteValueAMD64_OpMinMaskedUint32x4(v) - case OpMinMaskedUint32x8: - return rewriteValueAMD64_OpMinMaskedUint32x8(v) - case OpMinMaskedUint64x2: - return rewriteValueAMD64_OpMinMaskedUint64x2(v) - case OpMinMaskedUint64x4: - return rewriteValueAMD64_OpMinMaskedUint64x4(v) - case OpMinMaskedUint64x8: - return rewriteValueAMD64_OpMinMaskedUint64x8(v) - case OpMinMaskedUint8x16: - return rewriteValueAMD64_OpMinMaskedUint8x16(v) - case OpMinMaskedUint8x32: - return rewriteValueAMD64_OpMinMaskedUint8x32(v) - case OpMinMaskedUint8x64: - return rewriteValueAMD64_OpMinMaskedUint8x64(v) case OpMinUint16x16: v.Op = OpAMD64VPMINUW256 return true @@ -3194,18 +2378,6 @@ func rewriteValueAMD64(v *Value) bool { case OpMulAddFloat64x8: v.Op = OpAMD64VFMADD213PD512 return true - case OpMulAddMaskedFloat32x16: - return rewriteValueAMD64_OpMulAddMaskedFloat32x16(v) - case OpMulAddMaskedFloat32x4: - return rewriteValueAMD64_OpMulAddMaskedFloat32x4(v) - case OpMulAddMaskedFloat32x8: - return rewriteValueAMD64_OpMulAddMaskedFloat32x8(v) - case OpMulAddMaskedFloat64x2: - return rewriteValueAMD64_OpMulAddMaskedFloat64x2(v) - case OpMulAddMaskedFloat64x4: - return rewriteValueAMD64_OpMulAddMaskedFloat64x4(v) - case OpMulAddMaskedFloat64x8: - return rewriteValueAMD64_OpMulAddMaskedFloat64x8(v) case OpMulAddSubFloat32x16: v.Op = OpAMD64VFMADDSUB213PS512 return true @@ -3224,18 +2396,6 @@ func rewriteValueAMD64(v *Value) bool { case OpMulAddSubFloat64x8: v.Op = OpAMD64VFMADDSUB213PD512 return true - case OpMulAddSubMaskedFloat32x16: - return rewriteValueAMD64_OpMulAddSubMaskedFloat32x16(v) - case OpMulAddSubMaskedFloat32x4: - return rewriteValueAMD64_OpMulAddSubMaskedFloat32x4(v) - case OpMulAddSubMaskedFloat32x8: - return rewriteValueAMD64_OpMulAddSubMaskedFloat32x8(v) - case OpMulAddSubMaskedFloat64x2: - return rewriteValueAMD64_OpMulAddSubMaskedFloat64x2(v) - case OpMulAddSubMaskedFloat64x4: - return rewriteValueAMD64_OpMulAddSubMaskedFloat64x4(v) - case OpMulAddSubMaskedFloat64x8: - return rewriteValueAMD64_OpMulAddSubMaskedFloat64x8(v) case OpMulEvenWidenInt32x4: v.Op = OpAMD64VPMULDQ128 return true @@ -3275,18 +2435,6 @@ func rewriteValueAMD64(v *Value) bool { case OpMulHighInt16x8: v.Op = OpAMD64VPMULHW128 return true - case OpMulHighMaskedInt16x16: - return rewriteValueAMD64_OpMulHighMaskedInt16x16(v) - case OpMulHighMaskedInt16x32: - return rewriteValueAMD64_OpMulHighMaskedInt16x32(v) - case OpMulHighMaskedInt16x8: - return rewriteValueAMD64_OpMulHighMaskedInt16x8(v) - case OpMulHighMaskedUint16x16: - return 
rewriteValueAMD64_OpMulHighMaskedUint16x16(v) - case OpMulHighMaskedUint16x32: - return rewriteValueAMD64_OpMulHighMaskedUint16x32(v) - case OpMulHighMaskedUint16x8: - return rewriteValueAMD64_OpMulHighMaskedUint16x8(v) case OpMulHighUint16x16: v.Op = OpAMD64VPMULHUW256 return true @@ -3323,54 +2471,6 @@ func rewriteValueAMD64(v *Value) bool { case OpMulInt64x8: v.Op = OpAMD64VPMULLQ512 return true - case OpMulMaskedFloat32x16: - return rewriteValueAMD64_OpMulMaskedFloat32x16(v) - case OpMulMaskedFloat32x4: - return rewriteValueAMD64_OpMulMaskedFloat32x4(v) - case OpMulMaskedFloat32x8: - return rewriteValueAMD64_OpMulMaskedFloat32x8(v) - case OpMulMaskedFloat64x2: - return rewriteValueAMD64_OpMulMaskedFloat64x2(v) - case OpMulMaskedFloat64x4: - return rewriteValueAMD64_OpMulMaskedFloat64x4(v) - case OpMulMaskedFloat64x8: - return rewriteValueAMD64_OpMulMaskedFloat64x8(v) - case OpMulMaskedInt16x16: - return rewriteValueAMD64_OpMulMaskedInt16x16(v) - case OpMulMaskedInt16x32: - return rewriteValueAMD64_OpMulMaskedInt16x32(v) - case OpMulMaskedInt16x8: - return rewriteValueAMD64_OpMulMaskedInt16x8(v) - case OpMulMaskedInt32x16: - return rewriteValueAMD64_OpMulMaskedInt32x16(v) - case OpMulMaskedInt32x4: - return rewriteValueAMD64_OpMulMaskedInt32x4(v) - case OpMulMaskedInt32x8: - return rewriteValueAMD64_OpMulMaskedInt32x8(v) - case OpMulMaskedInt64x2: - return rewriteValueAMD64_OpMulMaskedInt64x2(v) - case OpMulMaskedInt64x4: - return rewriteValueAMD64_OpMulMaskedInt64x4(v) - case OpMulMaskedInt64x8: - return rewriteValueAMD64_OpMulMaskedInt64x8(v) - case OpMulMaskedUint16x16: - return rewriteValueAMD64_OpMulMaskedUint16x16(v) - case OpMulMaskedUint16x32: - return rewriteValueAMD64_OpMulMaskedUint16x32(v) - case OpMulMaskedUint16x8: - return rewriteValueAMD64_OpMulMaskedUint16x8(v) - case OpMulMaskedUint32x16: - return rewriteValueAMD64_OpMulMaskedUint32x16(v) - case OpMulMaskedUint32x4: - return rewriteValueAMD64_OpMulMaskedUint32x4(v) - case OpMulMaskedUint32x8: - return rewriteValueAMD64_OpMulMaskedUint32x8(v) - case OpMulMaskedUint64x2: - return rewriteValueAMD64_OpMulMaskedUint64x2(v) - case OpMulMaskedUint64x4: - return rewriteValueAMD64_OpMulMaskedUint64x4(v) - case OpMulMaskedUint64x8: - return rewriteValueAMD64_OpMulMaskedUint64x8(v) case OpMulSubAddFloat32x16: v.Op = OpAMD64VFMSUBADD213PS512 return true @@ -3389,18 +2489,6 @@ func rewriteValueAMD64(v *Value) bool { case OpMulSubAddFloat64x8: v.Op = OpAMD64VFMSUBADD213PD512 return true - case OpMulSubAddMaskedFloat32x16: - return rewriteValueAMD64_OpMulSubAddMaskedFloat32x16(v) - case OpMulSubAddMaskedFloat32x4: - return rewriteValueAMD64_OpMulSubAddMaskedFloat32x4(v) - case OpMulSubAddMaskedFloat32x8: - return rewriteValueAMD64_OpMulSubAddMaskedFloat32x8(v) - case OpMulSubAddMaskedFloat64x2: - return rewriteValueAMD64_OpMulSubAddMaskedFloat64x2(v) - case OpMulSubAddMaskedFloat64x4: - return rewriteValueAMD64_OpMulSubAddMaskedFloat64x4(v) - case OpMulSubAddMaskedFloat64x8: - return rewriteValueAMD64_OpMulSubAddMaskedFloat64x8(v) case OpMulUint16x16: v.Op = OpAMD64VPMULLW256 return true @@ -3485,66 +2573,6 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpNotEqualInt64x8(v) case OpNotEqualInt8x64: return rewriteValueAMD64_OpNotEqualInt8x64(v) - case OpNotEqualMaskedFloat32x16: - return rewriteValueAMD64_OpNotEqualMaskedFloat32x16(v) - case OpNotEqualMaskedFloat32x4: - return rewriteValueAMD64_OpNotEqualMaskedFloat32x4(v) - case OpNotEqualMaskedFloat32x8: - return rewriteValueAMD64_OpNotEqualMaskedFloat32x8(v) - 
case OpNotEqualMaskedFloat64x2: - return rewriteValueAMD64_OpNotEqualMaskedFloat64x2(v) - case OpNotEqualMaskedFloat64x4: - return rewriteValueAMD64_OpNotEqualMaskedFloat64x4(v) - case OpNotEqualMaskedFloat64x8: - return rewriteValueAMD64_OpNotEqualMaskedFloat64x8(v) - case OpNotEqualMaskedInt16x16: - return rewriteValueAMD64_OpNotEqualMaskedInt16x16(v) - case OpNotEqualMaskedInt16x32: - return rewriteValueAMD64_OpNotEqualMaskedInt16x32(v) - case OpNotEqualMaskedInt16x8: - return rewriteValueAMD64_OpNotEqualMaskedInt16x8(v) - case OpNotEqualMaskedInt32x16: - return rewriteValueAMD64_OpNotEqualMaskedInt32x16(v) - case OpNotEqualMaskedInt32x4: - return rewriteValueAMD64_OpNotEqualMaskedInt32x4(v) - case OpNotEqualMaskedInt32x8: - return rewriteValueAMD64_OpNotEqualMaskedInt32x8(v) - case OpNotEqualMaskedInt64x2: - return rewriteValueAMD64_OpNotEqualMaskedInt64x2(v) - case OpNotEqualMaskedInt64x4: - return rewriteValueAMD64_OpNotEqualMaskedInt64x4(v) - case OpNotEqualMaskedInt64x8: - return rewriteValueAMD64_OpNotEqualMaskedInt64x8(v) - case OpNotEqualMaskedInt8x16: - return rewriteValueAMD64_OpNotEqualMaskedInt8x16(v) - case OpNotEqualMaskedInt8x32: - return rewriteValueAMD64_OpNotEqualMaskedInt8x32(v) - case OpNotEqualMaskedInt8x64: - return rewriteValueAMD64_OpNotEqualMaskedInt8x64(v) - case OpNotEqualMaskedUint16x16: - return rewriteValueAMD64_OpNotEqualMaskedUint16x16(v) - case OpNotEqualMaskedUint16x32: - return rewriteValueAMD64_OpNotEqualMaskedUint16x32(v) - case OpNotEqualMaskedUint16x8: - return rewriteValueAMD64_OpNotEqualMaskedUint16x8(v) - case OpNotEqualMaskedUint32x16: - return rewriteValueAMD64_OpNotEqualMaskedUint32x16(v) - case OpNotEqualMaskedUint32x4: - return rewriteValueAMD64_OpNotEqualMaskedUint32x4(v) - case OpNotEqualMaskedUint32x8: - return rewriteValueAMD64_OpNotEqualMaskedUint32x8(v) - case OpNotEqualMaskedUint64x2: - return rewriteValueAMD64_OpNotEqualMaskedUint64x2(v) - case OpNotEqualMaskedUint64x4: - return rewriteValueAMD64_OpNotEqualMaskedUint64x4(v) - case OpNotEqualMaskedUint64x8: - return rewriteValueAMD64_OpNotEqualMaskedUint64x8(v) - case OpNotEqualMaskedUint8x16: - return rewriteValueAMD64_OpNotEqualMaskedUint8x16(v) - case OpNotEqualMaskedUint8x32: - return rewriteValueAMD64_OpNotEqualMaskedUint8x32(v) - case OpNotEqualMaskedUint8x64: - return rewriteValueAMD64_OpNotEqualMaskedUint8x64(v) case OpNotEqualUint16x32: return rewriteValueAMD64_OpNotEqualUint16x32(v) case OpNotEqualUint32x16: @@ -3591,54 +2619,6 @@ func rewriteValueAMD64(v *Value) bool { case OpOnesCountInt8x64: v.Op = OpAMD64VPOPCNTB512 return true - case OpOnesCountMaskedInt16x16: - return rewriteValueAMD64_OpOnesCountMaskedInt16x16(v) - case OpOnesCountMaskedInt16x32: - return rewriteValueAMD64_OpOnesCountMaskedInt16x32(v) - case OpOnesCountMaskedInt16x8: - return rewriteValueAMD64_OpOnesCountMaskedInt16x8(v) - case OpOnesCountMaskedInt32x16: - return rewriteValueAMD64_OpOnesCountMaskedInt32x16(v) - case OpOnesCountMaskedInt32x4: - return rewriteValueAMD64_OpOnesCountMaskedInt32x4(v) - case OpOnesCountMaskedInt32x8: - return rewriteValueAMD64_OpOnesCountMaskedInt32x8(v) - case OpOnesCountMaskedInt64x2: - return rewriteValueAMD64_OpOnesCountMaskedInt64x2(v) - case OpOnesCountMaskedInt64x4: - return rewriteValueAMD64_OpOnesCountMaskedInt64x4(v) - case OpOnesCountMaskedInt64x8: - return rewriteValueAMD64_OpOnesCountMaskedInt64x8(v) - case OpOnesCountMaskedInt8x16: - return rewriteValueAMD64_OpOnesCountMaskedInt8x16(v) - case OpOnesCountMaskedInt8x32: - return 
rewriteValueAMD64_OpOnesCountMaskedInt8x32(v) - case OpOnesCountMaskedInt8x64: - return rewriteValueAMD64_OpOnesCountMaskedInt8x64(v) - case OpOnesCountMaskedUint16x16: - return rewriteValueAMD64_OpOnesCountMaskedUint16x16(v) - case OpOnesCountMaskedUint16x32: - return rewriteValueAMD64_OpOnesCountMaskedUint16x32(v) - case OpOnesCountMaskedUint16x8: - return rewriteValueAMD64_OpOnesCountMaskedUint16x8(v) - case OpOnesCountMaskedUint32x16: - return rewriteValueAMD64_OpOnesCountMaskedUint32x16(v) - case OpOnesCountMaskedUint32x4: - return rewriteValueAMD64_OpOnesCountMaskedUint32x4(v) - case OpOnesCountMaskedUint32x8: - return rewriteValueAMD64_OpOnesCountMaskedUint32x8(v) - case OpOnesCountMaskedUint64x2: - return rewriteValueAMD64_OpOnesCountMaskedUint64x2(v) - case OpOnesCountMaskedUint64x4: - return rewriteValueAMD64_OpOnesCountMaskedUint64x4(v) - case OpOnesCountMaskedUint64x8: - return rewriteValueAMD64_OpOnesCountMaskedUint64x8(v) - case OpOnesCountMaskedUint8x16: - return rewriteValueAMD64_OpOnesCountMaskedUint8x16(v) - case OpOnesCountMaskedUint8x32: - return rewriteValueAMD64_OpOnesCountMaskedUint8x32(v) - case OpOnesCountMaskedUint8x64: - return rewriteValueAMD64_OpOnesCountMaskedUint8x64(v) case OpOnesCountUint16x16: v.Op = OpAMD64VPOPCNTW256 return true @@ -3726,30 +2706,6 @@ func rewriteValueAMD64(v *Value) bool { case OpOrInt8x64: v.Op = OpAMD64VPORD512 return true - case OpOrMaskedInt32x16: - return rewriteValueAMD64_OpOrMaskedInt32x16(v) - case OpOrMaskedInt32x4: - return rewriteValueAMD64_OpOrMaskedInt32x4(v) - case OpOrMaskedInt32x8: - return rewriteValueAMD64_OpOrMaskedInt32x8(v) - case OpOrMaskedInt64x2: - return rewriteValueAMD64_OpOrMaskedInt64x2(v) - case OpOrMaskedInt64x4: - return rewriteValueAMD64_OpOrMaskedInt64x4(v) - case OpOrMaskedInt64x8: - return rewriteValueAMD64_OpOrMaskedInt64x8(v) - case OpOrMaskedUint32x16: - return rewriteValueAMD64_OpOrMaskedUint32x16(v) - case OpOrMaskedUint32x4: - return rewriteValueAMD64_OpOrMaskedUint32x4(v) - case OpOrMaskedUint32x8: - return rewriteValueAMD64_OpOrMaskedUint32x8(v) - case OpOrMaskedUint64x2: - return rewriteValueAMD64_OpOrMaskedUint64x2(v) - case OpOrMaskedUint64x4: - return rewriteValueAMD64_OpOrMaskedUint64x4(v) - case OpOrMaskedUint64x8: - return rewriteValueAMD64_OpOrMaskedUint64x8(v) case OpOrUint16x16: v.Op = OpAMD64VPOR256 return true @@ -3843,66 +2799,6 @@ func rewriteValueAMD64(v *Value) bool { case OpPermute2Int8x64: v.Op = OpAMD64VPERMI2B512 return true - case OpPermute2MaskedFloat32x16: - return rewriteValueAMD64_OpPermute2MaskedFloat32x16(v) - case OpPermute2MaskedFloat32x4: - return rewriteValueAMD64_OpPermute2MaskedFloat32x4(v) - case OpPermute2MaskedFloat32x8: - return rewriteValueAMD64_OpPermute2MaskedFloat32x8(v) - case OpPermute2MaskedFloat64x2: - return rewriteValueAMD64_OpPermute2MaskedFloat64x2(v) - case OpPermute2MaskedFloat64x4: - return rewriteValueAMD64_OpPermute2MaskedFloat64x4(v) - case OpPermute2MaskedFloat64x8: - return rewriteValueAMD64_OpPermute2MaskedFloat64x8(v) - case OpPermute2MaskedInt16x16: - return rewriteValueAMD64_OpPermute2MaskedInt16x16(v) - case OpPermute2MaskedInt16x32: - return rewriteValueAMD64_OpPermute2MaskedInt16x32(v) - case OpPermute2MaskedInt16x8: - return rewriteValueAMD64_OpPermute2MaskedInt16x8(v) - case OpPermute2MaskedInt32x16: - return rewriteValueAMD64_OpPermute2MaskedInt32x16(v) - case OpPermute2MaskedInt32x4: - return rewriteValueAMD64_OpPermute2MaskedInt32x4(v) - case OpPermute2MaskedInt32x8: - return rewriteValueAMD64_OpPermute2MaskedInt32x8(v) - case 
OpPermute2MaskedInt64x2: - return rewriteValueAMD64_OpPermute2MaskedInt64x2(v) - case OpPermute2MaskedInt64x4: - return rewriteValueAMD64_OpPermute2MaskedInt64x4(v) - case OpPermute2MaskedInt64x8: - return rewriteValueAMD64_OpPermute2MaskedInt64x8(v) - case OpPermute2MaskedInt8x16: - return rewriteValueAMD64_OpPermute2MaskedInt8x16(v) - case OpPermute2MaskedInt8x32: - return rewriteValueAMD64_OpPermute2MaskedInt8x32(v) - case OpPermute2MaskedInt8x64: - return rewriteValueAMD64_OpPermute2MaskedInt8x64(v) - case OpPermute2MaskedUint16x16: - return rewriteValueAMD64_OpPermute2MaskedUint16x16(v) - case OpPermute2MaskedUint16x32: - return rewriteValueAMD64_OpPermute2MaskedUint16x32(v) - case OpPermute2MaskedUint16x8: - return rewriteValueAMD64_OpPermute2MaskedUint16x8(v) - case OpPermute2MaskedUint32x16: - return rewriteValueAMD64_OpPermute2MaskedUint32x16(v) - case OpPermute2MaskedUint32x4: - return rewriteValueAMD64_OpPermute2MaskedUint32x4(v) - case OpPermute2MaskedUint32x8: - return rewriteValueAMD64_OpPermute2MaskedUint32x8(v) - case OpPermute2MaskedUint64x2: - return rewriteValueAMD64_OpPermute2MaskedUint64x2(v) - case OpPermute2MaskedUint64x4: - return rewriteValueAMD64_OpPermute2MaskedUint64x4(v) - case OpPermute2MaskedUint64x8: - return rewriteValueAMD64_OpPermute2MaskedUint64x8(v) - case OpPermute2MaskedUint8x16: - return rewriteValueAMD64_OpPermute2MaskedUint8x16(v) - case OpPermute2MaskedUint8x32: - return rewriteValueAMD64_OpPermute2MaskedUint8x32(v) - case OpPermute2MaskedUint8x64: - return rewriteValueAMD64_OpPermute2MaskedUint8x64(v) case OpPermute2Uint16x16: v.Op = OpAMD64VPERMI2W256 return true @@ -3981,54 +2877,6 @@ func rewriteValueAMD64(v *Value) bool { case OpPermuteInt8x64: v.Op = OpAMD64VPERMB512 return true - case OpPermuteMaskedFloat32x16: - return rewriteValueAMD64_OpPermuteMaskedFloat32x16(v) - case OpPermuteMaskedFloat32x8: - return rewriteValueAMD64_OpPermuteMaskedFloat32x8(v) - case OpPermuteMaskedFloat64x4: - return rewriteValueAMD64_OpPermuteMaskedFloat64x4(v) - case OpPermuteMaskedFloat64x8: - return rewriteValueAMD64_OpPermuteMaskedFloat64x8(v) - case OpPermuteMaskedInt16x16: - return rewriteValueAMD64_OpPermuteMaskedInt16x16(v) - case OpPermuteMaskedInt16x32: - return rewriteValueAMD64_OpPermuteMaskedInt16x32(v) - case OpPermuteMaskedInt16x8: - return rewriteValueAMD64_OpPermuteMaskedInt16x8(v) - case OpPermuteMaskedInt32x16: - return rewriteValueAMD64_OpPermuteMaskedInt32x16(v) - case OpPermuteMaskedInt32x8: - return rewriteValueAMD64_OpPermuteMaskedInt32x8(v) - case OpPermuteMaskedInt64x4: - return rewriteValueAMD64_OpPermuteMaskedInt64x4(v) - case OpPermuteMaskedInt64x8: - return rewriteValueAMD64_OpPermuteMaskedInt64x8(v) - case OpPermuteMaskedInt8x16: - return rewriteValueAMD64_OpPermuteMaskedInt8x16(v) - case OpPermuteMaskedInt8x32: - return rewriteValueAMD64_OpPermuteMaskedInt8x32(v) - case OpPermuteMaskedInt8x64: - return rewriteValueAMD64_OpPermuteMaskedInt8x64(v) - case OpPermuteMaskedUint16x16: - return rewriteValueAMD64_OpPermuteMaskedUint16x16(v) - case OpPermuteMaskedUint16x32: - return rewriteValueAMD64_OpPermuteMaskedUint16x32(v) - case OpPermuteMaskedUint16x8: - return rewriteValueAMD64_OpPermuteMaskedUint16x8(v) - case OpPermuteMaskedUint32x16: - return rewriteValueAMD64_OpPermuteMaskedUint32x16(v) - case OpPermuteMaskedUint32x8: - return rewriteValueAMD64_OpPermuteMaskedUint32x8(v) - case OpPermuteMaskedUint64x4: - return rewriteValueAMD64_OpPermuteMaskedUint64x4(v) - case OpPermuteMaskedUint64x8: - return 
rewriteValueAMD64_OpPermuteMaskedUint64x8(v) - case OpPermuteMaskedUint8x16: - return rewriteValueAMD64_OpPermuteMaskedUint8x16(v) - case OpPermuteMaskedUint8x32: - return rewriteValueAMD64_OpPermuteMaskedUint8x32(v) - case OpPermuteMaskedUint8x64: - return rewriteValueAMD64_OpPermuteMaskedUint8x64(v) case OpPermuteUint16x16: v.Op = OpAMD64VPERMW256 return true @@ -4093,18 +2941,6 @@ func rewriteValueAMD64(v *Value) bool { case OpReciprocalFloat64x8: v.Op = OpAMD64VRCP14PD512 return true - case OpReciprocalMaskedFloat32x16: - return rewriteValueAMD64_OpReciprocalMaskedFloat32x16(v) - case OpReciprocalMaskedFloat32x4: - return rewriteValueAMD64_OpReciprocalMaskedFloat32x4(v) - case OpReciprocalMaskedFloat32x8: - return rewriteValueAMD64_OpReciprocalMaskedFloat32x8(v) - case OpReciprocalMaskedFloat64x2: - return rewriteValueAMD64_OpReciprocalMaskedFloat64x2(v) - case OpReciprocalMaskedFloat64x4: - return rewriteValueAMD64_OpReciprocalMaskedFloat64x4(v) - case OpReciprocalMaskedFloat64x8: - return rewriteValueAMD64_OpReciprocalMaskedFloat64x8(v) case OpReciprocalSqrtFloat32x16: v.Op = OpAMD64VRSQRT14PS512 return true @@ -4123,18 +2959,6 @@ func rewriteValueAMD64(v *Value) bool { case OpReciprocalSqrtFloat64x8: v.Op = OpAMD64VRSQRT14PD512 return true - case OpReciprocalSqrtMaskedFloat32x16: - return rewriteValueAMD64_OpReciprocalSqrtMaskedFloat32x16(v) - case OpReciprocalSqrtMaskedFloat32x4: - return rewriteValueAMD64_OpReciprocalSqrtMaskedFloat32x4(v) - case OpReciprocalSqrtMaskedFloat32x8: - return rewriteValueAMD64_OpReciprocalSqrtMaskedFloat32x8(v) - case OpReciprocalSqrtMaskedFloat64x2: - return rewriteValueAMD64_OpReciprocalSqrtMaskedFloat64x2(v) - case OpReciprocalSqrtMaskedFloat64x4: - return rewriteValueAMD64_OpReciprocalSqrtMaskedFloat64x4(v) - case OpReciprocalSqrtMaskedFloat64x8: - return rewriteValueAMD64_OpReciprocalSqrtMaskedFloat64x8(v) case OpRotateAllLeftInt32x16: v.Op = OpAMD64VPROLD512 return true @@ -4153,30 +2977,6 @@ func rewriteValueAMD64(v *Value) bool { case OpRotateAllLeftInt64x8: v.Op = OpAMD64VPROLQ512 return true - case OpRotateAllLeftMaskedInt32x16: - return rewriteValueAMD64_OpRotateAllLeftMaskedInt32x16(v) - case OpRotateAllLeftMaskedInt32x4: - return rewriteValueAMD64_OpRotateAllLeftMaskedInt32x4(v) - case OpRotateAllLeftMaskedInt32x8: - return rewriteValueAMD64_OpRotateAllLeftMaskedInt32x8(v) - case OpRotateAllLeftMaskedInt64x2: - return rewriteValueAMD64_OpRotateAllLeftMaskedInt64x2(v) - case OpRotateAllLeftMaskedInt64x4: - return rewriteValueAMD64_OpRotateAllLeftMaskedInt64x4(v) - case OpRotateAllLeftMaskedInt64x8: - return rewriteValueAMD64_OpRotateAllLeftMaskedInt64x8(v) - case OpRotateAllLeftMaskedUint32x16: - return rewriteValueAMD64_OpRotateAllLeftMaskedUint32x16(v) - case OpRotateAllLeftMaskedUint32x4: - return rewriteValueAMD64_OpRotateAllLeftMaskedUint32x4(v) - case OpRotateAllLeftMaskedUint32x8: - return rewriteValueAMD64_OpRotateAllLeftMaskedUint32x8(v) - case OpRotateAllLeftMaskedUint64x2: - return rewriteValueAMD64_OpRotateAllLeftMaskedUint64x2(v) - case OpRotateAllLeftMaskedUint64x4: - return rewriteValueAMD64_OpRotateAllLeftMaskedUint64x4(v) - case OpRotateAllLeftMaskedUint64x8: - return rewriteValueAMD64_OpRotateAllLeftMaskedUint64x8(v) case OpRotateAllLeftUint32x16: v.Op = OpAMD64VPROLD512 return true @@ -4213,30 +3013,6 @@ func rewriteValueAMD64(v *Value) bool { case OpRotateAllRightInt64x8: v.Op = OpAMD64VPRORQ512 return true - case OpRotateAllRightMaskedInt32x16: - return rewriteValueAMD64_OpRotateAllRightMaskedInt32x16(v) - case 
OpRotateAllRightMaskedInt32x4: - return rewriteValueAMD64_OpRotateAllRightMaskedInt32x4(v) - case OpRotateAllRightMaskedInt32x8: - return rewriteValueAMD64_OpRotateAllRightMaskedInt32x8(v) - case OpRotateAllRightMaskedInt64x2: - return rewriteValueAMD64_OpRotateAllRightMaskedInt64x2(v) - case OpRotateAllRightMaskedInt64x4: - return rewriteValueAMD64_OpRotateAllRightMaskedInt64x4(v) - case OpRotateAllRightMaskedInt64x8: - return rewriteValueAMD64_OpRotateAllRightMaskedInt64x8(v) - case OpRotateAllRightMaskedUint32x16: - return rewriteValueAMD64_OpRotateAllRightMaskedUint32x16(v) - case OpRotateAllRightMaskedUint32x4: - return rewriteValueAMD64_OpRotateAllRightMaskedUint32x4(v) - case OpRotateAllRightMaskedUint32x8: - return rewriteValueAMD64_OpRotateAllRightMaskedUint32x8(v) - case OpRotateAllRightMaskedUint64x2: - return rewriteValueAMD64_OpRotateAllRightMaskedUint64x2(v) - case OpRotateAllRightMaskedUint64x4: - return rewriteValueAMD64_OpRotateAllRightMaskedUint64x4(v) - case OpRotateAllRightMaskedUint64x8: - return rewriteValueAMD64_OpRotateAllRightMaskedUint64x8(v) case OpRotateAllRightUint32x16: v.Op = OpAMD64VPRORD512 return true @@ -4285,30 +3061,6 @@ func rewriteValueAMD64(v *Value) bool { case OpRotateLeftInt64x8: v.Op = OpAMD64VPROLVQ512 return true - case OpRotateLeftMaskedInt32x16: - return rewriteValueAMD64_OpRotateLeftMaskedInt32x16(v) - case OpRotateLeftMaskedInt32x4: - return rewriteValueAMD64_OpRotateLeftMaskedInt32x4(v) - case OpRotateLeftMaskedInt32x8: - return rewriteValueAMD64_OpRotateLeftMaskedInt32x8(v) - case OpRotateLeftMaskedInt64x2: - return rewriteValueAMD64_OpRotateLeftMaskedInt64x2(v) - case OpRotateLeftMaskedInt64x4: - return rewriteValueAMD64_OpRotateLeftMaskedInt64x4(v) - case OpRotateLeftMaskedInt64x8: - return rewriteValueAMD64_OpRotateLeftMaskedInt64x8(v) - case OpRotateLeftMaskedUint32x16: - return rewriteValueAMD64_OpRotateLeftMaskedUint32x16(v) - case OpRotateLeftMaskedUint32x4: - return rewriteValueAMD64_OpRotateLeftMaskedUint32x4(v) - case OpRotateLeftMaskedUint32x8: - return rewriteValueAMD64_OpRotateLeftMaskedUint32x8(v) - case OpRotateLeftMaskedUint64x2: - return rewriteValueAMD64_OpRotateLeftMaskedUint64x2(v) - case OpRotateLeftMaskedUint64x4: - return rewriteValueAMD64_OpRotateLeftMaskedUint64x4(v) - case OpRotateLeftMaskedUint64x8: - return rewriteValueAMD64_OpRotateLeftMaskedUint64x8(v) case OpRotateLeftUint32x16: v.Op = OpAMD64VPROLVD512 return true @@ -4345,30 +3097,6 @@ func rewriteValueAMD64(v *Value) bool { case OpRotateRightInt64x8: v.Op = OpAMD64VPRORVQ512 return true - case OpRotateRightMaskedInt32x16: - return rewriteValueAMD64_OpRotateRightMaskedInt32x16(v) - case OpRotateRightMaskedInt32x4: - return rewriteValueAMD64_OpRotateRightMaskedInt32x4(v) - case OpRotateRightMaskedInt32x8: - return rewriteValueAMD64_OpRotateRightMaskedInt32x8(v) - case OpRotateRightMaskedInt64x2: - return rewriteValueAMD64_OpRotateRightMaskedInt64x2(v) - case OpRotateRightMaskedInt64x4: - return rewriteValueAMD64_OpRotateRightMaskedInt64x4(v) - case OpRotateRightMaskedInt64x8: - return rewriteValueAMD64_OpRotateRightMaskedInt64x8(v) - case OpRotateRightMaskedUint32x16: - return rewriteValueAMD64_OpRotateRightMaskedUint32x16(v) - case OpRotateRightMaskedUint32x4: - return rewriteValueAMD64_OpRotateRightMaskedUint32x4(v) - case OpRotateRightMaskedUint32x8: - return rewriteValueAMD64_OpRotateRightMaskedUint32x8(v) - case OpRotateRightMaskedUint64x2: - return rewriteValueAMD64_OpRotateRightMaskedUint64x2(v) - case OpRotateRightMaskedUint64x4: - return 
rewriteValueAMD64_OpRotateRightMaskedUint64x4(v) - case OpRotateRightMaskedUint64x8: - return rewriteValueAMD64_OpRotateRightMaskedUint64x8(v) case OpRotateRightUint32x16: v.Op = OpAMD64VPRORVD512 return true @@ -4415,18 +3143,6 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpRoundToEvenScaledFloat64x4(v) case OpRoundToEvenScaledFloat64x8: return rewriteValueAMD64_OpRoundToEvenScaledFloat64x8(v) - case OpRoundToEvenScaledMaskedFloat32x16: - return rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat32x16(v) - case OpRoundToEvenScaledMaskedFloat32x4: - return rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat32x4(v) - case OpRoundToEvenScaledMaskedFloat32x8: - return rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat32x8(v) - case OpRoundToEvenScaledMaskedFloat64x2: - return rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat64x2(v) - case OpRoundToEvenScaledMaskedFloat64x4: - return rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat64x4(v) - case OpRoundToEvenScaledMaskedFloat64x8: - return rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat64x8(v) case OpRoundToEvenScaledResidueFloat32x16: return rewriteValueAMD64_OpRoundToEvenScaledResidueFloat32x16(v) case OpRoundToEvenScaledResidueFloat32x4: @@ -4439,18 +3155,6 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpRoundToEvenScaledResidueFloat64x4(v) case OpRoundToEvenScaledResidueFloat64x8: return rewriteValueAMD64_OpRoundToEvenScaledResidueFloat64x8(v) - case OpRoundToEvenScaledResidueMaskedFloat32x16: - return rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat32x16(v) - case OpRoundToEvenScaledResidueMaskedFloat32x4: - return rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat32x4(v) - case OpRoundToEvenScaledResidueMaskedFloat32x8: - return rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat32x8(v) - case OpRoundToEvenScaledResidueMaskedFloat64x2: - return rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat64x2(v) - case OpRoundToEvenScaledResidueMaskedFloat64x4: - return rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat64x4(v) - case OpRoundToEvenScaledResidueMaskedFloat64x8: - return rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat64x8(v) case OpRsh16Ux16: return rewriteValueAMD64_OpRsh16Ux16(v) case OpRsh16Ux32: @@ -4533,18 +3237,6 @@ func rewriteValueAMD64(v *Value) bool { case OpScaleFloat64x8: v.Op = OpAMD64VSCALEFPD512 return true - case OpScaleMaskedFloat32x16: - return rewriteValueAMD64_OpScaleMaskedFloat32x16(v) - case OpScaleMaskedFloat32x4: - return rewriteValueAMD64_OpScaleMaskedFloat32x4(v) - case OpScaleMaskedFloat32x8: - return rewriteValueAMD64_OpScaleMaskedFloat32x8(v) - case OpScaleMaskedFloat64x2: - return rewriteValueAMD64_OpScaleMaskedFloat64x2(v) - case OpScaleMaskedFloat64x4: - return rewriteValueAMD64_OpScaleMaskedFloat64x4(v) - case OpScaleMaskedFloat64x8: - return rewriteValueAMD64_OpScaleMaskedFloat64x8(v) case OpSelect0: return rewriteValueAMD64_OpSelect0(v) case OpSelect1: @@ -4688,42 +3380,6 @@ func rewriteValueAMD64(v *Value) bool { case OpShiftAllLeftConcatInt64x8: v.Op = OpAMD64VPSHLDQ512 return true - case OpShiftAllLeftConcatMaskedInt16x16: - return rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt16x16(v) - case OpShiftAllLeftConcatMaskedInt16x32: - return rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt16x32(v) - case OpShiftAllLeftConcatMaskedInt16x8: - return rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt16x8(v) - case OpShiftAllLeftConcatMaskedInt32x16: - return rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt32x16(v) - case 
OpShiftAllLeftConcatMaskedInt32x4: - return rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt32x4(v) - case OpShiftAllLeftConcatMaskedInt32x8: - return rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt32x8(v) - case OpShiftAllLeftConcatMaskedInt64x2: - return rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt64x2(v) - case OpShiftAllLeftConcatMaskedInt64x4: - return rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt64x4(v) - case OpShiftAllLeftConcatMaskedInt64x8: - return rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt64x8(v) - case OpShiftAllLeftConcatMaskedUint16x16: - return rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint16x16(v) - case OpShiftAllLeftConcatMaskedUint16x32: - return rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint16x32(v) - case OpShiftAllLeftConcatMaskedUint16x8: - return rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint16x8(v) - case OpShiftAllLeftConcatMaskedUint32x16: - return rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint32x16(v) - case OpShiftAllLeftConcatMaskedUint32x4: - return rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint32x4(v) - case OpShiftAllLeftConcatMaskedUint32x8: - return rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint32x8(v) - case OpShiftAllLeftConcatMaskedUint64x2: - return rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint64x2(v) - case OpShiftAllLeftConcatMaskedUint64x4: - return rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint64x4(v) - case OpShiftAllLeftConcatMaskedUint64x8: - return rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint64x8(v) case OpShiftAllLeftConcatUint16x16: v.Op = OpAMD64VPSHLDW256 return true @@ -4778,42 +3434,6 @@ func rewriteValueAMD64(v *Value) bool { case OpShiftAllLeftInt64x8: v.Op = OpAMD64VPSLLQ512 return true - case OpShiftAllLeftMaskedInt16x16: - return rewriteValueAMD64_OpShiftAllLeftMaskedInt16x16(v) - case OpShiftAllLeftMaskedInt16x32: - return rewriteValueAMD64_OpShiftAllLeftMaskedInt16x32(v) - case OpShiftAllLeftMaskedInt16x8: - return rewriteValueAMD64_OpShiftAllLeftMaskedInt16x8(v) - case OpShiftAllLeftMaskedInt32x16: - return rewriteValueAMD64_OpShiftAllLeftMaskedInt32x16(v) - case OpShiftAllLeftMaskedInt32x4: - return rewriteValueAMD64_OpShiftAllLeftMaskedInt32x4(v) - case OpShiftAllLeftMaskedInt32x8: - return rewriteValueAMD64_OpShiftAllLeftMaskedInt32x8(v) - case OpShiftAllLeftMaskedInt64x2: - return rewriteValueAMD64_OpShiftAllLeftMaskedInt64x2(v) - case OpShiftAllLeftMaskedInt64x4: - return rewriteValueAMD64_OpShiftAllLeftMaskedInt64x4(v) - case OpShiftAllLeftMaskedInt64x8: - return rewriteValueAMD64_OpShiftAllLeftMaskedInt64x8(v) - case OpShiftAllLeftMaskedUint16x16: - return rewriteValueAMD64_OpShiftAllLeftMaskedUint16x16(v) - case OpShiftAllLeftMaskedUint16x32: - return rewriteValueAMD64_OpShiftAllLeftMaskedUint16x32(v) - case OpShiftAllLeftMaskedUint16x8: - return rewriteValueAMD64_OpShiftAllLeftMaskedUint16x8(v) - case OpShiftAllLeftMaskedUint32x16: - return rewriteValueAMD64_OpShiftAllLeftMaskedUint32x16(v) - case OpShiftAllLeftMaskedUint32x4: - return rewriteValueAMD64_OpShiftAllLeftMaskedUint32x4(v) - case OpShiftAllLeftMaskedUint32x8: - return rewriteValueAMD64_OpShiftAllLeftMaskedUint32x8(v) - case OpShiftAllLeftMaskedUint64x2: - return rewriteValueAMD64_OpShiftAllLeftMaskedUint64x2(v) - case OpShiftAllLeftMaskedUint64x4: - return rewriteValueAMD64_OpShiftAllLeftMaskedUint64x4(v) - case OpShiftAllLeftMaskedUint64x8: - return rewriteValueAMD64_OpShiftAllLeftMaskedUint64x8(v) case OpShiftAllLeftUint16x16: v.Op = OpAMD64VPSLLW256 return true @@ -4868,42 +3488,6 @@ func rewriteValueAMD64(v *Value) bool { 
case OpShiftAllRightConcatInt64x8: v.Op = OpAMD64VPSHRDQ512 return true - case OpShiftAllRightConcatMaskedInt16x16: - return rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x16(v) - case OpShiftAllRightConcatMaskedInt16x32: - return rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x32(v) - case OpShiftAllRightConcatMaskedInt16x8: - return rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x8(v) - case OpShiftAllRightConcatMaskedInt32x16: - return rewriteValueAMD64_OpShiftAllRightConcatMaskedInt32x16(v) - case OpShiftAllRightConcatMaskedInt32x4: - return rewriteValueAMD64_OpShiftAllRightConcatMaskedInt32x4(v) - case OpShiftAllRightConcatMaskedInt32x8: - return rewriteValueAMD64_OpShiftAllRightConcatMaskedInt32x8(v) - case OpShiftAllRightConcatMaskedInt64x2: - return rewriteValueAMD64_OpShiftAllRightConcatMaskedInt64x2(v) - case OpShiftAllRightConcatMaskedInt64x4: - return rewriteValueAMD64_OpShiftAllRightConcatMaskedInt64x4(v) - case OpShiftAllRightConcatMaskedInt64x8: - return rewriteValueAMD64_OpShiftAllRightConcatMaskedInt64x8(v) - case OpShiftAllRightConcatMaskedUint16x16: - return rewriteValueAMD64_OpShiftAllRightConcatMaskedUint16x16(v) - case OpShiftAllRightConcatMaskedUint16x32: - return rewriteValueAMD64_OpShiftAllRightConcatMaskedUint16x32(v) - case OpShiftAllRightConcatMaskedUint16x8: - return rewriteValueAMD64_OpShiftAllRightConcatMaskedUint16x8(v) - case OpShiftAllRightConcatMaskedUint32x16: - return rewriteValueAMD64_OpShiftAllRightConcatMaskedUint32x16(v) - case OpShiftAllRightConcatMaskedUint32x4: - return rewriteValueAMD64_OpShiftAllRightConcatMaskedUint32x4(v) - case OpShiftAllRightConcatMaskedUint32x8: - return rewriteValueAMD64_OpShiftAllRightConcatMaskedUint32x8(v) - case OpShiftAllRightConcatMaskedUint64x2: - return rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x2(v) - case OpShiftAllRightConcatMaskedUint64x4: - return rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x4(v) - case OpShiftAllRightConcatMaskedUint64x8: - return rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x8(v) case OpShiftAllRightConcatUint16x16: v.Op = OpAMD64VPSHRDW256 return true @@ -4958,42 +3542,6 @@ func rewriteValueAMD64(v *Value) bool { case OpShiftAllRightInt64x8: v.Op = OpAMD64VPSRAQ512 return true - case OpShiftAllRightMaskedInt16x16: - return rewriteValueAMD64_OpShiftAllRightMaskedInt16x16(v) - case OpShiftAllRightMaskedInt16x32: - return rewriteValueAMD64_OpShiftAllRightMaskedInt16x32(v) - case OpShiftAllRightMaskedInt16x8: - return rewriteValueAMD64_OpShiftAllRightMaskedInt16x8(v) - case OpShiftAllRightMaskedInt32x16: - return rewriteValueAMD64_OpShiftAllRightMaskedInt32x16(v) - case OpShiftAllRightMaskedInt32x4: - return rewriteValueAMD64_OpShiftAllRightMaskedInt32x4(v) - case OpShiftAllRightMaskedInt32x8: - return rewriteValueAMD64_OpShiftAllRightMaskedInt32x8(v) - case OpShiftAllRightMaskedInt64x2: - return rewriteValueAMD64_OpShiftAllRightMaskedInt64x2(v) - case OpShiftAllRightMaskedInt64x4: - return rewriteValueAMD64_OpShiftAllRightMaskedInt64x4(v) - case OpShiftAllRightMaskedInt64x8: - return rewriteValueAMD64_OpShiftAllRightMaskedInt64x8(v) - case OpShiftAllRightMaskedUint16x16: - return rewriteValueAMD64_OpShiftAllRightMaskedUint16x16(v) - case OpShiftAllRightMaskedUint16x32: - return rewriteValueAMD64_OpShiftAllRightMaskedUint16x32(v) - case OpShiftAllRightMaskedUint16x8: - return rewriteValueAMD64_OpShiftAllRightMaskedUint16x8(v) - case OpShiftAllRightMaskedUint32x16: - return rewriteValueAMD64_OpShiftAllRightMaskedUint32x16(v) - case OpShiftAllRightMaskedUint32x4: 
- return rewriteValueAMD64_OpShiftAllRightMaskedUint32x4(v) - case OpShiftAllRightMaskedUint32x8: - return rewriteValueAMD64_OpShiftAllRightMaskedUint32x8(v) - case OpShiftAllRightMaskedUint64x2: - return rewriteValueAMD64_OpShiftAllRightMaskedUint64x2(v) - case OpShiftAllRightMaskedUint64x4: - return rewriteValueAMD64_OpShiftAllRightMaskedUint64x4(v) - case OpShiftAllRightMaskedUint64x8: - return rewriteValueAMD64_OpShiftAllRightMaskedUint64x8(v) case OpShiftAllRightUint16x16: v.Op = OpAMD64VPSRLW256 return true @@ -5048,42 +3596,6 @@ func rewriteValueAMD64(v *Value) bool { case OpShiftLeftConcatInt64x8: v.Op = OpAMD64VPSHLDVQ512 return true - case OpShiftLeftConcatMaskedInt16x16: - return rewriteValueAMD64_OpShiftLeftConcatMaskedInt16x16(v) - case OpShiftLeftConcatMaskedInt16x32: - return rewriteValueAMD64_OpShiftLeftConcatMaskedInt16x32(v) - case OpShiftLeftConcatMaskedInt16x8: - return rewriteValueAMD64_OpShiftLeftConcatMaskedInt16x8(v) - case OpShiftLeftConcatMaskedInt32x16: - return rewriteValueAMD64_OpShiftLeftConcatMaskedInt32x16(v) - case OpShiftLeftConcatMaskedInt32x4: - return rewriteValueAMD64_OpShiftLeftConcatMaskedInt32x4(v) - case OpShiftLeftConcatMaskedInt32x8: - return rewriteValueAMD64_OpShiftLeftConcatMaskedInt32x8(v) - case OpShiftLeftConcatMaskedInt64x2: - return rewriteValueAMD64_OpShiftLeftConcatMaskedInt64x2(v) - case OpShiftLeftConcatMaskedInt64x4: - return rewriteValueAMD64_OpShiftLeftConcatMaskedInt64x4(v) - case OpShiftLeftConcatMaskedInt64x8: - return rewriteValueAMD64_OpShiftLeftConcatMaskedInt64x8(v) - case OpShiftLeftConcatMaskedUint16x16: - return rewriteValueAMD64_OpShiftLeftConcatMaskedUint16x16(v) - case OpShiftLeftConcatMaskedUint16x32: - return rewriteValueAMD64_OpShiftLeftConcatMaskedUint16x32(v) - case OpShiftLeftConcatMaskedUint16x8: - return rewriteValueAMD64_OpShiftLeftConcatMaskedUint16x8(v) - case OpShiftLeftConcatMaskedUint32x16: - return rewriteValueAMD64_OpShiftLeftConcatMaskedUint32x16(v) - case OpShiftLeftConcatMaskedUint32x4: - return rewriteValueAMD64_OpShiftLeftConcatMaskedUint32x4(v) - case OpShiftLeftConcatMaskedUint32x8: - return rewriteValueAMD64_OpShiftLeftConcatMaskedUint32x8(v) - case OpShiftLeftConcatMaskedUint64x2: - return rewriteValueAMD64_OpShiftLeftConcatMaskedUint64x2(v) - case OpShiftLeftConcatMaskedUint64x4: - return rewriteValueAMD64_OpShiftLeftConcatMaskedUint64x4(v) - case OpShiftLeftConcatMaskedUint64x8: - return rewriteValueAMD64_OpShiftLeftConcatMaskedUint64x8(v) case OpShiftLeftConcatUint16x16: v.Op = OpAMD64VPSHLDVW256 return true @@ -5138,42 +3650,6 @@ func rewriteValueAMD64(v *Value) bool { case OpShiftLeftInt64x8: v.Op = OpAMD64VPSLLVQ512 return true - case OpShiftLeftMaskedInt16x16: - return rewriteValueAMD64_OpShiftLeftMaskedInt16x16(v) - case OpShiftLeftMaskedInt16x32: - return rewriteValueAMD64_OpShiftLeftMaskedInt16x32(v) - case OpShiftLeftMaskedInt16x8: - return rewriteValueAMD64_OpShiftLeftMaskedInt16x8(v) - case OpShiftLeftMaskedInt32x16: - return rewriteValueAMD64_OpShiftLeftMaskedInt32x16(v) - case OpShiftLeftMaskedInt32x4: - return rewriteValueAMD64_OpShiftLeftMaskedInt32x4(v) - case OpShiftLeftMaskedInt32x8: - return rewriteValueAMD64_OpShiftLeftMaskedInt32x8(v) - case OpShiftLeftMaskedInt64x2: - return rewriteValueAMD64_OpShiftLeftMaskedInt64x2(v) - case OpShiftLeftMaskedInt64x4: - return rewriteValueAMD64_OpShiftLeftMaskedInt64x4(v) - case OpShiftLeftMaskedInt64x8: - return rewriteValueAMD64_OpShiftLeftMaskedInt64x8(v) - case OpShiftLeftMaskedUint16x16: - return 
rewriteValueAMD64_OpShiftLeftMaskedUint16x16(v) - case OpShiftLeftMaskedUint16x32: - return rewriteValueAMD64_OpShiftLeftMaskedUint16x32(v) - case OpShiftLeftMaskedUint16x8: - return rewriteValueAMD64_OpShiftLeftMaskedUint16x8(v) - case OpShiftLeftMaskedUint32x16: - return rewriteValueAMD64_OpShiftLeftMaskedUint32x16(v) - case OpShiftLeftMaskedUint32x4: - return rewriteValueAMD64_OpShiftLeftMaskedUint32x4(v) - case OpShiftLeftMaskedUint32x8: - return rewriteValueAMD64_OpShiftLeftMaskedUint32x8(v) - case OpShiftLeftMaskedUint64x2: - return rewriteValueAMD64_OpShiftLeftMaskedUint64x2(v) - case OpShiftLeftMaskedUint64x4: - return rewriteValueAMD64_OpShiftLeftMaskedUint64x4(v) - case OpShiftLeftMaskedUint64x8: - return rewriteValueAMD64_OpShiftLeftMaskedUint64x8(v) case OpShiftLeftUint16x16: v.Op = OpAMD64VPSLLVW256 return true @@ -5228,42 +3704,6 @@ func rewriteValueAMD64(v *Value) bool { case OpShiftRightConcatInt64x8: v.Op = OpAMD64VPSHRDVQ512 return true - case OpShiftRightConcatMaskedInt16x16: - return rewriteValueAMD64_OpShiftRightConcatMaskedInt16x16(v) - case OpShiftRightConcatMaskedInt16x32: - return rewriteValueAMD64_OpShiftRightConcatMaskedInt16x32(v) - case OpShiftRightConcatMaskedInt16x8: - return rewriteValueAMD64_OpShiftRightConcatMaskedInt16x8(v) - case OpShiftRightConcatMaskedInt32x16: - return rewriteValueAMD64_OpShiftRightConcatMaskedInt32x16(v) - case OpShiftRightConcatMaskedInt32x4: - return rewriteValueAMD64_OpShiftRightConcatMaskedInt32x4(v) - case OpShiftRightConcatMaskedInt32x8: - return rewriteValueAMD64_OpShiftRightConcatMaskedInt32x8(v) - case OpShiftRightConcatMaskedInt64x2: - return rewriteValueAMD64_OpShiftRightConcatMaskedInt64x2(v) - case OpShiftRightConcatMaskedInt64x4: - return rewriteValueAMD64_OpShiftRightConcatMaskedInt64x4(v) - case OpShiftRightConcatMaskedInt64x8: - return rewriteValueAMD64_OpShiftRightConcatMaskedInt64x8(v) - case OpShiftRightConcatMaskedUint16x16: - return rewriteValueAMD64_OpShiftRightConcatMaskedUint16x16(v) - case OpShiftRightConcatMaskedUint16x32: - return rewriteValueAMD64_OpShiftRightConcatMaskedUint16x32(v) - case OpShiftRightConcatMaskedUint16x8: - return rewriteValueAMD64_OpShiftRightConcatMaskedUint16x8(v) - case OpShiftRightConcatMaskedUint32x16: - return rewriteValueAMD64_OpShiftRightConcatMaskedUint32x16(v) - case OpShiftRightConcatMaskedUint32x4: - return rewriteValueAMD64_OpShiftRightConcatMaskedUint32x4(v) - case OpShiftRightConcatMaskedUint32x8: - return rewriteValueAMD64_OpShiftRightConcatMaskedUint32x8(v) - case OpShiftRightConcatMaskedUint64x2: - return rewriteValueAMD64_OpShiftRightConcatMaskedUint64x2(v) - case OpShiftRightConcatMaskedUint64x4: - return rewriteValueAMD64_OpShiftRightConcatMaskedUint64x4(v) - case OpShiftRightConcatMaskedUint64x8: - return rewriteValueAMD64_OpShiftRightConcatMaskedUint64x8(v) case OpShiftRightConcatUint16x16: v.Op = OpAMD64VPSHRDVW256 return true @@ -5318,42 +3758,6 @@ func rewriteValueAMD64(v *Value) bool { case OpShiftRightInt64x8: v.Op = OpAMD64VPSRAVQ512 return true - case OpShiftRightMaskedInt16x16: - return rewriteValueAMD64_OpShiftRightMaskedInt16x16(v) - case OpShiftRightMaskedInt16x32: - return rewriteValueAMD64_OpShiftRightMaskedInt16x32(v) - case OpShiftRightMaskedInt16x8: - return rewriteValueAMD64_OpShiftRightMaskedInt16x8(v) - case OpShiftRightMaskedInt32x16: - return rewriteValueAMD64_OpShiftRightMaskedInt32x16(v) - case OpShiftRightMaskedInt32x4: - return rewriteValueAMD64_OpShiftRightMaskedInt32x4(v) - case OpShiftRightMaskedInt32x8: - return 
rewriteValueAMD64_OpShiftRightMaskedInt32x8(v) - case OpShiftRightMaskedInt64x2: - return rewriteValueAMD64_OpShiftRightMaskedInt64x2(v) - case OpShiftRightMaskedInt64x4: - return rewriteValueAMD64_OpShiftRightMaskedInt64x4(v) - case OpShiftRightMaskedInt64x8: - return rewriteValueAMD64_OpShiftRightMaskedInt64x8(v) - case OpShiftRightMaskedUint16x16: - return rewriteValueAMD64_OpShiftRightMaskedUint16x16(v) - case OpShiftRightMaskedUint16x32: - return rewriteValueAMD64_OpShiftRightMaskedUint16x32(v) - case OpShiftRightMaskedUint16x8: - return rewriteValueAMD64_OpShiftRightMaskedUint16x8(v) - case OpShiftRightMaskedUint32x16: - return rewriteValueAMD64_OpShiftRightMaskedUint32x16(v) - case OpShiftRightMaskedUint32x4: - return rewriteValueAMD64_OpShiftRightMaskedUint32x4(v) - case OpShiftRightMaskedUint32x8: - return rewriteValueAMD64_OpShiftRightMaskedUint32x8(v) - case OpShiftRightMaskedUint64x2: - return rewriteValueAMD64_OpShiftRightMaskedUint64x2(v) - case OpShiftRightMaskedUint64x4: - return rewriteValueAMD64_OpShiftRightMaskedUint64x4(v) - case OpShiftRightMaskedUint64x8: - return rewriteValueAMD64_OpShiftRightMaskedUint64x8(v) case OpShiftRightUint16x16: v.Op = OpAMD64VPSRLVW256 return true @@ -5429,18 +3833,6 @@ func rewriteValueAMD64(v *Value) bool { case OpSqrtFloat64x8: v.Op = OpAMD64VSQRTPD512 return true - case OpSqrtMaskedFloat32x16: - return rewriteValueAMD64_OpSqrtMaskedFloat32x16(v) - case OpSqrtMaskedFloat32x4: - return rewriteValueAMD64_OpSqrtMaskedFloat32x4(v) - case OpSqrtMaskedFloat32x8: - return rewriteValueAMD64_OpSqrtMaskedFloat32x8(v) - case OpSqrtMaskedFloat64x2: - return rewriteValueAMD64_OpSqrtMaskedFloat64x2(v) - case OpSqrtMaskedFloat64x4: - return rewriteValueAMD64_OpSqrtMaskedFloat64x4(v) - case OpSqrtMaskedFloat64x8: - return rewriteValueAMD64_OpSqrtMaskedFloat64x8(v) case OpStaticCall: v.Op = OpAMD64CALLstatic return true @@ -5550,66 +3942,6 @@ func rewriteValueAMD64(v *Value) bool { case OpSubInt8x64: v.Op = OpAMD64VPSUBB512 return true - case OpSubMaskedFloat32x16: - return rewriteValueAMD64_OpSubMaskedFloat32x16(v) - case OpSubMaskedFloat32x4: - return rewriteValueAMD64_OpSubMaskedFloat32x4(v) - case OpSubMaskedFloat32x8: - return rewriteValueAMD64_OpSubMaskedFloat32x8(v) - case OpSubMaskedFloat64x2: - return rewriteValueAMD64_OpSubMaskedFloat64x2(v) - case OpSubMaskedFloat64x4: - return rewriteValueAMD64_OpSubMaskedFloat64x4(v) - case OpSubMaskedFloat64x8: - return rewriteValueAMD64_OpSubMaskedFloat64x8(v) - case OpSubMaskedInt16x16: - return rewriteValueAMD64_OpSubMaskedInt16x16(v) - case OpSubMaskedInt16x32: - return rewriteValueAMD64_OpSubMaskedInt16x32(v) - case OpSubMaskedInt16x8: - return rewriteValueAMD64_OpSubMaskedInt16x8(v) - case OpSubMaskedInt32x16: - return rewriteValueAMD64_OpSubMaskedInt32x16(v) - case OpSubMaskedInt32x4: - return rewriteValueAMD64_OpSubMaskedInt32x4(v) - case OpSubMaskedInt32x8: - return rewriteValueAMD64_OpSubMaskedInt32x8(v) - case OpSubMaskedInt64x2: - return rewriteValueAMD64_OpSubMaskedInt64x2(v) - case OpSubMaskedInt64x4: - return rewriteValueAMD64_OpSubMaskedInt64x4(v) - case OpSubMaskedInt64x8: - return rewriteValueAMD64_OpSubMaskedInt64x8(v) - case OpSubMaskedInt8x16: - return rewriteValueAMD64_OpSubMaskedInt8x16(v) - case OpSubMaskedInt8x32: - return rewriteValueAMD64_OpSubMaskedInt8x32(v) - case OpSubMaskedInt8x64: - return rewriteValueAMD64_OpSubMaskedInt8x64(v) - case OpSubMaskedUint16x16: - return rewriteValueAMD64_OpSubMaskedUint16x16(v) - case OpSubMaskedUint16x32: - return 
rewriteValueAMD64_OpSubMaskedUint16x32(v) - case OpSubMaskedUint16x8: - return rewriteValueAMD64_OpSubMaskedUint16x8(v) - case OpSubMaskedUint32x16: - return rewriteValueAMD64_OpSubMaskedUint32x16(v) - case OpSubMaskedUint32x4: - return rewriteValueAMD64_OpSubMaskedUint32x4(v) - case OpSubMaskedUint32x8: - return rewriteValueAMD64_OpSubMaskedUint32x8(v) - case OpSubMaskedUint64x2: - return rewriteValueAMD64_OpSubMaskedUint64x2(v) - case OpSubMaskedUint64x4: - return rewriteValueAMD64_OpSubMaskedUint64x4(v) - case OpSubMaskedUint64x8: - return rewriteValueAMD64_OpSubMaskedUint64x8(v) - case OpSubMaskedUint8x16: - return rewriteValueAMD64_OpSubMaskedUint8x16(v) - case OpSubMaskedUint8x32: - return rewriteValueAMD64_OpSubMaskedUint8x32(v) - case OpSubMaskedUint8x64: - return rewriteValueAMD64_OpSubMaskedUint8x64(v) case OpSubPairsFloat32x4: v.Op = OpAMD64VHSUBPS128 return true @@ -5673,30 +4005,6 @@ func rewriteValueAMD64(v *Value) bool { case OpSubSaturatedInt8x64: v.Op = OpAMD64VPSUBSB512 return true - case OpSubSaturatedMaskedInt16x16: - return rewriteValueAMD64_OpSubSaturatedMaskedInt16x16(v) - case OpSubSaturatedMaskedInt16x32: - return rewriteValueAMD64_OpSubSaturatedMaskedInt16x32(v) - case OpSubSaturatedMaskedInt16x8: - return rewriteValueAMD64_OpSubSaturatedMaskedInt16x8(v) - case OpSubSaturatedMaskedInt8x16: - return rewriteValueAMD64_OpSubSaturatedMaskedInt8x16(v) - case OpSubSaturatedMaskedInt8x32: - return rewriteValueAMD64_OpSubSaturatedMaskedInt8x32(v) - case OpSubSaturatedMaskedInt8x64: - return rewriteValueAMD64_OpSubSaturatedMaskedInt8x64(v) - case OpSubSaturatedMaskedUint16x16: - return rewriteValueAMD64_OpSubSaturatedMaskedUint16x16(v) - case OpSubSaturatedMaskedUint16x32: - return rewriteValueAMD64_OpSubSaturatedMaskedUint16x32(v) - case OpSubSaturatedMaskedUint16x8: - return rewriteValueAMD64_OpSubSaturatedMaskedUint16x8(v) - case OpSubSaturatedMaskedUint8x16: - return rewriteValueAMD64_OpSubSaturatedMaskedUint8x16(v) - case OpSubSaturatedMaskedUint8x32: - return rewriteValueAMD64_OpSubSaturatedMaskedUint8x32(v) - case OpSubSaturatedMaskedUint8x64: - return rewriteValueAMD64_OpSubSaturatedMaskedUint8x64(v) case OpSubSaturatedUint16x16: v.Op = OpAMD64VPSUBUSW256 return true @@ -5794,18 +4102,6 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpTruncScaledFloat64x4(v) case OpTruncScaledFloat64x8: return rewriteValueAMD64_OpTruncScaledFloat64x8(v) - case OpTruncScaledMaskedFloat32x16: - return rewriteValueAMD64_OpTruncScaledMaskedFloat32x16(v) - case OpTruncScaledMaskedFloat32x4: - return rewriteValueAMD64_OpTruncScaledMaskedFloat32x4(v) - case OpTruncScaledMaskedFloat32x8: - return rewriteValueAMD64_OpTruncScaledMaskedFloat32x8(v) - case OpTruncScaledMaskedFloat64x2: - return rewriteValueAMD64_OpTruncScaledMaskedFloat64x2(v) - case OpTruncScaledMaskedFloat64x4: - return rewriteValueAMD64_OpTruncScaledMaskedFloat64x4(v) - case OpTruncScaledMaskedFloat64x8: - return rewriteValueAMD64_OpTruncScaledMaskedFloat64x8(v) case OpTruncScaledResidueFloat32x16: return rewriteValueAMD64_OpTruncScaledResidueFloat32x16(v) case OpTruncScaledResidueFloat32x4: @@ -5818,18 +4114,6 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpTruncScaledResidueFloat64x4(v) case OpTruncScaledResidueFloat64x8: return rewriteValueAMD64_OpTruncScaledResidueFloat64x8(v) - case OpTruncScaledResidueMaskedFloat32x16: - return rewriteValueAMD64_OpTruncScaledResidueMaskedFloat32x16(v) - case OpTruncScaledResidueMaskedFloat32x4: - return 
rewriteValueAMD64_OpTruncScaledResidueMaskedFloat32x4(v) - case OpTruncScaledResidueMaskedFloat32x8: - return rewriteValueAMD64_OpTruncScaledResidueMaskedFloat32x8(v) - case OpTruncScaledResidueMaskedFloat64x2: - return rewriteValueAMD64_OpTruncScaledResidueMaskedFloat64x2(v) - case OpTruncScaledResidueMaskedFloat64x4: - return rewriteValueAMD64_OpTruncScaledResidueMaskedFloat64x4(v) - case OpTruncScaledResidueMaskedFloat64x8: - return rewriteValueAMD64_OpTruncScaledResidueMaskedFloat64x8(v) case OpWB: v.Op = OpAMD64LoweredWB return true @@ -5881,30 +4165,6 @@ func rewriteValueAMD64(v *Value) bool { case OpXorInt8x64: v.Op = OpAMD64VPXORD512 return true - case OpXorMaskedInt32x16: - return rewriteValueAMD64_OpXorMaskedInt32x16(v) - case OpXorMaskedInt32x4: - return rewriteValueAMD64_OpXorMaskedInt32x4(v) - case OpXorMaskedInt32x8: - return rewriteValueAMD64_OpXorMaskedInt32x8(v) - case OpXorMaskedInt64x2: - return rewriteValueAMD64_OpXorMaskedInt64x2(v) - case OpXorMaskedInt64x4: - return rewriteValueAMD64_OpXorMaskedInt64x4(v) - case OpXorMaskedInt64x8: - return rewriteValueAMD64_OpXorMaskedInt64x8(v) - case OpXorMaskedUint32x16: - return rewriteValueAMD64_OpXorMaskedUint32x16(v) - case OpXorMaskedUint32x4: - return rewriteValueAMD64_OpXorMaskedUint32x4(v) - case OpXorMaskedUint32x8: - return rewriteValueAMD64_OpXorMaskedUint32x8(v) - case OpXorMaskedUint64x2: - return rewriteValueAMD64_OpXorMaskedUint64x2(v) - case OpXorMaskedUint64x4: - return rewriteValueAMD64_OpXorMaskedUint64x4(v) - case OpXorMaskedUint64x8: - return rewriteValueAMD64_OpXorMaskedUint64x8(v) case OpXorUint16x16: v.Op = OpAMD64VPXOR256 return true @@ -27893,66 +26153,6 @@ func rewriteValueAMD64_OpAMD64VPSLLD512(v *Value) bool { } return false } -func rewriteValueAMD64_OpAMD64VPSLLDMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLDMasked128 x (MOVQconst [c]) mask) - // result: (VPSLLDMasked128const [uint8(c)] x mask) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSLLDMasked128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64VPSLLDMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLDMasked256 x (MOVQconst [c]) mask) - // result: (VPSLLDMasked256const [uint8(c)] x mask) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSLLDMasked256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64VPSLLDMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLDMasked512 x (MOVQconst [c]) mask) - // result: (VPSLLDMasked512const [uint8(c)] x mask) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSLLDMasked512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) - return true - } - return false -} func rewriteValueAMD64_OpAMD64VPSLLQ128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -28007,66 +26207,6 @@ func rewriteValueAMD64_OpAMD64VPSLLQ512(v *Value) bool { } return false } -func rewriteValueAMD64_OpAMD64VPSLLQMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLQMasked128 x (MOVQconst [c]) mask) - // result: 
(VPSLLQMasked128const [uint8(c)] x mask) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSLLQMasked128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64VPSLLQMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLQMasked256 x (MOVQconst [c]) mask) - // result: (VPSLLQMasked256const [uint8(c)] x mask) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSLLQMasked256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64VPSLLQMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLQMasked512 x (MOVQconst [c]) mask) - // result: (VPSLLQMasked512const [uint8(c)] x mask) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSLLQMasked512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) - return true - } - return false -} func rewriteValueAMD64_OpAMD64VPSLLW128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -28121,66 +26261,6 @@ func rewriteValueAMD64_OpAMD64VPSLLW512(v *Value) bool { } return false } -func rewriteValueAMD64_OpAMD64VPSLLWMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLWMasked128 x (MOVQconst [c]) mask) - // result: (VPSLLWMasked128const [uint8(c)] x mask) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSLLWMasked128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64VPSLLWMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLWMasked256 x (MOVQconst [c]) mask) - // result: (VPSLLWMasked256const [uint8(c)] x mask) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSLLWMasked256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64VPSLLWMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLWMasked512 x (MOVQconst [c]) mask) - // result: (VPSLLWMasked512const [uint8(c)] x mask) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSLLWMasked512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) - return true - } - return false -} func rewriteValueAMD64_OpAMD64VPSRAD128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -28235,66 +26315,6 @@ func rewriteValueAMD64_OpAMD64VPSRAD512(v *Value) bool { } return false } -func rewriteValueAMD64_OpAMD64VPSRADMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRADMasked128 x (MOVQconst [c]) mask) - // result: (VPSRADMasked128const [uint8(c)] x mask) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSRADMasked128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64VPSRADMasked256(v 
*Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRADMasked256 x (MOVQconst [c]) mask) - // result: (VPSRADMasked256const [uint8(c)] x mask) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSRADMasked256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64VPSRADMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRADMasked512 x (MOVQconst [c]) mask) - // result: (VPSRADMasked512const [uint8(c)] x mask) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSRADMasked512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) - return true - } - return false -} func rewriteValueAMD64_OpAMD64VPSRAQ128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -28349,66 +26369,6 @@ func rewriteValueAMD64_OpAMD64VPSRAQ512(v *Value) bool { } return false } -func rewriteValueAMD64_OpAMD64VPSRAQMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRAQMasked128 x (MOVQconst [c]) mask) - // result: (VPSRAQMasked128const [uint8(c)] x mask) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSRAQMasked128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64VPSRAQMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRAQMasked256 x (MOVQconst [c]) mask) - // result: (VPSRAQMasked256const [uint8(c)] x mask) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSRAQMasked256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64VPSRAQMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRAQMasked512 x (MOVQconst [c]) mask) - // result: (VPSRAQMasked512const [uint8(c)] x mask) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSRAQMasked512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) - return true - } - return false -} func rewriteValueAMD64_OpAMD64VPSRAW128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -28463,66 +26423,6 @@ func rewriteValueAMD64_OpAMD64VPSRAW512(v *Value) bool { } return false } -func rewriteValueAMD64_OpAMD64VPSRAWMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRAWMasked128 x (MOVQconst [c]) mask) - // result: (VPSRAWMasked128const [uint8(c)] x mask) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSRAWMasked128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64VPSRAWMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRAWMasked256 x (MOVQconst [c]) mask) - // result: (VPSRAWMasked256const [uint8(c)] x mask) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSRAWMasked256const) - 
v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64VPSRAWMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRAWMasked512 x (MOVQconst [c]) mask) - // result: (VPSRAWMasked512const [uint8(c)] x mask) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSRAWMasked512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) - return true - } - return false -} func rewriteValueAMD64_OpAMD64XADDLlock(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] @@ -29423,1134 +27323,6 @@ func rewriteValueAMD64_OpAMD64XORQmodify(v *Value) bool { } return false } -func rewriteValueAMD64_OpAbsMaskedInt16x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AbsMaskedInt16x16 x mask) - // result: (VPABSWMasked256 x (VPMOVVec16x16ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPABSWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpAbsMaskedInt16x32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AbsMaskedInt16x32 x mask) - // result: (VPABSWMasked512 x (VPMOVVec16x32ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPABSWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpAbsMaskedInt16x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AbsMaskedInt16x8 x mask) - // result: (VPABSWMasked128 x (VPMOVVec16x8ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPABSWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpAbsMaskedInt32x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AbsMaskedInt32x16 x mask) - // result: (VPABSDMasked512 x (VPMOVVec32x16ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPABSDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpAbsMaskedInt32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AbsMaskedInt32x4 x mask) - // result: (VPABSDMasked128 x (VPMOVVec32x4ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPABSDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpAbsMaskedInt32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AbsMaskedInt32x8 x mask) - // result: (VPABSDMasked256 x (VPMOVVec32x8ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPABSDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpAbsMaskedInt64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AbsMaskedInt64x2 x mask) - // result: (VPABSQMasked128 x (VPMOVVec64x2ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPABSQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return 
true - } -} -func rewriteValueAMD64_OpAbsMaskedInt64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AbsMaskedInt64x4 x mask) - // result: (VPABSQMasked256 x (VPMOVVec64x4ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPABSQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpAbsMaskedInt64x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AbsMaskedInt64x8 x mask) - // result: (VPABSQMasked512 x (VPMOVVec64x8ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPABSQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpAbsMaskedInt8x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AbsMaskedInt8x16 x mask) - // result: (VPABSBMasked128 x (VPMOVVec8x16ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPABSBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpAbsMaskedInt8x32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AbsMaskedInt8x32 x mask) - // result: (VPABSBMasked256 x (VPMOVVec8x32ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPABSBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpAbsMaskedInt8x64(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AbsMaskedInt8x64 x mask) - // result: (VPABSBMasked512 x (VPMOVVec8x64ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPABSBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpAddDotProdPairsSaturatedMaskedInt32x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddDotProdPairsSaturatedMaskedInt32x16 x y z mask) - // result: (VPDPWSSDSMasked512 x y z (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPDPWSSDSMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpAddDotProdPairsSaturatedMaskedInt32x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddDotProdPairsSaturatedMaskedInt32x4 x y z mask) - // result: (VPDPWSSDSMasked128 x y z (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPDPWSSDSMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpAddDotProdPairsSaturatedMaskedInt32x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddDotProdPairsSaturatedMaskedInt32x8 x y z mask) - // result: (VPDPWSSDSMasked256 x y z (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPDPWSSDSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - 
return true - } -} -func rewriteValueAMD64_OpAddDotProdQuadrupleMaskedInt32x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddDotProdQuadrupleMaskedInt32x16 x y z mask) - // result: (VPDPBUSDMasked512 x y z (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPDPBUSDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpAddDotProdQuadrupleMaskedInt32x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddDotProdQuadrupleMaskedInt32x4 x y z mask) - // result: (VPDPBUSDMasked128 x y z (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPDPBUSDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpAddDotProdQuadrupleMaskedInt32x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddDotProdQuadrupleMaskedInt32x8 x y z mask) - // result: (VPDPBUSDMasked256 x y z (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPDPBUSDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpAddDotProdQuadrupleSaturatedMaskedInt32x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddDotProdQuadrupleSaturatedMaskedInt32x16 x y z mask) - // result: (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPDPBUSDSMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpAddDotProdQuadrupleSaturatedMaskedInt32x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddDotProdQuadrupleSaturatedMaskedInt32x4 x y z mask) - // result: (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPDPBUSDSMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpAddDotProdQuadrupleSaturatedMaskedInt32x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddDotProdQuadrupleSaturatedMaskedInt32x8 x y z mask) - // result: (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPDPBUSDSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedFloat32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedFloat32x16 x y mask) - // result: (VADDPSMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VADDPSMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - 
v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedFloat32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedFloat32x4 x y mask) - // result: (VADDPSMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VADDPSMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedFloat32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedFloat32x8 x y mask) - // result: (VADDPSMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VADDPSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedFloat64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedFloat64x2 x y mask) - // result: (VADDPDMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VADDPDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedFloat64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedFloat64x4 x y mask) - // result: (VADDPDMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VADDPDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedFloat64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedFloat64x8 x y mask) - // result: (VADDPDMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VADDPDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedInt16x16 x y mask) - // result: (VPADDWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedInt16x32 x y mask) - // result: (VPADDWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedInt16x8 x y mask) - // result: (VPADDWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } 
-} -func rewriteValueAMD64_OpAddMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedInt32x16 x y mask) - // result: (VPADDDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedInt32x4 x y mask) - // result: (VPADDDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedInt32x8 x y mask) - // result: (VPADDDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedInt64x2 x y mask) - // result: (VPADDQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedInt64x4 x y mask) - // result: (VPADDQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedInt64x8 x y mask) - // result: (VPADDQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedInt8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedInt8x16 x y mask) - // result: (VPADDBMasked128 x y (VPMOVVec8x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedInt8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedInt8x32 x y mask) - // result: (VPADDBMasked256 x y (VPMOVVec8x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedInt8x64(v *Value) bool { 
- v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedInt8x64 x y mask) - // result: (VPADDBMasked512 x y (VPMOVVec8x64ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedUint16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedUint16x16 x y mask) - // result: (VPADDWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedUint16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedUint16x32 x y mask) - // result: (VPADDWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedUint16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedUint16x8 x y mask) - // result: (VPADDWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedUint32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedUint32x16 x y mask) - // result: (VPADDDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedUint32x4 x y mask) - // result: (VPADDDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedUint32x8 x y mask) - // result: (VPADDDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedUint64x2 x y mask) - // result: (VPADDQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - 
v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedUint64x4 x y mask) - // result: (VPADDQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedUint64x8 x y mask) - // result: (VPADDQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedUint8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedUint8x16 x y mask) - // result: (VPADDBMasked128 x y (VPMOVVec8x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedUint8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedUint8x32 x y mask) - // result: (VPADDBMasked256 x y (VPMOVVec8x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddMaskedUint8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddMaskedUint8x64 x y mask) - // result: (VPADDBMasked512 x y (VPMOVVec8x64ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddSaturatedMaskedInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddSaturatedMaskedInt16x16 x y mask) - // result: (VPADDSWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDSWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddSaturatedMaskedInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddSaturatedMaskedInt16x32 x y mask) - // result: (VPADDSWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDSWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddSaturatedMaskedInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddSaturatedMaskedInt16x8 x y mask) - // result: (VPADDSWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDSWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddSaturatedMaskedInt8x16(v *Value) bool { - v_2 := v.Args[2] - 
v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddSaturatedMaskedInt8x16 x y mask) - // result: (VPADDSBMasked128 x y (VPMOVVec8x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDSBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddSaturatedMaskedInt8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddSaturatedMaskedInt8x32 x y mask) - // result: (VPADDSBMasked256 x y (VPMOVVec8x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDSBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddSaturatedMaskedInt8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddSaturatedMaskedInt8x64 x y mask) - // result: (VPADDSBMasked512 x y (VPMOVVec8x64ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDSBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddSaturatedMaskedUint16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddSaturatedMaskedUint16x16 x y mask) - // result: (VPADDUSWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDUSWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddSaturatedMaskedUint16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddSaturatedMaskedUint16x32 x y mask) - // result: (VPADDUSWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDUSWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddSaturatedMaskedUint16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddSaturatedMaskedUint16x8 x y mask) - // result: (VPADDUSWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDUSWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddSaturatedMaskedUint8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddSaturatedMaskedUint8x16 x y mask) - // result: (VPADDUSBMasked128 x y (VPMOVVec8x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDUSBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddSaturatedMaskedUint8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddSaturatedMaskedUint8x32 x y mask) - // result: (VPADDUSBMasked256 x y (VPMOVVec8x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDUSBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - 
v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAddSaturatedMaskedUint8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AddSaturatedMaskedUint8x64 x y mask) - // result: (VPADDUSBMasked512 x y (VPMOVVec8x64ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDUSBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} func rewriteValueAMD64_OpAddr(v *Value) bool { v_0 := v.Args[0] // match: (Addr {sym} base) @@ -30564,438 +27336,6 @@ func rewriteValueAMD64_OpAddr(v *Value) bool { return true } } -func rewriteValueAMD64_OpAndMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AndMaskedInt32x16 x y mask) - // result: (VPANDDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPANDDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAndMaskedInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AndMaskedInt32x4 x y mask) - // result: (VPANDDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPANDDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAndMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AndMaskedInt32x8 x y mask) - // result: (VPANDDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPANDDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAndMaskedInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AndMaskedInt64x2 x y mask) - // result: (VPANDQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPANDQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAndMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AndMaskedInt64x4 x y mask) - // result: (VPANDQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPANDQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAndMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AndMaskedInt64x8 x y mask) - // result: (VPANDQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPANDQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAndMaskedUint32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AndMaskedUint32x16 x y mask) - // result: (VPANDDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := 
v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPANDDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAndMaskedUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AndMaskedUint32x4 x y mask) - // result: (VPANDDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPANDDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAndMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AndMaskedUint32x8 x y mask) - // result: (VPANDDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPANDDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAndMaskedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AndMaskedUint64x2 x y mask) - // result: (VPANDQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPANDQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAndMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AndMaskedUint64x4 x y mask) - // result: (VPANDQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPANDQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAndMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AndMaskedUint64x8 x y mask) - // result: (VPANDQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPANDQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAndNotMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AndNotMaskedInt32x16 x y mask) - // result: (VPANDNDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPANDNDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAndNotMaskedInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AndNotMaskedInt32x4 x y mask) - // result: (VPANDNDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPANDNDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAndNotMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AndNotMaskedInt32x8 x y mask) - // result: (VPANDNDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 
- v.reset(OpAMD64VPANDNDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAndNotMaskedInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AndNotMaskedInt64x2 x y mask) - // result: (VPANDNQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPANDNQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAndNotMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AndNotMaskedInt64x4 x y mask) - // result: (VPANDNQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPANDNQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAndNotMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AndNotMaskedInt64x8 x y mask) - // result: (VPANDNQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPANDNQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAndNotMaskedUint32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AndNotMaskedUint32x16 x y mask) - // result: (VPANDNDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPANDNDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAndNotMaskedUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AndNotMaskedUint32x4 x y mask) - // result: (VPANDNDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPANDNDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAndNotMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AndNotMaskedUint32x8 x y mask) - // result: (VPANDNDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPANDNDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAndNotMaskedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AndNotMaskedUint64x2 x y mask) - // result: (VPANDNQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPANDNQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAndNotMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AndNotMaskedUint64x4 x y mask) - // result: (VPANDNQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - 
mask := v_2 - v.reset(OpAMD64VPANDNQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAndNotMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AndNotMaskedUint64x8 x y mask) - // result: (VPANDNQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPANDNQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} func rewriteValueAMD64_OpAtomicAdd32(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] @@ -31361,114 +27701,6 @@ func rewriteValueAMD64_OpAtomicStorePtrNoWB(v *Value) bool { return true } } -func rewriteValueAMD64_OpAverageMaskedUint16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AverageMaskedUint16x16 x y mask) - // result: (VPAVGWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPAVGWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAverageMaskedUint16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AverageMaskedUint16x32 x y mask) - // result: (VPAVGWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPAVGWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAverageMaskedUint16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AverageMaskedUint16x8 x y mask) - // result: (VPAVGWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPAVGWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAverageMaskedUint8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AverageMaskedUint8x16 x y mask) - // result: (VPAVGBMasked128 x y (VPMOVVec8x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPAVGBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAverageMaskedUint8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AverageMaskedUint8x32 x y mask) - // result: (VPAVGBMasked256 x y (VPMOVVec8x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPAVGBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpAverageMaskedUint8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (AverageMaskedUint8x64 x y mask) - // result: (VPAVGBMasked512 x y (VPMOVVec8x64ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPAVGBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} func rewriteValueAMD64_OpBitLen16(v *Value) bool { v_0 := 
v.Args[0] b := v.Block @@ -31646,486 +27878,6 @@ func rewriteValueAMD64_OpBitLen8(v *Value) bool { } return false } -func rewriteValueAMD64_OpBroadcast128MaskedFloat32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast128MaskedFloat32x4 x mask) - // result: (VBROADCASTSSMasked128 x (VPMOVVec32x4ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VBROADCASTSSMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast128MaskedFloat64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast128MaskedFloat64x2 x mask) - // result: (VPBROADCASTQMasked128 x (VPMOVVec64x2ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast128MaskedInt16x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast128MaskedInt16x8 x mask) - // result: (VPBROADCASTWMasked128 x (VPMOVVec16x8ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast128MaskedInt32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast128MaskedInt32x4 x mask) - // result: (VPBROADCASTDMasked128 x (VPMOVVec32x4ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast128MaskedInt64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast128MaskedInt64x2 x mask) - // result: (VPBROADCASTQMasked128 x (VPMOVVec64x2ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast128MaskedInt8x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast128MaskedInt8x16 x mask) - // result: (VPBROADCASTBMasked128 x (VPMOVVec8x16ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast128MaskedUint16x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast128MaskedUint16x8 x mask) - // result: (VPBROADCASTWMasked128 x (VPMOVVec16x8ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast128MaskedUint32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast128MaskedUint32x4 x mask) - // result: (VPBROADCASTDMasked128 x (VPMOVVec32x4ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - 
} -} -func rewriteValueAMD64_OpBroadcast128MaskedUint64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast128MaskedUint64x2 x mask) - // result: (VPBROADCASTQMasked128 x (VPMOVVec64x2ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast128MaskedUint8x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast128MaskedUint8x16 x mask) - // result: (VPBROADCASTBMasked128 x (VPMOVVec8x16ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast256MaskedFloat32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast256MaskedFloat32x4 x mask) - // result: (VBROADCASTSSMasked256 x (VPMOVVec32x4ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VBROADCASTSSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast256MaskedFloat64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast256MaskedFloat64x2 x mask) - // result: (VBROADCASTSDMasked256 x (VPMOVVec64x2ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VBROADCASTSDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast256MaskedInt16x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast256MaskedInt16x8 x mask) - // result: (VPBROADCASTWMasked256 x (VPMOVVec16x8ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast256MaskedInt32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast256MaskedInt32x4 x mask) - // result: (VPBROADCASTDMasked256 x (VPMOVVec32x4ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast256MaskedInt64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast256MaskedInt64x2 x mask) - // result: (VPBROADCASTQMasked256 x (VPMOVVec64x2ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast256MaskedInt8x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast256MaskedInt8x16 x mask) - // result: (VPBROADCASTBMasked256 x (VPMOVVec8x16ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast256MaskedUint16x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := 
v.Args[0] - b := v.Block - // match: (Broadcast256MaskedUint16x8 x mask) - // result: (VPBROADCASTWMasked256 x (VPMOVVec16x8ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast256MaskedUint32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast256MaskedUint32x4 x mask) - // result: (VPBROADCASTDMasked256 x (VPMOVVec32x4ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast256MaskedUint64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast256MaskedUint64x2 x mask) - // result: (VPBROADCASTQMasked256 x (VPMOVVec64x2ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast256MaskedUint8x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast256MaskedUint8x16 x mask) - // result: (VPBROADCASTBMasked256 x (VPMOVVec8x16ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast512MaskedFloat32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast512MaskedFloat32x4 x mask) - // result: (VBROADCASTSSMasked512 x (VPMOVVec32x4ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VBROADCASTSSMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast512MaskedFloat64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast512MaskedFloat64x2 x mask) - // result: (VBROADCASTSDMasked512 x (VPMOVVec64x2ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VBROADCASTSDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast512MaskedInt16x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast512MaskedInt16x8 x mask) - // result: (VPBROADCASTWMasked512 x (VPMOVVec16x8ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast512MaskedInt32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast512MaskedInt32x4 x mask) - // result: (VPBROADCASTDMasked512 x (VPMOVVec32x4ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast512MaskedInt64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast512MaskedInt64x2 x mask) - // result: (VPBROADCASTQMasked512 
x (VPMOVVec64x2ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast512MaskedInt8x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast512MaskedInt8x16 x mask) - // result: (VPBROADCASTBMasked512 x (VPMOVVec8x16ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast512MaskedUint16x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast512MaskedUint16x8 x mask) - // result: (VPBROADCASTWMasked512 x (VPMOVVec16x8ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast512MaskedUint32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast512MaskedUint32x4 x mask) - // result: (VPBROADCASTDMasked512 x (VPMOVVec32x4ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast512MaskedUint64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast512MaskedUint64x2 x mask) - // result: (VPBROADCASTQMasked512 x (VPMOVVec64x2ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpBroadcast512MaskedUint8x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Broadcast512MaskedUint8x16 x mask) - // result: (VPBROADCASTBMasked512 x (VPMOVVec8x16ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPBROADCASTBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} func rewriteValueAMD64_OpBswap16(v *Value) bool { v_0 := v.Args[0] // match: (Bswap16 x) @@ -32276,114 +28028,6 @@ func rewriteValueAMD64_OpCeilScaledFloat64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpCeilScaledMaskedFloat32x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (CeilScaledMaskedFloat32x16 [a] x mask) - // result: (VRNDSCALEPSMasked512 [a+2] x (VPMOVVec32x16ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPSMasked512) - v.AuxInt = uint8ToAuxInt(a + 2) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpCeilScaledMaskedFloat32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (CeilScaledMaskedFloat32x4 [a] x mask) - // result: (VRNDSCALEPSMasked128 [a+2] x (VPMOVVec32x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPSMasked128) - v.AuxInt = uint8ToAuxInt(a + 2) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, 
v0) - return true - } -} -func rewriteValueAMD64_OpCeilScaledMaskedFloat32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (CeilScaledMaskedFloat32x8 [a] x mask) - // result: (VRNDSCALEPSMasked256 [a+2] x (VPMOVVec32x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPSMasked256) - v.AuxInt = uint8ToAuxInt(a + 2) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpCeilScaledMaskedFloat64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (CeilScaledMaskedFloat64x2 [a] x mask) - // result: (VRNDSCALEPDMasked128 [a+2] x (VPMOVVec64x2ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPDMasked128) - v.AuxInt = uint8ToAuxInt(a + 2) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpCeilScaledMaskedFloat64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (CeilScaledMaskedFloat64x4 [a] x mask) - // result: (VRNDSCALEPDMasked256 [a+2] x (VPMOVVec64x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPDMasked256) - v.AuxInt = uint8ToAuxInt(a + 2) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpCeilScaledMaskedFloat64x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (CeilScaledMaskedFloat64x8 [a] x mask) - // result: (VRNDSCALEPDMasked512 [a+2] x (VPMOVVec64x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPDMasked512) - v.AuxInt = uint8ToAuxInt(a + 2) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} func rewriteValueAMD64_OpCeilScaledResidueFloat32x16(v *Value) bool { v_0 := v.Args[0] // match: (CeilScaledResidueFloat32x16 [a] x) @@ -32462,114 +28106,6 @@ func rewriteValueAMD64_OpCeilScaledResidueFloat64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpCeilScaledResidueMaskedFloat32x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (CeilScaledResidueMaskedFloat32x16 [a] x mask) - // result: (VREDUCEPSMasked512 [a+2] x (VPMOVVec32x16ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPSMasked512) - v.AuxInt = uint8ToAuxInt(a + 2) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpCeilScaledResidueMaskedFloat32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (CeilScaledResidueMaskedFloat32x4 [a] x mask) - // result: (VREDUCEPSMasked128 [a+2] x (VPMOVVec32x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPSMasked128) - v.AuxInt = uint8ToAuxInt(a + 2) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpCeilScaledResidueMaskedFloat32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (CeilScaledResidueMaskedFloat32x8 [a] x mask) - // result: (VREDUCEPSMasked256 [a+2] x (VPMOVVec32x8ToM 
mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPSMasked256) - v.AuxInt = uint8ToAuxInt(a + 2) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpCeilScaledResidueMaskedFloat64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (CeilScaledResidueMaskedFloat64x2 [a] x mask) - // result: (VREDUCEPDMasked128 [a+2] x (VPMOVVec64x2ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPDMasked128) - v.AuxInt = uint8ToAuxInt(a + 2) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpCeilScaledResidueMaskedFloat64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (CeilScaledResidueMaskedFloat64x4 [a] x mask) - // result: (VREDUCEPDMasked256 [a+2] x (VPMOVVec64x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPDMasked256) - v.AuxInt = uint8ToAuxInt(a + 2) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpCeilScaledResidueMaskedFloat64x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (CeilScaledResidueMaskedFloat64x8 [a] x mask) - // result: (VREDUCEPDMasked512 [a+2] x (VPMOVVec64x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPDMasked512) - v.AuxInt = uint8ToAuxInt(a + 2) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} func rewriteValueAMD64_OpCompressFloat32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -33961,102 +29497,6 @@ func rewriteValueAMD64_OpConstNil(v *Value) bool { return true } } -func rewriteValueAMD64_OpConvertToInt32MaskedFloat32x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ConvertToInt32MaskedFloat32x16 x mask) - // result: (VCVTTPS2DQMasked512 x (VPMOVVec32x16ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VCVTTPS2DQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpConvertToInt32MaskedFloat32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ConvertToInt32MaskedFloat32x4 x mask) - // result: (VCVTTPS2DQMasked128 x (VPMOVVec32x4ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VCVTTPS2DQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpConvertToInt32MaskedFloat32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ConvertToInt32MaskedFloat32x8 x mask) - // result: (VCVTTPS2DQMasked256 x (VPMOVVec32x8ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VCVTTPS2DQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpConvertToUint32MaskedFloat32x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ConvertToUint32MaskedFloat32x16 x mask) - // result: (VCVTPS2UDQMasked512 x (VPMOVVec32x16ToM mask)) - for { - x := v_0 
- mask := v_1 - v.reset(OpAMD64VCVTPS2UDQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpConvertToUint32MaskedFloat32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ConvertToUint32MaskedFloat32x4 x mask) - // result: (VCVTPS2UDQMasked128 x (VPMOVVec32x4ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VCVTPS2UDQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpConvertToUint32MaskedFloat32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ConvertToUint32MaskedFloat32x8 x mask) - // result: (VCVTPS2UDQMasked256 x (VPMOVVec32x8ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VCVTPS2UDQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} func rewriteValueAMD64_OpCtz16(v *Value) bool { v_0 := v.Args[0] b := v.Block @@ -34813,222 +30253,6 @@ func rewriteValueAMD64_OpDiv8u(v *Value) bool { return true } } -func rewriteValueAMD64_OpDivMaskedFloat32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (DivMaskedFloat32x16 x y mask) - // result: (VDIVPSMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VDIVPSMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpDivMaskedFloat32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (DivMaskedFloat32x4 x y mask) - // result: (VDIVPSMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VDIVPSMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpDivMaskedFloat32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (DivMaskedFloat32x8 x y mask) - // result: (VDIVPSMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VDIVPSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpDivMaskedFloat64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (DivMaskedFloat64x2 x y mask) - // result: (VDIVPDMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VDIVPDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpDivMaskedFloat64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (DivMaskedFloat64x4 x y mask) - // result: (VDIVPDMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VDIVPDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpDivMaskedFloat64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block 
- // match: (DivMaskedFloat64x8 x y mask) - // result: (VDIVPDMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VDIVPDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpDotProdPairsMaskedInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (DotProdPairsMaskedInt16x16 x y mask) - // result: (VPMADDWDMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMADDWDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpDotProdPairsMaskedInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (DotProdPairsMaskedInt16x32 x y mask) - // result: (VPMADDWDMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMADDWDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpDotProdPairsMaskedInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (DotProdPairsMaskedInt16x8 x y mask) - // result: (VPMADDWDMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMADDWDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpDotProdPairsSaturatedMaskedUint8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (DotProdPairsSaturatedMaskedUint8x16 x y mask) - // result: (VPMADDUBSWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMADDUBSWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpDotProdPairsSaturatedMaskedUint8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (DotProdPairsSaturatedMaskedUint8x32 x y mask) - // result: (VPMADDUBSWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMADDUBSWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpDotProdPairsSaturatedMaskedUint8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (DotProdPairsSaturatedMaskedUint8x64 x y mask) - // result: (VPMADDUBSWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMADDUBSWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} func rewriteValueAMD64_OpEq16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -35317,666 +30541,6 @@ func rewriteValueAMD64_OpEqualInt8x64(v *Value) bool { return true } } -func rewriteValueAMD64_OpEqualMaskedFloat32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedFloat32x16 x y mask) - // result: 
(VPMOVMToVec32x16 (VCMPPSMasked512 [0] x y (VPMOVVec32x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedFloat32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedFloat32x4 x y mask) - // result: (VPMOVMToVec32x4 (VCMPPSMasked128 [0] x y (VPMOVVec32x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedFloat32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedFloat32x8 x y mask) - // result: (VPMOVMToVec32x8 (VCMPPSMasked256 [0] x y (VPMOVVec32x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedFloat64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedFloat64x2 x y mask) - // result: (VPMOVMToVec64x2 (VCMPPDMasked128 [0] x y (VPMOVVec64x2ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedFloat64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedFloat64x4 x y mask) - // result: (VPMOVMToVec64x4 (VCMPPDMasked256 [0] x y (VPMOVVec64x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedFloat64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedFloat64x8 x y mask) - // result: (VPMOVMToVec64x8 (VCMPPDMasked512 [0] x y (VPMOVVec64x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := 
&b.Func.Config.Types - // match: (EqualMaskedInt16x16 x y mask) - // result: (VPMOVMToVec16x16 (VPCMPWMasked256 [0] x y (VPMOVVec16x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedInt16x32 x y mask) - // result: (VPMOVMToVec16x32 (VPCMPWMasked512 [0] x y (VPMOVVec16x32ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedInt16x8 x y mask) - // result: (VPMOVMToVec16x8 (VPCMPWMasked128 [0] x y (VPMOVVec16x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedInt32x16 x y mask) - // result: (VPMOVMToVec32x16 (VPCMPDMasked512 [0] x y (VPMOVVec32x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedInt32x4 x y mask) - // result: (VPMOVMToVec32x4 (VPCMPDMasked128 [0] x y (VPMOVVec32x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedInt32x8 x y mask) - // result: (VPMOVMToVec32x8 (VPCMPDMasked256 [0] x y (VPMOVVec32x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedInt64x2(v *Value) bool { - v_2 := v.Args[2] 
- v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedInt64x2 x y mask) - // result: (VPMOVMToVec64x2 (VPCMPQMasked128 [0] x y (VPMOVVec64x2ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedInt64x4 x y mask) - // result: (VPMOVMToVec64x4 (VPCMPQMasked256 [0] x y (VPMOVVec64x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedInt64x8 x y mask) - // result: (VPMOVMToVec64x8 (VPCMPQMasked512 [0] x y (VPMOVVec64x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedInt8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedInt8x16 x y mask) - // result: (VPMOVMToVec8x16 (VPCMPBMasked128 [0] x y (VPMOVVec8x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedInt8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedInt8x32 x y mask) - // result: (VPMOVMToVec8x32 (VPCMPBMasked256 [0] x y (VPMOVVec8x32ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedInt8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedInt8x64 x y mask) - // result: (VPMOVMToVec8x64 (VPCMPBMasked512 [0] x y (VPMOVVec8x64ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func 
rewriteValueAMD64_OpEqualMaskedUint16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedUint16x16 x y mask) - // result: (VPMOVMToVec16x16 (VPCMPUWMasked256 [0] x y (VPMOVVec16x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedUint16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedUint16x32 x y mask) - // result: (VPMOVMToVec16x32 (VPCMPUWMasked512 [0] x y (VPMOVVec16x32ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedUint16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedUint16x8 x y mask) - // result: (VPMOVMToVec16x8 (VPCMPUWMasked128 [0] x y (VPMOVVec16x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedUint32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedUint32x16 x y mask) - // result: (VPMOVMToVec32x16 (VPCMPUDMasked512 [0] x y (VPMOVVec32x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedUint32x4 x y mask) - // result: (VPMOVMToVec32x4 (VPCMPUDMasked128 [0] x y (VPMOVVec32x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedUint32x8 x y mask) - // result: (VPMOVMToVec32x8 (VPCMPUDMasked256 [0] x y (VPMOVVec32x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, 
types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedUint64x2 x y mask) - // result: (VPMOVMToVec64x2 (VPCMPUQMasked128 [0] x y (VPMOVVec64x2ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedUint64x4 x y mask) - // result: (VPMOVMToVec64x4 (VPCMPUQMasked256 [0] x y (VPMOVVec64x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedUint64x8 x y mask) - // result: (VPMOVMToVec64x8 (VPCMPUQMasked512 [0] x y (VPMOVVec64x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedUint8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedUint8x16 x y mask) - // result: (VPMOVMToVec8x16 (VPCMPUBMasked128 [0] x y (VPMOVVec8x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedUint8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedUint8x32 x y mask) - // result: (VPMOVMToVec8x32 (VPCMPUBMasked256 [0] x y (VPMOVVec8x32ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualMaskedUint8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualMaskedUint8x64 x y mask) - // result: (VPMOVMToVec8x64 (VPCMPUBMasked512 [0] x y (VPMOVVec8x64ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked512, typ.Mask) - v0.AuxInt 
= uint8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpEqualUint16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -36678,114 +31242,6 @@ func rewriteValueAMD64_OpFloorScaledFloat64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpFloorScaledMaskedFloat32x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (FloorScaledMaskedFloat32x16 [a] x mask) - // result: (VRNDSCALEPSMasked512 [a+1] x (VPMOVVec32x16ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPSMasked512) - v.AuxInt = uint8ToAuxInt(a + 1) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpFloorScaledMaskedFloat32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (FloorScaledMaskedFloat32x4 [a] x mask) - // result: (VRNDSCALEPSMasked128 [a+1] x (VPMOVVec32x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPSMasked128) - v.AuxInt = uint8ToAuxInt(a + 1) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpFloorScaledMaskedFloat32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (FloorScaledMaskedFloat32x8 [a] x mask) - // result: (VRNDSCALEPSMasked256 [a+1] x (VPMOVVec32x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPSMasked256) - v.AuxInt = uint8ToAuxInt(a + 1) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpFloorScaledMaskedFloat64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (FloorScaledMaskedFloat64x2 [a] x mask) - // result: (VRNDSCALEPDMasked128 [a+1] x (VPMOVVec64x2ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPDMasked128) - v.AuxInt = uint8ToAuxInt(a + 1) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpFloorScaledMaskedFloat64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (FloorScaledMaskedFloat64x4 [a] x mask) - // result: (VRNDSCALEPDMasked256 [a+1] x (VPMOVVec64x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPDMasked256) - v.AuxInt = uint8ToAuxInt(a + 1) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpFloorScaledMaskedFloat64x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (FloorScaledMaskedFloat64x8 [a] x mask) - // result: (VRNDSCALEPDMasked512 [a+1] x (VPMOVVec64x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPDMasked512) - v.AuxInt = uint8ToAuxInt(a + 1) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} func rewriteValueAMD64_OpFloorScaledResidueFloat32x16(v *Value) bool { v_0 := v.Args[0] // match: (FloorScaledResidueFloat32x16 [a] x) @@ -36864,288 +31320,6 @@ func 
rewriteValueAMD64_OpFloorScaledResidueFloat64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpFloorScaledResidueMaskedFloat32x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (FloorScaledResidueMaskedFloat32x16 [a] x mask) - // result: (VREDUCEPSMasked512 [a+1] x (VPMOVVec32x16ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPSMasked512) - v.AuxInt = uint8ToAuxInt(a + 1) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpFloorScaledResidueMaskedFloat32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (FloorScaledResidueMaskedFloat32x4 [a] x mask) - // result: (VREDUCEPSMasked128 [a+1] x (VPMOVVec32x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPSMasked128) - v.AuxInt = uint8ToAuxInt(a + 1) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpFloorScaledResidueMaskedFloat32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (FloorScaledResidueMaskedFloat32x8 [a] x mask) - // result: (VREDUCEPSMasked256 [a+1] x (VPMOVVec32x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPSMasked256) - v.AuxInt = uint8ToAuxInt(a + 1) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpFloorScaledResidueMaskedFloat64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (FloorScaledResidueMaskedFloat64x2 [a] x mask) - // result: (VREDUCEPDMasked128 [a+1] x (VPMOVVec64x2ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPDMasked128) - v.AuxInt = uint8ToAuxInt(a + 1) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpFloorScaledResidueMaskedFloat64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (FloorScaledResidueMaskedFloat64x4 [a] x mask) - // result: (VREDUCEPDMasked256 [a+1] x (VPMOVVec64x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPDMasked256) - v.AuxInt = uint8ToAuxInt(a + 1) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpFloorScaledResidueMaskedFloat64x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (FloorScaledResidueMaskedFloat64x8 [a] x mask) - // result: (VREDUCEPDMasked512 [a+1] x (VPMOVVec64x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPDMasked512) - v.AuxInt = uint8ToAuxInt(a + 1) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpGaloisFieldAffineTransformInverseMaskedUint8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (GaloisFieldAffineTransformInverseMaskedUint8x16 [a] x y mask) - // result: (VGF2P8AFFINEINVQBMasked128 [a] x y (VPMOVVec8x16ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 
- v.reset(OpAMD64VGF2P8AFFINEINVQBMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpGaloisFieldAffineTransformInverseMaskedUint8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (GaloisFieldAffineTransformInverseMaskedUint8x32 [a] x y mask) - // result: (VGF2P8AFFINEINVQBMasked256 [a] x y (VPMOVVec8x32ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VGF2P8AFFINEINVQBMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpGaloisFieldAffineTransformInverseMaskedUint8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (GaloisFieldAffineTransformInverseMaskedUint8x64 [a] x y mask) - // result: (VGF2P8AFFINEINVQBMasked512 [a] x y (VPMOVVec8x64ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VGF2P8AFFINEINVQBMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpGaloisFieldAffineTransformMaskedUint8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (GaloisFieldAffineTransformMaskedUint8x16 [a] x y mask) - // result: (VGF2P8AFFINEQBMasked128 [a] x y (VPMOVVec8x16ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VGF2P8AFFINEQBMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpGaloisFieldAffineTransformMaskedUint8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (GaloisFieldAffineTransformMaskedUint8x32 [a] x y mask) - // result: (VGF2P8AFFINEQBMasked256 [a] x y (VPMOVVec8x32ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VGF2P8AFFINEQBMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpGaloisFieldAffineTransformMaskedUint8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (GaloisFieldAffineTransformMaskedUint8x64 [a] x y mask) - // result: (VGF2P8AFFINEQBMasked512 [a] x y (VPMOVVec8x64ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VGF2P8AFFINEQBMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpGaloisFieldMulMaskedUint8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (GaloisFieldMulMaskedUint8x16 x y mask) - // result: (VGF2P8MULBMasked128 x y (VPMOVVec8x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VGF2P8MULBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } 
-} -func rewriteValueAMD64_OpGaloisFieldMulMaskedUint8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (GaloisFieldMulMaskedUint8x32 x y mask) - // result: (VGF2P8MULBMasked256 x y (VPMOVVec8x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VGF2P8MULBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpGaloisFieldMulMaskedUint8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (GaloisFieldMulMaskedUint8x64 x y mask) - // result: (VGF2P8MULBMasked512 x y (VPMOVVec8x64ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VGF2P8MULBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} func rewriteValueAMD64_OpGetG(v *Value) bool { v_0 := v.Args[0] // match: (GetG mem) @@ -37806,666 +31980,6 @@ func rewriteValueAMD64_OpGreaterEqualInt8x64(v *Value) bool { return true } } -func rewriteValueAMD64_OpGreaterEqualMaskedFloat32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedFloat32x16 x y mask) - // result: (VPMOVMToVec32x16 (VCMPPSMasked512 [13] x y (VPMOVVec32x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedFloat32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedFloat32x4 x y mask) - // result: (VPMOVMToVec32x4 (VCMPPSMasked128 [13] x y (VPMOVVec32x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedFloat32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedFloat32x8 x y mask) - // result: (VPMOVMToVec32x8 (VCMPPSMasked256 [13] x y (VPMOVVec32x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedFloat64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedFloat64x2 x y mask) - // result: (VPMOVMToVec64x2 (VCMPPDMasked128 [13] x y (VPMOVVec64x2ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v1.AddArg(mask) 
- v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedFloat64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedFloat64x4 x y mask) - // result: (VPMOVMToVec64x4 (VCMPPDMasked256 [13] x y (VPMOVVec64x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedFloat64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedFloat64x8 x y mask) - // result: (VPMOVMToVec64x8 (VCMPPDMasked512 [13] x y (VPMOVVec64x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedInt16x16 x y mask) - // result: (VPMOVMToVec16x16 (VPCMPWMasked256 [13] x y (VPMOVVec16x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedInt16x32 x y mask) - // result: (VPMOVMToVec16x32 (VPCMPWMasked512 [13] x y (VPMOVVec16x32ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedInt16x8 x y mask) - // result: (VPMOVMToVec16x8 (VPCMPWMasked128 [13] x y (VPMOVVec16x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedInt32x16 x y mask) - // result: (VPMOVMToVec32x16 (VPCMPDMasked512 [13] x y (VPMOVVec32x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := 
b.NewValue0(v.Pos, OpAMD64VPCMPDMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedInt32x4 x y mask) - // result: (VPMOVMToVec32x4 (VPCMPDMasked128 [13] x y (VPMOVVec32x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedInt32x8 x y mask) - // result: (VPMOVMToVec32x8 (VPCMPDMasked256 [13] x y (VPMOVVec32x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedInt64x2 x y mask) - // result: (VPMOVMToVec64x2 (VPCMPQMasked128 [13] x y (VPMOVVec64x2ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedInt64x4 x y mask) - // result: (VPMOVMToVec64x4 (VPCMPQMasked256 [13] x y (VPMOVVec64x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedInt64x8 x y mask) - // result: (VPMOVMToVec64x8 (VPCMPQMasked512 [13] x y (VPMOVVec64x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedInt8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedInt8x16 x y mask) - // result: 
(VPMOVMToVec8x16 (VPCMPBMasked128 [13] x y (VPMOVVec8x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedInt8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedInt8x32 x y mask) - // result: (VPMOVMToVec8x32 (VPCMPBMasked256 [13] x y (VPMOVVec8x32ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedInt8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedInt8x64 x y mask) - // result: (VPMOVMToVec8x64 (VPCMPBMasked512 [13] x y (VPMOVVec8x64ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedUint16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedUint16x16 x y mask) - // result: (VPMOVMToVec16x16 (VPCMPUWMasked256 [13] x y (VPMOVVec16x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedUint16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedUint16x32 x y mask) - // result: (VPMOVMToVec16x32 (VPCMPUWMasked512 [13] x y (VPMOVVec16x32ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedUint16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedUint16x8 x y mask) - // result: (VPMOVMToVec16x8 (VPCMPUWMasked128 [13] x y (VPMOVVec16x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedUint32x16(v *Value) 
bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedUint32x16 x y mask) - // result: (VPMOVMToVec32x16 (VPCMPUDMasked512 [13] x y (VPMOVVec32x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedUint32x4 x y mask) - // result: (VPMOVMToVec32x4 (VPCMPUDMasked128 [13] x y (VPMOVVec32x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedUint32x8 x y mask) - // result: (VPMOVMToVec32x8 (VPCMPUDMasked256 [13] x y (VPMOVVec32x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedUint64x2 x y mask) - // result: (VPMOVMToVec64x2 (VPCMPUQMasked128 [13] x y (VPMOVVec64x2ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedUint64x4 x y mask) - // result: (VPMOVMToVec64x4 (VPCMPUQMasked256 [13] x y (VPMOVVec64x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedUint64x8 x y mask) - // result: (VPMOVMToVec64x8 (VPCMPUQMasked512 [13] x y (VPMOVVec64x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, 
OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedUint8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedUint8x16 x y mask) - // result: (VPMOVMToVec8x16 (VPCMPUBMasked128 [13] x y (VPMOVVec8x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedUint8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedUint8x32 x y mask) - // result: (VPMOVMToVec8x32 (VPCMPUBMasked256 [13] x y (VPMOVVec8x32ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualMaskedUint8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualMaskedUint8x64 x y mask) - // result: (VPMOVMToVec8x64 (VPCMPUBMasked512 [13] x y (VPMOVVec8x64ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpGreaterEqualUint16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -38698,666 +32212,6 @@ func rewriteValueAMD64_OpGreaterInt8x64(v *Value) bool { return true } } -func rewriteValueAMD64_OpGreaterMaskedFloat32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedFloat32x16 x y mask) - // result: (VPMOVMToVec32x16 (VCMPPSMasked512 [14] x y (VPMOVVec32x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedFloat32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedFloat32x4 x y mask) - // result: (VPMOVMToVec32x4 (VCMPPSMasked128 [14] x y (VPMOVVec32x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedFloat32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := 
&b.Func.Config.Types - // match: (GreaterMaskedFloat32x8 x y mask) - // result: (VPMOVMToVec32x8 (VCMPPSMasked256 [14] x y (VPMOVVec32x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedFloat64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedFloat64x2 x y mask) - // result: (VPMOVMToVec64x2 (VCMPPDMasked128 [14] x y (VPMOVVec64x2ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedFloat64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedFloat64x4 x y mask) - // result: (VPMOVMToVec64x4 (VCMPPDMasked256 [14] x y (VPMOVVec64x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedFloat64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedFloat64x8 x y mask) - // result: (VPMOVMToVec64x8 (VCMPPDMasked512 [14] x y (VPMOVVec64x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedInt16x16 x y mask) - // result: (VPMOVMToVec16x16 (VPCMPWMasked256 [14] x y (VPMOVVec16x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedInt16x32 x y mask) - // result: (VPMOVMToVec16x32 (VPCMPWMasked512 [14] x y (VPMOVVec16x32ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func 
rewriteValueAMD64_OpGreaterMaskedInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedInt16x8 x y mask) - // result: (VPMOVMToVec16x8 (VPCMPWMasked128 [14] x y (VPMOVVec16x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedInt32x16 x y mask) - // result: (VPMOVMToVec32x16 (VPCMPDMasked512 [14] x y (VPMOVVec32x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedInt32x4 x y mask) - // result: (VPMOVMToVec32x4 (VPCMPDMasked128 [14] x y (VPMOVVec32x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedInt32x8 x y mask) - // result: (VPMOVMToVec32x8 (VPCMPDMasked256 [14] x y (VPMOVVec32x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedInt64x2 x y mask) - // result: (VPMOVMToVec64x2 (VPCMPQMasked128 [14] x y (VPMOVVec64x2ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedInt64x4 x y mask) - // result: (VPMOVMToVec64x4 (VPCMPQMasked256 [14] x y (VPMOVVec64x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, 
types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedInt64x8 x y mask) - // result: (VPMOVMToVec64x8 (VPCMPQMasked512 [14] x y (VPMOVVec64x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedInt8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedInt8x16 x y mask) - // result: (VPMOVMToVec8x16 (VPCMPBMasked128 [14] x y (VPMOVVec8x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedInt8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedInt8x32 x y mask) - // result: (VPMOVMToVec8x32 (VPCMPBMasked256 [14] x y (VPMOVVec8x32ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedInt8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedInt8x64 x y mask) - // result: (VPMOVMToVec8x64 (VPCMPBMasked512 [14] x y (VPMOVVec8x64ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedUint16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedUint16x16 x y mask) - // result: (VPMOVMToVec16x16 (VPCMPUWMasked256 [14] x y (VPMOVVec16x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedUint16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedUint16x32 x y mask) - // result: (VPMOVMToVec16x32 (VPCMPUWMasked512 [14] x y (VPMOVVec16x32ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, 
OpAMD64VPCMPUWMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedUint16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedUint16x8 x y mask) - // result: (VPMOVMToVec16x8 (VPCMPUWMasked128 [14] x y (VPMOVVec16x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedUint32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedUint32x16 x y mask) - // result: (VPMOVMToVec32x16 (VPCMPUDMasked512 [14] x y (VPMOVVec32x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedUint32x4 x y mask) - // result: (VPMOVMToVec32x4 (VPCMPUDMasked128 [14] x y (VPMOVVec32x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedUint32x8 x y mask) - // result: (VPMOVMToVec32x8 (VPCMPUDMasked256 [14] x y (VPMOVVec32x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedUint64x2 x y mask) - // result: (VPMOVMToVec64x2 (VPCMPUQMasked128 [14] x y (VPMOVVec64x2ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedUint64x4 x y mask) - // result: (VPMOVMToVec64x4 (VPCMPUQMasked256 [14] x y (VPMOVVec64x4ToM 
mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedUint64x8 x y mask) - // result: (VPMOVMToVec64x8 (VPCMPUQMasked512 [14] x y (VPMOVVec64x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedUint8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedUint8x16 x y mask) - // result: (VPMOVMToVec8x16 (VPCMPUBMasked128 [14] x y (VPMOVVec8x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedUint8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedUint8x32 x y mask) - // result: (VPMOVMToVec8x32 (VPCMPUBMasked256 [14] x y (VPMOVVec8x32ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterMaskedUint8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterMaskedUint8x64 x y mask) - // result: (VPMOVMToVec8x64 (VPCMPUBMasked512 [14] x y (VPMOVVec8x64ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpGreaterUint16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -39555,138 +32409,6 @@ func rewriteValueAMD64_OpIsNanFloat64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpIsNanMaskedFloat32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (IsNanMaskedFloat32x16 x y mask) - // result: (VPMOVMToVec32x16 (VCMPPSMasked512 [3] x y (VPMOVVec32x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(3) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return 
true - } -} -func rewriteValueAMD64_OpIsNanMaskedFloat32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (IsNanMaskedFloat32x4 x y mask) - // result: (VPMOVMToVec32x4 (VCMPPSMasked128 [3] x y (VPMOVVec32x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(3) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpIsNanMaskedFloat32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (IsNanMaskedFloat32x8 x y mask) - // result: (VPMOVMToVec32x8 (VCMPPSMasked256 [3] x y (VPMOVVec32x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(3) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpIsNanMaskedFloat64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (IsNanMaskedFloat64x2 x y mask) - // result: (VPMOVMToVec64x2 (VCMPPDMasked128 [3] x y (VPMOVVec64x2ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(3) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpIsNanMaskedFloat64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (IsNanMaskedFloat64x4 x y mask) - // result: (VPMOVMToVec64x4 (VCMPPDMasked256 [3] x y (VPMOVVec64x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(3) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpIsNanMaskedFloat64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (IsNanMaskedFloat64x8 x y mask) - // result: (VPMOVMToVec64x8 (VCMPPDMasked512 [3] x y (VPMOVVec64x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(3) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpIsNonNil(v *Value) bool { v_0 := v.Args[0] b := v.Block @@ -40201,666 +32923,6 @@ func rewriteValueAMD64_OpLessEqualInt8x64(v *Value) bool { return true } } -func rewriteValueAMD64_OpLessEqualMaskedFloat32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedFloat32x16 x y mask) - // result: (VPMOVMToVec32x16 (VCMPPSMasked512 [2] x y (VPMOVVec32x16ToM mask))) - for { - x := v_0 - y := v_1 - mask 
:= v_2 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedFloat32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedFloat32x4 x y mask) - // result: (VPMOVMToVec32x4 (VCMPPSMasked128 [2] x y (VPMOVVec32x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedFloat32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedFloat32x8 x y mask) - // result: (VPMOVMToVec32x8 (VCMPPSMasked256 [2] x y (VPMOVVec32x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedFloat64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedFloat64x2 x y mask) - // result: (VPMOVMToVec64x2 (VCMPPDMasked128 [2] x y (VPMOVVec64x2ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedFloat64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedFloat64x4 x y mask) - // result: (VPMOVMToVec64x4 (VCMPPDMasked256 [2] x y (VPMOVVec64x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedFloat64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedFloat64x8 x y mask) - // result: (VPMOVMToVec64x8 (VCMPPDMasked512 [2] x y (VPMOVVec64x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedInt16x16 x y mask) 
- // result: (VPMOVMToVec16x16 (VPCMPWMasked256 [2] x y (VPMOVVec16x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedInt16x32 x y mask) - // result: (VPMOVMToVec16x32 (VPCMPWMasked512 [2] x y (VPMOVVec16x32ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedInt16x8 x y mask) - // result: (VPMOVMToVec16x8 (VPCMPWMasked128 [2] x y (VPMOVVec16x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedInt32x16 x y mask) - // result: (VPMOVMToVec32x16 (VPCMPDMasked512 [2] x y (VPMOVVec32x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedInt32x4 x y mask) - // result: (VPMOVMToVec32x4 (VPCMPDMasked128 [2] x y (VPMOVVec32x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedInt32x8 x y mask) - // result: (VPMOVMToVec32x8 (VPCMPDMasked256 [2] x y (VPMOVVec32x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] 
- v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedInt64x2 x y mask) - // result: (VPMOVMToVec64x2 (VPCMPQMasked128 [2] x y (VPMOVVec64x2ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedInt64x4 x y mask) - // result: (VPMOVMToVec64x4 (VPCMPQMasked256 [2] x y (VPMOVVec64x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedInt64x8 x y mask) - // result: (VPMOVMToVec64x8 (VPCMPQMasked512 [2] x y (VPMOVVec64x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedInt8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedInt8x16 x y mask) - // result: (VPMOVMToVec8x16 (VPCMPBMasked128 [2] x y (VPMOVVec8x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedInt8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedInt8x32 x y mask) - // result: (VPMOVMToVec8x32 (VPCMPBMasked256 [2] x y (VPMOVVec8x32ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedInt8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedInt8x64 x y mask) - // result: (VPMOVMToVec8x64 (VPCMPBMasked512 [2] x y (VPMOVVec8x64ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func 
rewriteValueAMD64_OpLessEqualMaskedUint16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedUint16x16 x y mask) - // result: (VPMOVMToVec16x16 (VPCMPUWMasked256 [2] x y (VPMOVVec16x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedUint16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedUint16x32 x y mask) - // result: (VPMOVMToVec16x32 (VPCMPUWMasked512 [2] x y (VPMOVVec16x32ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedUint16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedUint16x8 x y mask) - // result: (VPMOVMToVec16x8 (VPCMPUWMasked128 [2] x y (VPMOVVec16x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedUint32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedUint32x16 x y mask) - // result: (VPMOVMToVec32x16 (VPCMPUDMasked512 [2] x y (VPMOVVec32x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedUint32x4 x y mask) - // result: (VPMOVMToVec32x4 (VPCMPUDMasked128 [2] x y (VPMOVVec32x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedUint32x8 x y mask) - // result: (VPMOVMToVec32x8 (VPCMPUDMasked256 [2] x y (VPMOVVec32x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := 
b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedUint64x2 x y mask) - // result: (VPMOVMToVec64x2 (VPCMPUQMasked128 [2] x y (VPMOVVec64x2ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedUint64x4 x y mask) - // result: (VPMOVMToVec64x4 (VPCMPUQMasked256 [2] x y (VPMOVVec64x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedUint64x8 x y mask) - // result: (VPMOVMToVec64x8 (VPCMPUQMasked512 [2] x y (VPMOVVec64x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedUint8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedUint8x16 x y mask) - // result: (VPMOVMToVec8x16 (VPCMPUBMasked128 [2] x y (VPMOVVec8x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedUint8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedUint8x32 x y mask) - // result: (VPMOVMToVec8x32 (VPCMPUBMasked256 [2] x y (VPMOVVec8x32ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualMaskedUint8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualMaskedUint8x64 x y mask) - // result: (VPMOVMToVec8x64 (VPCMPUBMasked512 [2] x y (VPMOVVec8x64ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - 
v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpLessEqualUint16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -41097,666 +33159,6 @@ func rewriteValueAMD64_OpLessInt8x64(v *Value) bool { return true } } -func rewriteValueAMD64_OpLessMaskedFloat32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedFloat32x16 x y mask) - // result: (VPMOVMToVec32x16 (VCMPPSMasked512 [1] x y (VPMOVVec32x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedFloat32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedFloat32x4 x y mask) - // result: (VPMOVMToVec32x4 (VCMPPSMasked128 [1] x y (VPMOVVec32x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedFloat32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedFloat32x8 x y mask) - // result: (VPMOVMToVec32x8 (VCMPPSMasked256 [1] x y (VPMOVVec32x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedFloat64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedFloat64x2 x y mask) - // result: (VPMOVMToVec64x2 (VCMPPDMasked128 [1] x y (VPMOVVec64x2ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedFloat64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedFloat64x4 x y mask) - // result: (VPMOVMToVec64x4 (VCMPPDMasked256 [1] x y (VPMOVVec64x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedFloat64x8(v *Value) bool { - v_2 := 
v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedFloat64x8 x y mask) - // result: (VPMOVMToVec64x8 (VCMPPDMasked512 [1] x y (VPMOVVec64x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedInt16x16 x y mask) - // result: (VPMOVMToVec16x16 (VPCMPWMasked256 [1] x y (VPMOVVec16x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedInt16x32 x y mask) - // result: (VPMOVMToVec16x32 (VPCMPWMasked512 [1] x y (VPMOVVec16x32ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedInt16x8 x y mask) - // result: (VPMOVMToVec16x8 (VPCMPWMasked128 [1] x y (VPMOVVec16x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedInt32x16 x y mask) - // result: (VPMOVMToVec32x16 (VPCMPDMasked512 [1] x y (VPMOVVec32x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedInt32x4 x y mask) - // result: (VPMOVMToVec32x4 (VPCMPDMasked128 [1] x y (VPMOVVec32x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func 
rewriteValueAMD64_OpLessMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedInt32x8 x y mask) - // result: (VPMOVMToVec32x8 (VPCMPDMasked256 [1] x y (VPMOVVec32x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedInt64x2 x y mask) - // result: (VPMOVMToVec64x2 (VPCMPQMasked128 [1] x y (VPMOVVec64x2ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedInt64x4 x y mask) - // result: (VPMOVMToVec64x4 (VPCMPQMasked256 [1] x y (VPMOVVec64x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedInt64x8 x y mask) - // result: (VPMOVMToVec64x8 (VPCMPQMasked512 [1] x y (VPMOVVec64x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedInt8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedInt8x16 x y mask) - // result: (VPMOVMToVec8x16 (VPCMPBMasked128 [1] x y (VPMOVVec8x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedInt8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedInt8x32 x y mask) - // result: (VPMOVMToVec8x32 (VPCMPBMasked256 [1] x y (VPMOVVec8x32ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - 
v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedInt8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedInt8x64 x y mask) - // result: (VPMOVMToVec8x64 (VPCMPBMasked512 [1] x y (VPMOVVec8x64ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedUint16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedUint16x16 x y mask) - // result: (VPMOVMToVec16x16 (VPCMPUWMasked256 [1] x y (VPMOVVec16x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedUint16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedUint16x32 x y mask) - // result: (VPMOVMToVec16x32 (VPCMPUWMasked512 [1] x y (VPMOVVec16x32ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedUint16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedUint16x8 x y mask) - // result: (VPMOVMToVec16x8 (VPCMPUWMasked128 [1] x y (VPMOVVec16x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedUint32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedUint32x16 x y mask) - // result: (VPMOVMToVec32x16 (VPCMPUDMasked512 [1] x y (VPMOVVec32x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedUint32x4 x y mask) - // result: (VPMOVMToVec32x4 (VPCMPUDMasked128 [1] x y (VPMOVVec32x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, 
OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedUint32x8 x y mask) - // result: (VPMOVMToVec32x8 (VPCMPUDMasked256 [1] x y (VPMOVVec32x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedUint64x2 x y mask) - // result: (VPMOVMToVec64x2 (VPCMPUQMasked128 [1] x y (VPMOVVec64x2ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedUint64x4 x y mask) - // result: (VPMOVMToVec64x4 (VPCMPUQMasked256 [1] x y (VPMOVVec64x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedUint64x8 x y mask) - // result: (VPMOVMToVec64x8 (VPCMPUQMasked512 [1] x y (VPMOVVec64x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedUint8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedUint8x16 x y mask) - // result: (VPMOVMToVec8x16 (VPCMPUBMasked128 [1] x y (VPMOVVec8x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedUint8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedUint8x32 x y mask) - // result: (VPMOVMToVec8x32 (VPCMPUBMasked256 [1] x y (VPMOVVec8x32ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked256, typ.Mask) 
- v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessMaskedUint8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessMaskedUint8x64 x y mask) - // result: (VPMOVMToVec8x64 (VPCMPUBMasked512 [1] x y (VPMOVVec8x64ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpLessUint16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -43056,546 +34458,6 @@ func rewriteValueAMD64_OpMax64F(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaxMaskedFloat32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedFloat32x16 x y mask) - // result: (VMAXPSMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VMAXPSMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedFloat32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedFloat32x4 x y mask) - // result: (VMAXPSMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VMAXPSMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedFloat32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedFloat32x8 x y mask) - // result: (VMAXPSMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VMAXPSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedFloat64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedFloat64x2 x y mask) - // result: (VMAXPDMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VMAXPDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedFloat64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedFloat64x4 x y mask) - // result: (VMAXPDMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VMAXPDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedFloat64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedFloat64x8 x y mask) - // result: (VMAXPDMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VMAXPDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - 
v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedInt16x16 x y mask) - // result: (VPMAXSWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXSWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedInt16x32 x y mask) - // result: (VPMAXSWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXSWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedInt16x8 x y mask) - // result: (VPMAXSWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXSWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedInt32x16 x y mask) - // result: (VPMAXSDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXSDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedInt32x4 x y mask) - // result: (VPMAXSDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXSDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedInt32x8 x y mask) - // result: (VPMAXSDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXSDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedInt64x2 x y mask) - // result: (VPMAXSQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXSQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedInt64x4 x y mask) - // result: (VPMAXSQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXSQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) 
- return true - } -} -func rewriteValueAMD64_OpMaxMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedInt64x8 x y mask) - // result: (VPMAXSQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXSQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedInt8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedInt8x16 x y mask) - // result: (VPMAXSBMasked128 x y (VPMOVVec8x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXSBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedInt8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedInt8x32 x y mask) - // result: (VPMAXSBMasked256 x y (VPMOVVec8x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXSBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedInt8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedInt8x64 x y mask) - // result: (VPMAXSBMasked512 x y (VPMOVVec8x64ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXSBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedUint16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedUint16x16 x y mask) - // result: (VPMAXUWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXUWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedUint16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedUint16x32 x y mask) - // result: (VPMAXUWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXUWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedUint16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedUint16x8 x y mask) - // result: (VPMAXUWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXUWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedUint32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedUint32x16 x y mask) - // result: (VPMAXUDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXUDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func 
rewriteValueAMD64_OpMaxMaskedUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedUint32x4 x y mask) - // result: (VPMAXUDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXUDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedUint32x8 x y mask) - // result: (VPMAXUDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXUDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedUint64x2 x y mask) - // result: (VPMAXUQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXUQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedUint64x4 x y mask) - // result: (VPMAXUQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXUQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedUint64x8 x y mask) - // result: (VPMAXUQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXUQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedUint8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedUint8x16 x y mask) - // result: (VPMAXUBMasked128 x y (VPMOVVec8x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXUBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedUint8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedUint8x32 x y mask) - // result: (VPMAXUBMasked256 x y (VPMOVVec8x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXUBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaxMaskedUint8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaxMaskedUint8x64 x y mask) - // result: (VPMAXUBMasked512 x y (VPMOVVec8x64ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXUBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} func rewriteValueAMD64_OpMin32F(v 
*Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -43634,546 +34496,6 @@ func rewriteValueAMD64_OpMin64F(v *Value) bool { return true } } -func rewriteValueAMD64_OpMinMaskedFloat32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedFloat32x16 x y mask) - // result: (VMINPSMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VMINPSMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedFloat32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedFloat32x4 x y mask) - // result: (VMINPSMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VMINPSMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedFloat32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedFloat32x8 x y mask) - // result: (VMINPSMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VMINPSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedFloat64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedFloat64x2 x y mask) - // result: (VMINPDMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VMINPDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedFloat64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedFloat64x4 x y mask) - // result: (VMINPDMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VMINPDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedFloat64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedFloat64x8 x y mask) - // result: (VMINPDMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VMINPDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedInt16x16 x y mask) - // result: (VPMINSWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINSWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedInt16x32 x y mask) - // result: (VPMINSWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINSWMasked512) - v0 := 
b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedInt16x8 x y mask) - // result: (VPMINSWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINSWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedInt32x16 x y mask) - // result: (VPMINSDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINSDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedInt32x4 x y mask) - // result: (VPMINSDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINSDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedInt32x8 x y mask) - // result: (VPMINSDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINSDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedInt64x2 x y mask) - // result: (VPMINSQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINSQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedInt64x4 x y mask) - // result: (VPMINSQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINSQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedInt64x8 x y mask) - // result: (VPMINSQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINSQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedInt8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedInt8x16 x y mask) - // result: (VPMINSBMasked128 x y (VPMOVVec8x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINSBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, 
types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedInt8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedInt8x32 x y mask) - // result: (VPMINSBMasked256 x y (VPMOVVec8x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINSBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedInt8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedInt8x64 x y mask) - // result: (VPMINSBMasked512 x y (VPMOVVec8x64ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINSBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedUint16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedUint16x16 x y mask) - // result: (VPMINUWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINUWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedUint16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedUint16x32 x y mask) - // result: (VPMINUWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINUWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedUint16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedUint16x8 x y mask) - // result: (VPMINUWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINUWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedUint32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedUint32x16 x y mask) - // result: (VPMINUDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINUDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedUint32x4 x y mask) - // result: (VPMINUDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINUDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedUint32x8 x y mask) - // result: (VPMINUDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINUDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - 
v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedUint64x2 x y mask) - // result: (VPMINUQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINUQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedUint64x4 x y mask) - // result: (VPMINUQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINUQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedUint64x8 x y mask) - // result: (VPMINUQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINUQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedUint8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedUint8x16 x y mask) - // result: (VPMINUBMasked128 x y (VPMOVVec8x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINUBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedUint8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedUint8x32 x y mask) - // result: (VPMINUBMasked256 x y (VPMOVVec8x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINUBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMinMaskedUint8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MinMaskedUint8x64 x y mask) - // result: (VPMINUBMasked512 x y (VPMOVVec8x64ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINUBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} func rewriteValueAMD64_OpMod16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -44683,906 +35005,6 @@ func rewriteValueAMD64_OpMove(v *Value) bool { } return false } -func rewriteValueAMD64_OpMulAddMaskedFloat32x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulAddMaskedFloat32x16 x y z mask) - // result: (VFMADD213PSMasked512 x y z (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMADD213PSMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpMulAddMaskedFloat32x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: 
(MulAddMaskedFloat32x4 x y z mask) - // result: (VFMADD213PSMasked128 x y z (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMADD213PSMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpMulAddMaskedFloat32x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulAddMaskedFloat32x8 x y z mask) - // result: (VFMADD213PSMasked256 x y z (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMADD213PSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpMulAddMaskedFloat64x2(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulAddMaskedFloat64x2 x y z mask) - // result: (VFMADD213PDMasked128 x y z (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMADD213PDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpMulAddMaskedFloat64x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulAddMaskedFloat64x4 x y z mask) - // result: (VFMADD213PDMasked256 x y z (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMADD213PDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpMulAddMaskedFloat64x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulAddMaskedFloat64x8 x y z mask) - // result: (VFMADD213PDMasked512 x y z (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMADD213PDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpMulAddSubMaskedFloat32x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulAddSubMaskedFloat32x16 x y z mask) - // result: (VFMADDSUB213PSMasked512 x y z (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMADDSUB213PSMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpMulAddSubMaskedFloat32x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulAddSubMaskedFloat32x4 x y z mask) - // result: (VFMADDSUB213PSMasked128 x y z (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMADDSUB213PSMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpMulAddSubMaskedFloat32x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulAddSubMaskedFloat32x8 x y z mask) - // result: 
(VFMADDSUB213PSMasked256 x y z (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMADDSUB213PSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpMulAddSubMaskedFloat64x2(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulAddSubMaskedFloat64x2 x y z mask) - // result: (VFMADDSUB213PDMasked128 x y z (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMADDSUB213PDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpMulAddSubMaskedFloat64x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulAddSubMaskedFloat64x4 x y z mask) - // result: (VFMADDSUB213PDMasked256 x y z (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMADDSUB213PDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpMulAddSubMaskedFloat64x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulAddSubMaskedFloat64x8 x y z mask) - // result: (VFMADDSUB213PDMasked512 x y z (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMADDSUB213PDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpMulHighMaskedInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulHighMaskedInt16x16 x y mask) - // result: (VPMULHWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULHWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulHighMaskedInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulHighMaskedInt16x32 x y mask) - // result: (VPMULHWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULHWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulHighMaskedInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulHighMaskedInt16x8 x y mask) - // result: (VPMULHWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULHWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulHighMaskedUint16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulHighMaskedUint16x16 x y mask) - // result: (VPMULHUWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULHUWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) 
- v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulHighMaskedUint16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulHighMaskedUint16x32 x y mask) - // result: (VPMULHUWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULHUWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulHighMaskedUint16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulHighMaskedUint16x8 x y mask) - // result: (VPMULHUWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULHUWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulMaskedFloat32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulMaskedFloat32x16 x y mask) - // result: (VMULPSMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VMULPSMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulMaskedFloat32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulMaskedFloat32x4 x y mask) - // result: (VMULPSMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VMULPSMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulMaskedFloat32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulMaskedFloat32x8 x y mask) - // result: (VMULPSMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VMULPSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulMaskedFloat64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulMaskedFloat64x2 x y mask) - // result: (VMULPDMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VMULPDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulMaskedFloat64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulMaskedFloat64x4 x y mask) - // result: (VMULPDMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VMULPDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulMaskedFloat64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulMaskedFloat64x8 x y mask) - // result: (VMULPDMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VMULPDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - 
v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulMaskedInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulMaskedInt16x16 x y mask) - // result: (VPMULLWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULLWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulMaskedInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulMaskedInt16x32 x y mask) - // result: (VPMULLWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULLWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulMaskedInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulMaskedInt16x8 x y mask) - // result: (VPMULLWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULLWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulMaskedInt32x16 x y mask) - // result: (VPMULLDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULLDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulMaskedInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulMaskedInt32x4 x y mask) - // result: (VPMULLDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULLDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulMaskedInt32x8 x y mask) - // result: (VPMULLDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULLDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulMaskedInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulMaskedInt64x2 x y mask) - // result: (VPMULLQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULLQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulMaskedInt64x4 x y mask) - // result: (VPMULLQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULLQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } 
-} -func rewriteValueAMD64_OpMulMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulMaskedInt64x8 x y mask) - // result: (VPMULLQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULLQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulMaskedUint16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulMaskedUint16x16 x y mask) - // result: (VPMULLWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULLWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulMaskedUint16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulMaskedUint16x32 x y mask) - // result: (VPMULLWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULLWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulMaskedUint16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulMaskedUint16x8 x y mask) - // result: (VPMULLWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULLWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulMaskedUint32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulMaskedUint32x16 x y mask) - // result: (VPMULLDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULLDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulMaskedUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulMaskedUint32x4 x y mask) - // result: (VPMULLDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULLDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulMaskedUint32x8 x y mask) - // result: (VPMULLDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULLDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulMaskedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulMaskedUint64x2 x y mask) - // result: (VPMULLQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULLQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func 
rewriteValueAMD64_OpMulMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulMaskedUint64x4 x y mask) - // result: (VPMULLQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULLQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulMaskedUint64x8 x y mask) - // result: (VPMULLQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULLQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMulSubAddMaskedFloat32x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulSubAddMaskedFloat32x16 x y z mask) - // result: (VFMSUBADD213PSMasked512 x y z (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMSUBADD213PSMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpMulSubAddMaskedFloat32x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulSubAddMaskedFloat32x4 x y z mask) - // result: (VFMSUBADD213PSMasked128 x y z (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMSUBADD213PSMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpMulSubAddMaskedFloat32x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulSubAddMaskedFloat32x8 x y z mask) - // result: (VFMSUBADD213PSMasked256 x y z (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMSUBADD213PSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpMulSubAddMaskedFloat64x2(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulSubAddMaskedFloat64x2 x y z mask) - // result: (VFMSUBADD213PDMasked128 x y z (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMSUBADD213PDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpMulSubAddMaskedFloat64x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MulSubAddMaskedFloat64x4 x y z mask) - // result: (VFMSUBADD213PDMasked256 x y z (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMSUBADD213PDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpMulSubAddMaskedFloat64x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := 
v.Block - // match: (MulSubAddMaskedFloat64x8 x y z mask) - // result: (VFMSUBADD213PDMasked512 x y z (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMSUBADD213PDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} func rewriteValueAMD64_OpNeg32F(v *Value) bool { v_0 := v.Args[0] b := v.Block @@ -45917,666 +35339,6 @@ func rewriteValueAMD64_OpNotEqualInt8x64(v *Value) bool { return true } } -func rewriteValueAMD64_OpNotEqualMaskedFloat32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedFloat32x16 x y mask) - // result: (VPMOVMToVec32x16 (VCMPPSMasked512 [4] x y (VPMOVVec32x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedFloat32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedFloat32x4 x y mask) - // result: (VPMOVMToVec32x4 (VCMPPSMasked128 [4] x y (VPMOVVec32x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedFloat32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedFloat32x8 x y mask) - // result: (VPMOVMToVec32x8 (VCMPPSMasked256 [4] x y (VPMOVVec32x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedFloat64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedFloat64x2 x y mask) - // result: (VPMOVMToVec64x2 (VCMPPDMasked128 [4] x y (VPMOVVec64x2ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedFloat64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedFloat64x4 x y mask) - // result: (VPMOVMToVec64x4 (VCMPPDMasked256 [4] x y (VPMOVVec64x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - 
v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedFloat64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedFloat64x8 x y mask) - // result: (VPMOVMToVec64x8 (VCMPPDMasked512 [4] x y (VPMOVVec64x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedInt16x16 x y mask) - // result: (VPMOVMToVec16x16 (VPCMPWMasked256 [4] x y (VPMOVVec16x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedInt16x32 x y mask) - // result: (VPMOVMToVec16x32 (VPCMPWMasked512 [4] x y (VPMOVVec16x32ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedInt16x8 x y mask) - // result: (VPMOVMToVec16x8 (VPCMPWMasked128 [4] x y (VPMOVVec16x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedInt32x16 x y mask) - // result: (VPMOVMToVec32x16 (VPCMPDMasked512 [4] x y (VPMOVVec32x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedInt32x4 x y mask) - // result: (VPMOVMToVec32x4 (VPCMPDMasked128 [4] x y (VPMOVVec32x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 
:= b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedInt32x8 x y mask) - // result: (VPMOVMToVec32x8 (VPCMPDMasked256 [4] x y (VPMOVVec32x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedInt64x2 x y mask) - // result: (VPMOVMToVec64x2 (VPCMPQMasked128 [4] x y (VPMOVVec64x2ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedInt64x4 x y mask) - // result: (VPMOVMToVec64x4 (VPCMPQMasked256 [4] x y (VPMOVVec64x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedInt64x8 x y mask) - // result: (VPMOVMToVec64x8 (VPCMPQMasked512 [4] x y (VPMOVVec64x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedInt8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedInt8x16 x y mask) - // result: (VPMOVMToVec8x16 (VPCMPBMasked128 [4] x y (VPMOVVec8x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedInt8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedInt8x32 x y mask) - // result: (VPMOVMToVec8x32 (VPCMPBMasked256 [4] x y (VPMOVVec8x32ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := 
b.NewValue0(v.Pos, OpAMD64VPCMPBMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedInt8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedInt8x64 x y mask) - // result: (VPMOVMToVec8x64 (VPCMPBMasked512 [4] x y (VPMOVVec8x64ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedUint16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedUint16x16 x y mask) - // result: (VPMOVMToVec16x16 (VPCMPUWMasked256 [4] x y (VPMOVVec16x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedUint16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedUint16x32 x y mask) - // result: (VPMOVMToVec16x32 (VPCMPUWMasked512 [4] x y (VPMOVVec16x32ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedUint16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedUint16x8 x y mask) - // result: (VPMOVMToVec16x8 (VPCMPUWMasked128 [4] x y (VPMOVVec16x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedUint32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedUint32x16 x y mask) - // result: (VPMOVMToVec32x16 (VPCMPUDMasked512 [4] x y (VPMOVVec32x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedUint32x4 x y mask) - // result: (VPMOVMToVec32x4 (VPCMPUDMasked128 
[4] x y (VPMOVVec32x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedUint32x8 x y mask) - // result: (VPMOVMToVec32x8 (VPCMPUDMasked256 [4] x y (VPMOVVec32x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedUint64x2 x y mask) - // result: (VPMOVMToVec64x2 (VPCMPUQMasked128 [4] x y (VPMOVVec64x2ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedUint64x4 x y mask) - // result: (VPMOVMToVec64x4 (VPCMPUQMasked256 [4] x y (VPMOVVec64x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedUint64x8 x y mask) - // result: (VPMOVMToVec64x8 (VPCMPUQMasked512 [4] x y (VPMOVVec64x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedUint8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedUint8x16 x y mask) - // result: (VPMOVMToVec8x16 (VPCMPUBMasked128 [4] x y (VPMOVVec8x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedUint8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := 
&b.Func.Config.Types - // match: (NotEqualMaskedUint8x32 x y mask) - // result: (VPMOVMToVec8x32 (VPCMPUBMasked256 [4] x y (VPMOVVec8x32ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualMaskedUint8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualMaskedUint8x64 x y mask) - // result: (VPMOVMToVec8x64 (VPCMPUBMasked512 [4] x y (VPMOVVec8x64ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpNotEqualUint16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -46679,1638 +35441,6 @@ func rewriteValueAMD64_OpOffPtr(v *Value) bool { return true } } -func rewriteValueAMD64_OpOnesCountMaskedInt16x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OnesCountMaskedInt16x16 x mask) - // result: (VPOPCNTWMasked256 x (VPMOVVec16x16ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpOnesCountMaskedInt16x32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OnesCountMaskedInt16x32 x mask) - // result: (VPOPCNTWMasked512 x (VPMOVVec16x32ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpOnesCountMaskedInt16x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OnesCountMaskedInt16x8 x mask) - // result: (VPOPCNTWMasked128 x (VPMOVVec16x8ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpOnesCountMaskedInt32x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OnesCountMaskedInt32x16 x mask) - // result: (VPOPCNTDMasked512 x (VPMOVVec32x16ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpOnesCountMaskedInt32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OnesCountMaskedInt32x4 x mask) - // result: (VPOPCNTDMasked128 x (VPMOVVec32x4ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpOnesCountMaskedInt32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OnesCountMaskedInt32x8 x mask) - // result: (VPOPCNTDMasked256 x 
(VPMOVVec32x8ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpOnesCountMaskedInt64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OnesCountMaskedInt64x2 x mask) - // result: (VPOPCNTQMasked128 x (VPMOVVec64x2ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpOnesCountMaskedInt64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OnesCountMaskedInt64x4 x mask) - // result: (VPOPCNTQMasked256 x (VPMOVVec64x4ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpOnesCountMaskedInt64x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OnesCountMaskedInt64x8 x mask) - // result: (VPOPCNTQMasked512 x (VPMOVVec64x8ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpOnesCountMaskedInt8x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OnesCountMaskedInt8x16 x mask) - // result: (VPOPCNTBMasked128 x (VPMOVVec8x16ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpOnesCountMaskedInt8x32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OnesCountMaskedInt8x32 x mask) - // result: (VPOPCNTBMasked256 x (VPMOVVec8x32ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpOnesCountMaskedInt8x64(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OnesCountMaskedInt8x64 x mask) - // result: (VPOPCNTBMasked512 x (VPMOVVec8x64ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpOnesCountMaskedUint16x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OnesCountMaskedUint16x16 x mask) - // result: (VPOPCNTWMasked256 x (VPMOVVec16x16ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpOnesCountMaskedUint16x32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OnesCountMaskedUint16x32 x mask) - // result: (VPOPCNTWMasked512 x (VPMOVVec16x32ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} 
-func rewriteValueAMD64_OpOnesCountMaskedUint16x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OnesCountMaskedUint16x8 x mask) - // result: (VPOPCNTWMasked128 x (VPMOVVec16x8ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpOnesCountMaskedUint32x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OnesCountMaskedUint32x16 x mask) - // result: (VPOPCNTDMasked512 x (VPMOVVec32x16ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpOnesCountMaskedUint32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OnesCountMaskedUint32x4 x mask) - // result: (VPOPCNTDMasked128 x (VPMOVVec32x4ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpOnesCountMaskedUint32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OnesCountMaskedUint32x8 x mask) - // result: (VPOPCNTDMasked256 x (VPMOVVec32x8ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpOnesCountMaskedUint64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OnesCountMaskedUint64x2 x mask) - // result: (VPOPCNTQMasked128 x (VPMOVVec64x2ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpOnesCountMaskedUint64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OnesCountMaskedUint64x4 x mask) - // result: (VPOPCNTQMasked256 x (VPMOVVec64x4ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpOnesCountMaskedUint64x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OnesCountMaskedUint64x8 x mask) - // result: (VPOPCNTQMasked512 x (VPMOVVec64x8ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpOnesCountMaskedUint8x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OnesCountMaskedUint8x16 x mask) - // result: (VPOPCNTBMasked128 x (VPMOVVec8x16ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpOnesCountMaskedUint8x32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OnesCountMaskedUint8x32 x mask) - // result: (VPOPCNTBMasked256 x 
(VPMOVVec8x32ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpOnesCountMaskedUint8x64(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OnesCountMaskedUint8x64 x mask) - // result: (VPOPCNTBMasked512 x (VPMOVVec8x64ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpOrMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OrMaskedInt32x16 x y mask) - // result: (VPORDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPORDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpOrMaskedInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OrMaskedInt32x4 x y mask) - // result: (VPORDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPORDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpOrMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OrMaskedInt32x8 x y mask) - // result: (VPORDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPORDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpOrMaskedInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OrMaskedInt64x2 x y mask) - // result: (VPORQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPORQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpOrMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OrMaskedInt64x4 x y mask) - // result: (VPORQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPORQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpOrMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OrMaskedInt64x8 x y mask) - // result: (VPORQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPORQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpOrMaskedUint32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OrMaskedUint32x16 x y mask) - // result: (VPORDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPORDMasked512) - v0 := 
b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpOrMaskedUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OrMaskedUint32x4 x y mask) - // result: (VPORDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPORDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpOrMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OrMaskedUint32x8 x y mask) - // result: (VPORDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPORDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpOrMaskedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OrMaskedUint64x2 x y mask) - // result: (VPORQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPORQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpOrMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OrMaskedUint64x4 x y mask) - // result: (VPORQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPORQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpOrMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (OrMaskedUint64x8 x y mask) - // result: (VPORQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPORQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedFloat32x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedFloat32x16 x y z mask) - // result: (VPERMI2PSMasked512 x y z (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2PSMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedFloat32x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedFloat32x4 x y z mask) - // result: (VPERMI2PSMasked128 x y z (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2PSMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedFloat32x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedFloat32x8 x y z mask) - // result: (VPERMI2PSMasked256 x y z (VPMOVVec32x8ToM mask)) - 
for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2PSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedFloat64x2(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedFloat64x2 x y z mask) - // result: (VPERMI2PDMasked128 x y z (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2PDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedFloat64x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedFloat64x4 x y z mask) - // result: (VPERMI2PDMasked256 x y z (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2PDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedFloat64x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedFloat64x8 x y z mask) - // result: (VPERMI2PDMasked512 x y z (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2PDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedInt16x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedInt16x16 x y z mask) - // result: (VPERMI2WMasked256 x y z (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2WMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedInt16x32(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedInt16x32 x y z mask) - // result: (VPERMI2WMasked512 x y z (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2WMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedInt16x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedInt16x8 x y z mask) - // result: (VPERMI2WMasked128 x y z (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2WMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedInt32x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedInt32x16 x y z mask) - // result: (VPERMI2DMasked512 x y z (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2DMasked512) - v0 := b.NewValue0(v.Pos, 
OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedInt32x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedInt32x4 x y z mask) - // result: (VPERMI2DMasked128 x y z (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2DMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedInt32x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedInt32x8 x y z mask) - // result: (VPERMI2DMasked256 x y z (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2DMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedInt64x2(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedInt64x2 x y z mask) - // result: (VPERMI2QMasked128 x y z (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2QMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedInt64x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedInt64x4 x y z mask) - // result: (VPERMI2QMasked256 x y z (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2QMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedInt64x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedInt64x8 x y z mask) - // result: (VPERMI2QMasked512 x y z (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2QMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedInt8x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedInt8x16 x y z mask) - // result: (VPERMI2BMasked128 x y z (VPMOVVec8x16ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2BMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedInt8x32(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedInt8x32 x y z mask) - // result: (VPERMI2BMasked256 x y z (VPMOVVec8x32ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2BMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func 
rewriteValueAMD64_OpPermute2MaskedInt8x64(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedInt8x64 x y z mask) - // result: (VPERMI2BMasked512 x y z (VPMOVVec8x64ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2BMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedUint16x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedUint16x16 x y z mask) - // result: (VPERMI2WMasked256 x y z (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2WMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedUint16x32(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedUint16x32 x y z mask) - // result: (VPERMI2WMasked512 x y z (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2WMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedUint16x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedUint16x8 x y z mask) - // result: (VPERMI2WMasked128 x y z (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2WMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedUint32x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedUint32x16 x y z mask) - // result: (VPERMI2DMasked512 x y z (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2DMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedUint32x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedUint32x4 x y z mask) - // result: (VPERMI2DMasked128 x y z (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2DMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedUint32x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedUint32x8 x y z mask) - // result: (VPERMI2DMasked256 x y z (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2DMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedUint64x2(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - 
v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedUint64x2 x y z mask) - // result: (VPERMI2QMasked128 x y z (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2QMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedUint64x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedUint64x4 x y z mask) - // result: (VPERMI2QMasked256 x y z (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2QMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedUint64x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedUint64x8 x y z mask) - // result: (VPERMI2QMasked512 x y z (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2QMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedUint8x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedUint8x16 x y z mask) - // result: (VPERMI2BMasked128 x y z (VPMOVVec8x16ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2BMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedUint8x32(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedUint8x32 x y z mask) - // result: (VPERMI2BMasked256 x y z (VPMOVVec8x32ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2BMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermute2MaskedUint8x64(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Permute2MaskedUint8x64 x y z mask) - // result: (VPERMI2BMasked512 x y z (VPMOVVec8x64ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPERMI2BMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpPermuteMaskedFloat32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (PermuteMaskedFloat32x16 x y mask) - // result: (VPERMPSMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPERMPSMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpPermuteMaskedFloat32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (PermuteMaskedFloat32x8 x y mask) - // result: (VPERMPSMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - 
v.reset(OpAMD64VPERMPSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpPermuteMaskedFloat64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (PermuteMaskedFloat64x4 x y mask) - // result: (VPERMPDMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPERMPDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpPermuteMaskedFloat64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (PermuteMaskedFloat64x8 x y mask) - // result: (VPERMPDMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPERMPDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpPermuteMaskedInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (PermuteMaskedInt16x16 x y mask) - // result: (VPERMWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPERMWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpPermuteMaskedInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (PermuteMaskedInt16x32 x y mask) - // result: (VPERMWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPERMWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpPermuteMaskedInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (PermuteMaskedInt16x8 x y mask) - // result: (VPERMWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPERMWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpPermuteMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (PermuteMaskedInt32x16 x y mask) - // result: (VPERMDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPERMDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpPermuteMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (PermuteMaskedInt32x8 x y mask) - // result: (VPERMDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPERMDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpPermuteMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (PermuteMaskedInt64x4 x y mask) - // result: (VPERMQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y 
:= v_1 - mask := v_2 - v.reset(OpAMD64VPERMQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpPermuteMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (PermuteMaskedInt64x8 x y mask) - // result: (VPERMQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPERMQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpPermuteMaskedInt8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (PermuteMaskedInt8x16 x y mask) - // result: (VPERMBMasked128 x y (VPMOVVec8x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPERMBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpPermuteMaskedInt8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (PermuteMaskedInt8x32 x y mask) - // result: (VPERMBMasked256 x y (VPMOVVec8x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPERMBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpPermuteMaskedInt8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (PermuteMaskedInt8x64 x y mask) - // result: (VPERMBMasked512 x y (VPMOVVec8x64ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPERMBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpPermuteMaskedUint16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (PermuteMaskedUint16x16 x y mask) - // result: (VPERMWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPERMWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpPermuteMaskedUint16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (PermuteMaskedUint16x32 x y mask) - // result: (VPERMWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPERMWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpPermuteMaskedUint16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (PermuteMaskedUint16x8 x y mask) - // result: (VPERMWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPERMWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpPermuteMaskedUint32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (PermuteMaskedUint32x16 x y mask) - // result: (VPERMDMasked512 x y (VPMOVVec32x16ToM mask)) - for 
{ - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPERMDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpPermuteMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (PermuteMaskedUint32x8 x y mask) - // result: (VPERMDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPERMDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpPermuteMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (PermuteMaskedUint64x4 x y mask) - // result: (VPERMQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPERMQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpPermuteMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (PermuteMaskedUint64x8 x y mask) - // result: (VPERMQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPERMQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpPermuteMaskedUint8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (PermuteMaskedUint8x16 x y mask) - // result: (VPERMBMasked128 x y (VPMOVVec8x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPERMBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpPermuteMaskedUint8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (PermuteMaskedUint8x32 x y mask) - // result: (VPERMBMasked256 x y (VPMOVVec8x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPERMBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpPermuteMaskedUint8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (PermuteMaskedUint8x64 x y mask) - // result: (VPERMBMasked512 x y (VPMOVVec8x64ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPERMBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} func rewriteValueAMD64_OpPopCount16(v *Value) bool { v_0 := v.Args[0] b := v.Block @@ -48341,1062 +35471,6 @@ func rewriteValueAMD64_OpPopCount8(v *Value) bool { return true } } -func rewriteValueAMD64_OpReciprocalMaskedFloat32x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ReciprocalMaskedFloat32x16 x mask) - // result: (VRCP14PSMasked512 x (VPMOVVec32x16ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VRCP14PSMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpReciprocalMaskedFloat32x4(v *Value) bool { - v_1 
:= v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ReciprocalMaskedFloat32x4 x mask) - // result: (VRCP14PSMasked128 x (VPMOVVec32x4ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VRCP14PSMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpReciprocalMaskedFloat32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ReciprocalMaskedFloat32x8 x mask) - // result: (VRCP14PSMasked256 x (VPMOVVec32x8ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VRCP14PSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpReciprocalMaskedFloat64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ReciprocalMaskedFloat64x2 x mask) - // result: (VRCP14PDMasked128 x (VPMOVVec64x2ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VRCP14PDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpReciprocalMaskedFloat64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ReciprocalMaskedFloat64x4 x mask) - // result: (VRCP14PDMasked256 x (VPMOVVec64x4ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VRCP14PDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpReciprocalMaskedFloat64x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ReciprocalMaskedFloat64x8 x mask) - // result: (VRCP14PDMasked512 x (VPMOVVec64x8ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VRCP14PDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpReciprocalSqrtMaskedFloat32x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ReciprocalSqrtMaskedFloat32x16 x mask) - // result: (VRSQRT14PSMasked512 x (VPMOVVec32x16ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VRSQRT14PSMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpReciprocalSqrtMaskedFloat32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ReciprocalSqrtMaskedFloat32x4 x mask) - // result: (VRSQRT14PSMasked128 x (VPMOVVec32x4ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VRSQRT14PSMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpReciprocalSqrtMaskedFloat32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ReciprocalSqrtMaskedFloat32x8 x mask) - // result: (VRSQRT14PSMasked256 x (VPMOVVec32x8ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VRSQRT14PSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpReciprocalSqrtMaskedFloat64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ReciprocalSqrtMaskedFloat64x2 x mask) - // result: (VRSQRT14PDMasked128 x 
(VPMOVVec64x2ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VRSQRT14PDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpReciprocalSqrtMaskedFloat64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ReciprocalSqrtMaskedFloat64x4 x mask) - // result: (VRSQRT14PDMasked256 x (VPMOVVec64x4ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VRSQRT14PDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpReciprocalSqrtMaskedFloat64x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ReciprocalSqrtMaskedFloat64x8 x mask) - // result: (VRSQRT14PDMasked512 x (VPMOVVec64x8ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VRSQRT14PDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRotateAllLeftMaskedInt32x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateAllLeftMaskedInt32x16 [a] x mask) - // result: (VPROLDMasked512 [a] x (VPMOVVec32x16ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VPROLDMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRotateAllLeftMaskedInt32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateAllLeftMaskedInt32x4 [a] x mask) - // result: (VPROLDMasked128 [a] x (VPMOVVec32x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VPROLDMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRotateAllLeftMaskedInt32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateAllLeftMaskedInt32x8 [a] x mask) - // result: (VPROLDMasked256 [a] x (VPMOVVec32x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VPROLDMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRotateAllLeftMaskedInt64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateAllLeftMaskedInt64x2 [a] x mask) - // result: (VPROLQMasked128 [a] x (VPMOVVec64x2ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VPROLQMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRotateAllLeftMaskedInt64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateAllLeftMaskedInt64x4 [a] x mask) - // result: (VPROLQMasked256 [a] x (VPMOVVec64x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VPROLQMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func 
rewriteValueAMD64_OpRotateAllLeftMaskedInt64x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateAllLeftMaskedInt64x8 [a] x mask) - // result: (VPROLQMasked512 [a] x (VPMOVVec64x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VPROLQMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRotateAllLeftMaskedUint32x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateAllLeftMaskedUint32x16 [a] x mask) - // result: (VPROLDMasked512 [a] x (VPMOVVec32x16ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VPROLDMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRotateAllLeftMaskedUint32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateAllLeftMaskedUint32x4 [a] x mask) - // result: (VPROLDMasked128 [a] x (VPMOVVec32x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VPROLDMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRotateAllLeftMaskedUint32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateAllLeftMaskedUint32x8 [a] x mask) - // result: (VPROLDMasked256 [a] x (VPMOVVec32x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VPROLDMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRotateAllLeftMaskedUint64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateAllLeftMaskedUint64x2 [a] x mask) - // result: (VPROLQMasked128 [a] x (VPMOVVec64x2ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VPROLQMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRotateAllLeftMaskedUint64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateAllLeftMaskedUint64x4 [a] x mask) - // result: (VPROLQMasked256 [a] x (VPMOVVec64x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VPROLQMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRotateAllLeftMaskedUint64x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateAllLeftMaskedUint64x8 [a] x mask) - // result: (VPROLQMasked512 [a] x (VPMOVVec64x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VPROLQMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRotateAllRightMaskedInt32x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - 
// match: (RotateAllRightMaskedInt32x16 [a] x mask) - // result: (VPRORDMasked512 [a] x (VPMOVVec32x16ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VPRORDMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRotateAllRightMaskedInt32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateAllRightMaskedInt32x4 [a] x mask) - // result: (VPRORDMasked128 [a] x (VPMOVVec32x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VPRORDMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRotateAllRightMaskedInt32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateAllRightMaskedInt32x8 [a] x mask) - // result: (VPRORDMasked256 [a] x (VPMOVVec32x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VPRORDMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRotateAllRightMaskedInt64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateAllRightMaskedInt64x2 [a] x mask) - // result: (VPRORQMasked128 [a] x (VPMOVVec64x2ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VPRORQMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRotateAllRightMaskedInt64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateAllRightMaskedInt64x4 [a] x mask) - // result: (VPRORQMasked256 [a] x (VPMOVVec64x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VPRORQMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRotateAllRightMaskedInt64x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateAllRightMaskedInt64x8 [a] x mask) - // result: (VPRORQMasked512 [a] x (VPMOVVec64x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VPRORQMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRotateAllRightMaskedUint32x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateAllRightMaskedUint32x16 [a] x mask) - // result: (VPRORDMasked512 [a] x (VPMOVVec32x16ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VPRORDMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRotateAllRightMaskedUint32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateAllRightMaskedUint32x4 [a] x mask) - // result: (VPRORDMasked128 [a] x (VPMOVVec32x4ToM mask)) - 
for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VPRORDMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRotateAllRightMaskedUint32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateAllRightMaskedUint32x8 [a] x mask) - // result: (VPRORDMasked256 [a] x (VPMOVVec32x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VPRORDMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRotateAllRightMaskedUint64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateAllRightMaskedUint64x2 [a] x mask) - // result: (VPRORQMasked128 [a] x (VPMOVVec64x2ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VPRORQMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRotateAllRightMaskedUint64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateAllRightMaskedUint64x4 [a] x mask) - // result: (VPRORQMasked256 [a] x (VPMOVVec64x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VPRORQMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRotateAllRightMaskedUint64x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateAllRightMaskedUint64x8 [a] x mask) - // result: (VPRORQMasked512 [a] x (VPMOVVec64x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VPRORQMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRotateLeftMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateLeftMaskedInt32x16 x y mask) - // result: (VPROLVDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPROLVDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpRotateLeftMaskedInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateLeftMaskedInt32x4 x y mask) - // result: (VPROLVDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPROLVDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpRotateLeftMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateLeftMaskedInt32x8 x y mask) - // result: (VPROLVDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPROLVDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, 
v0) - return true - } -} -func rewriteValueAMD64_OpRotateLeftMaskedInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateLeftMaskedInt64x2 x y mask) - // result: (VPROLVQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPROLVQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpRotateLeftMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateLeftMaskedInt64x4 x y mask) - // result: (VPROLVQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPROLVQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpRotateLeftMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateLeftMaskedInt64x8 x y mask) - // result: (VPROLVQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPROLVQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpRotateLeftMaskedUint32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateLeftMaskedUint32x16 x y mask) - // result: (VPROLVDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPROLVDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpRotateLeftMaskedUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateLeftMaskedUint32x4 x y mask) - // result: (VPROLVDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPROLVDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpRotateLeftMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateLeftMaskedUint32x8 x y mask) - // result: (VPROLVDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPROLVDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpRotateLeftMaskedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateLeftMaskedUint64x2 x y mask) - // result: (VPROLVQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPROLVQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpRotateLeftMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateLeftMaskedUint64x4 x y mask) - // result: (VPROLVQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPROLVQMasked256) - v0 := b.NewValue0(v.Pos, 
OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpRotateLeftMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateLeftMaskedUint64x8 x y mask) - // result: (VPROLVQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPROLVQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpRotateRightMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateRightMaskedInt32x16 x y mask) - // result: (VPRORVDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPRORVDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpRotateRightMaskedInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateRightMaskedInt32x4 x y mask) - // result: (VPRORVDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPRORVDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpRotateRightMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateRightMaskedInt32x8 x y mask) - // result: (VPRORVDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPRORVDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpRotateRightMaskedInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateRightMaskedInt64x2 x y mask) - // result: (VPRORVQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPRORVQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpRotateRightMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateRightMaskedInt64x4 x y mask) - // result: (VPRORVQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPRORVQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpRotateRightMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateRightMaskedInt64x8 x y mask) - // result: (VPRORVQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPRORVQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpRotateRightMaskedUint32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateRightMaskedUint32x16 x y mask) - // result: (VPRORVDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 
- y := v_1 - mask := v_2 - v.reset(OpAMD64VPRORVDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpRotateRightMaskedUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateRightMaskedUint32x4 x y mask) - // result: (VPRORVDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPRORVDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpRotateRightMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateRightMaskedUint32x8 x y mask) - // result: (VPRORVDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPRORVDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpRotateRightMaskedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateRightMaskedUint64x2 x y mask) - // result: (VPRORVQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPRORVQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpRotateRightMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateRightMaskedUint64x4 x y mask) - // result: (VPRORVQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPRORVQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpRotateRightMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RotateRightMaskedUint64x8 x y mask) - // result: (VPRORVQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPRORVQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} func rewriteValueAMD64_OpRoundToEven(v *Value) bool { v_0 := v.Args[0] // match: (RoundToEven x) @@ -49535,114 +35609,6 @@ func rewriteValueAMD64_OpRoundToEvenScaledFloat64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat32x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RoundToEvenScaledMaskedFloat32x16 [a] x mask) - // result: (VRNDSCALEPSMasked512 [a+0] x (VPMOVVec32x16ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPSMasked512) - v.AuxInt = uint8ToAuxInt(a + 0) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RoundToEvenScaledMaskedFloat32x4 [a] x mask) - // result: (VRNDSCALEPSMasked128 [a+0] x (VPMOVVec32x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPSMasked128) 
- v.AuxInt = uint8ToAuxInt(a + 0) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RoundToEvenScaledMaskedFloat32x8 [a] x mask) - // result: (VRNDSCALEPSMasked256 [a+0] x (VPMOVVec32x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPSMasked256) - v.AuxInt = uint8ToAuxInt(a + 0) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RoundToEvenScaledMaskedFloat64x2 [a] x mask) - // result: (VRNDSCALEPDMasked128 [a+0] x (VPMOVVec64x2ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPDMasked128) - v.AuxInt = uint8ToAuxInt(a + 0) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RoundToEvenScaledMaskedFloat64x4 [a] x mask) - // result: (VRNDSCALEPDMasked256 [a+0] x (VPMOVVec64x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPDMasked256) - v.AuxInt = uint8ToAuxInt(a + 0) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat64x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RoundToEvenScaledMaskedFloat64x8 [a] x mask) - // result: (VRNDSCALEPDMasked512 [a+0] x (VPMOVVec64x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPDMasked512) - v.AuxInt = uint8ToAuxInt(a + 0) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} func rewriteValueAMD64_OpRoundToEvenScaledResidueFloat32x16(v *Value) bool { v_0 := v.Args[0] // match: (RoundToEvenScaledResidueFloat32x16 [a] x) @@ -49721,114 +35687,6 @@ func rewriteValueAMD64_OpRoundToEvenScaledResidueFloat64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat32x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RoundToEvenScaledResidueMaskedFloat32x16 [a] x mask) - // result: (VREDUCEPSMasked512 [a+0] x (VPMOVVec32x16ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPSMasked512) - v.AuxInt = uint8ToAuxInt(a + 0) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RoundToEvenScaledResidueMaskedFloat32x4 [a] x mask) - // result: (VREDUCEPSMasked128 [a+0] x (VPMOVVec32x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPSMasked128) - v.AuxInt = uint8ToAuxInt(a + 0) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func 
rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RoundToEvenScaledResidueMaskedFloat32x8 [a] x mask) - // result: (VREDUCEPSMasked256 [a+0] x (VPMOVVec32x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPSMasked256) - v.AuxInt = uint8ToAuxInt(a + 0) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RoundToEvenScaledResidueMaskedFloat64x2 [a] x mask) - // result: (VREDUCEPDMasked128 [a+0] x (VPMOVVec64x2ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPDMasked128) - v.AuxInt = uint8ToAuxInt(a + 0) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RoundToEvenScaledResidueMaskedFloat64x4 [a] x mask) - // result: (VREDUCEPDMasked256 [a+0] x (VPMOVVec64x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPDMasked256) - v.AuxInt = uint8ToAuxInt(a + 0) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat64x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (RoundToEvenScaledResidueMaskedFloat64x8 [a] x mask) - // result: (VREDUCEPDMasked512 [a+0] x (VPMOVVec64x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPDMasked512) - v.AuxInt = uint8ToAuxInt(a + 0) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} func rewriteValueAMD64_OpRsh16Ux16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -51157,114 +37015,6 @@ func rewriteValueAMD64_OpRsh8x8(v *Value) bool { } return false } -func rewriteValueAMD64_OpScaleMaskedFloat32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ScaleMaskedFloat32x16 x y mask) - // result: (VSCALEFPSMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VSCALEFPSMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpScaleMaskedFloat32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ScaleMaskedFloat32x4 x y mask) - // result: (VSCALEFPSMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VSCALEFPSMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpScaleMaskedFloat32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ScaleMaskedFloat32x8 x y mask) - // result: (VSCALEFPSMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VSCALEFPSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, 
types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpScaleMaskedFloat64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ScaleMaskedFloat64x2 x y mask) - // result: (VSCALEFPDMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VSCALEFPDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpScaleMaskedFloat64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ScaleMaskedFloat64x4 x y mask) - // result: (VSCALEFPDMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VSCALEFPDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpScaleMaskedFloat64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ScaleMaskedFloat64x8 x y mask) - // result: (VSCALEFPDMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VSCALEFPDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} func rewriteValueAMD64_OpSelect0(v *Value) bool { v_0 := v.Args[0] b := v.Block @@ -52250,2742 +38000,6 @@ func rewriteValueAMD64_OpSetLoUint8x64(v *Value) bool { return true } } -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedInt16x16 [a] x y mask) - // result: (VPSHLDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDWMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedInt16x32 [a] x y mask) - // result: (VPSHLDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDWMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedInt16x8 [a] x y mask) - // result: (VPSHLDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDWMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedInt32x16 [a] x y mask) - // result: (VPSHLDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := 
v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDDMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedInt32x4 [a] x y mask) - // result: (VPSHLDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDDMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedInt32x8 [a] x y mask) - // result: (VPSHLDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDDMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedInt64x2 [a] x y mask) - // result: (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDQMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedInt64x4 [a] x y mask) - // result: (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDQMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedInt64x8 [a] x y mask) - // result: (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDQMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedUint16x16 [a] x y mask) - // result: (VPSHLDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDWMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] 
- v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedUint16x32 [a] x y mask) - // result: (VPSHLDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDWMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedUint16x8 [a] x y mask) - // result: (VPSHLDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDWMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedUint32x16 [a] x y mask) - // result: (VPSHLDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDDMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedUint32x4 [a] x y mask) - // result: (VPSHLDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDDMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedUint32x8 [a] x y mask) - // result: (VPSHLDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDDMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedUint64x2 [a] x y mask) - // result: (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDQMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedUint64x4 [a] x y mask) - // result: (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDQMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 := 
b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedUint64x8 [a] x y mask) - // result: (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDQMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftMaskedInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftMaskedInt16x16 x y mask) - // result: (VPSLLWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftMaskedInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftMaskedInt16x32 x y mask) - // result: (VPSLLWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftMaskedInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftMaskedInt16x8 x y mask) - // result: (VPSLLWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftMaskedInt32x16 x y mask) - // result: (VPSLLDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftMaskedInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftMaskedInt32x4 x y mask) - // result: (VPSLLDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftMaskedInt32x8 x y mask) - // result: (VPSLLDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftMaskedInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: 
(ShiftAllLeftMaskedInt64x2 x y mask) - // result: (VPSLLQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftMaskedInt64x4 x y mask) - // result: (VPSLLQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftMaskedInt64x8 x y mask) - // result: (VPSLLQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftMaskedUint16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftMaskedUint16x16 x y mask) - // result: (VPSLLWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftMaskedUint16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftMaskedUint16x32 x y mask) - // result: (VPSLLWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftMaskedUint16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftMaskedUint16x8 x y mask) - // result: (VPSLLWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftMaskedUint32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftMaskedUint32x16 x y mask) - // result: (VPSLLDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftMaskedUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftMaskedUint32x4 x y mask) - // result: (VPSLLDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func 
rewriteValueAMD64_OpShiftAllLeftMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftMaskedUint32x8 x y mask) - // result: (VPSLLDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftMaskedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftMaskedUint64x2 x y mask) - // result: (VPSLLQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftMaskedUint64x4 x y mask) - // result: (VPSLLQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftMaskedUint64x8 x y mask) - // result: (VPSLLQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightConcatMaskedInt16x16 [a] x y mask) - // result: (VPSHRDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHRDWMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightConcatMaskedInt16x32 [a] x y mask) - // result: (VPSHRDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHRDWMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightConcatMaskedInt16x8 [a] x y mask) - // result: (VPSHRDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHRDWMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := 
v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightConcatMaskedInt32x16 [a] x y mask) - // result: (VPSHRDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHRDDMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightConcatMaskedInt32x4 [a] x y mask) - // result: (VPSHRDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHRDDMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightConcatMaskedInt32x8 [a] x y mask) - // result: (VPSHRDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHRDDMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightConcatMaskedInt64x2 [a] x y mask) - // result: (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHRDQMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightConcatMaskedInt64x4 [a] x y mask) - // result: (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHRDQMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightConcatMaskedInt64x8 [a] x y mask) - // result: (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHRDQMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightConcatMaskedUint16x16 [a] x y mask) - // result: (VPSHRDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHRDWMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 
:= b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightConcatMaskedUint16x32 [a] x y mask) - // result: (VPSHRDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHRDWMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightConcatMaskedUint16x8 [a] x y mask) - // result: (VPSHRDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHRDWMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightConcatMaskedUint32x16 [a] x y mask) - // result: (VPSHRDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHRDDMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightConcatMaskedUint32x4 [a] x y mask) - // result: (VPSHRDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHRDDMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightConcatMaskedUint32x8 [a] x y mask) - // result: (VPSHRDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHRDDMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightConcatMaskedUint64x2 [a] x y mask) - // result: (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHRDQMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: 
(ShiftAllRightConcatMaskedUint64x4 [a] x y mask) - // result: (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHRDQMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightConcatMaskedUint64x8 [a] x y mask) - // result: (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHRDQMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightMaskedInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightMaskedInt16x16 x y mask) - // result: (VPSRAWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightMaskedInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightMaskedInt16x32 x y mask) - // result: (VPSRAWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightMaskedInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightMaskedInt16x8 x y mask) - // result: (VPSRAWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightMaskedInt32x16 x y mask) - // result: (VPSRADMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRADMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightMaskedInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightMaskedInt32x4 x y mask) - // result: (VPSRADMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRADMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightMaskedInt32x8 x y mask) - // result: (VPSRADMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRADMasked256) - v0 := 
b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightMaskedInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightMaskedInt64x2 x y mask) - // result: (VPSRAQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightMaskedInt64x4 x y mask) - // result: (VPSRAQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightMaskedInt64x8 x y mask) - // result: (VPSRAQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightMaskedUint16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightMaskedUint16x16 x y mask) - // result: (VPSRLWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRLWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightMaskedUint16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightMaskedUint16x32 x y mask) - // result: (VPSRLWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRLWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightMaskedUint16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightMaskedUint16x8 x y mask) - // result: (VPSRLWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRLWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightMaskedUint32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightMaskedUint32x16 x y mask) - // result: (VPSRLDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRLDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightMaskedUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightMaskedUint32x4 x y mask) - // result: 
(VPSRLDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRLDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightMaskedUint32x8 x y mask) - // result: (VPSRLDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRLDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightMaskedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightMaskedUint64x2 x y mask) - // result: (VPSRLQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRLQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightMaskedUint64x4 x y mask) - // result: (VPSRLQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRLQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllRightMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightMaskedUint64x8 x y mask) - // result: (VPSRLQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRLQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftConcatMaskedInt16x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftConcatMaskedInt16x16 x y z mask) - // result: (VPSHLDVWMasked256 x y z (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftConcatMaskedInt16x32(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftConcatMaskedInt16x32 x y z mask) - // result: (VPSHLDVWMasked512 x y z (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftConcatMaskedInt16x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftConcatMaskedInt16x8 x y z mask) - // result: (VPSHLDVWMasked128 x y z (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - 
v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftConcatMaskedInt32x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftConcatMaskedInt32x16 x y z mask) - // result: (VPSHLDVDMasked512 x y z (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftConcatMaskedInt32x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftConcatMaskedInt32x4 x y z mask) - // result: (VPSHLDVDMasked128 x y z (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftConcatMaskedInt32x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftConcatMaskedInt32x8 x y z mask) - // result: (VPSHLDVDMasked256 x y z (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftConcatMaskedInt64x2(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftConcatMaskedInt64x2 x y z mask) - // result: (VPSHLDVQMasked128 x y z (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftConcatMaskedInt64x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftConcatMaskedInt64x4 x y z mask) - // result: (VPSHLDVQMasked256 x y z (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftConcatMaskedInt64x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftConcatMaskedInt64x8 x y z mask) - // result: (VPSHLDVQMasked512 x y z (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftConcatMaskedUint16x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftConcatMaskedUint16x16 x y z mask) - // result: (VPSHLDVWMasked256 x y z (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true 
- } -} -func rewriteValueAMD64_OpShiftLeftConcatMaskedUint16x32(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftConcatMaskedUint16x32 x y z mask) - // result: (VPSHLDVWMasked512 x y z (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftConcatMaskedUint16x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftConcatMaskedUint16x8 x y z mask) - // result: (VPSHLDVWMasked128 x y z (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftConcatMaskedUint32x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftConcatMaskedUint32x16 x y z mask) - // result: (VPSHLDVDMasked512 x y z (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftConcatMaskedUint32x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftConcatMaskedUint32x4 x y z mask) - // result: (VPSHLDVDMasked128 x y z (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftConcatMaskedUint32x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftConcatMaskedUint32x8 x y z mask) - // result: (VPSHLDVDMasked256 x y z (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftConcatMaskedUint64x2(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftConcatMaskedUint64x2 x y z mask) - // result: (VPSHLDVQMasked128 x y z (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftConcatMaskedUint64x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftConcatMaskedUint64x4 x y z mask) - // result: (VPSHLDVQMasked256 x y z (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func 
rewriteValueAMD64_OpShiftLeftConcatMaskedUint64x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftConcatMaskedUint64x8 x y z mask) - // result: (VPSHLDVQMasked512 x y z (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftMaskedInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftMaskedInt16x16 x y mask) - // result: (VPSLLVWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftMaskedInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftMaskedInt16x32 x y mask) - // result: (VPSLLVWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftMaskedInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftMaskedInt16x8 x y mask) - // result: (VPSLLVWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftMaskedInt32x16 x y mask) - // result: (VPSLLVDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftMaskedInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftMaskedInt32x4 x y mask) - // result: (VPSLLVDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftMaskedInt32x8 x y mask) - // result: (VPSLLVDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftMaskedInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftMaskedInt64x2 x y mask) - // result: (VPSLLVQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVQMasked128) - v0 := 
b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftMaskedInt64x4 x y mask) - // result: (VPSLLVQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftMaskedInt64x8 x y mask) - // result: (VPSLLVQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftMaskedUint16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftMaskedUint16x16 x y mask) - // result: (VPSLLVWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftMaskedUint16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftMaskedUint16x32 x y mask) - // result: (VPSLLVWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftMaskedUint16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftMaskedUint16x8 x y mask) - // result: (VPSLLVWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftMaskedUint32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftMaskedUint32x16 x y mask) - // result: (VPSLLVDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftMaskedUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftMaskedUint32x4 x y mask) - // result: (VPSLLVDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftMaskedUint32x8 x y mask) - // result: (VPSLLVDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := 
v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftMaskedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftMaskedUint64x2 x y mask) - // result: (VPSLLVQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftMaskedUint64x4 x y mask) - // result: (VPSLLVQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftLeftMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftLeftMaskedUint64x8 x y mask) - // result: (VPSLLVQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightConcatMaskedInt16x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightConcatMaskedInt16x16 x y z mask) - // result: (VPSHRDVWMasked256 x y z (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightConcatMaskedInt16x32(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightConcatMaskedInt16x32 x y z mask) - // result: (VPSHRDVWMasked512 x y z (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightConcatMaskedInt16x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightConcatMaskedInt16x8 x y z mask) - // result: (VPSHRDVWMasked128 x y z (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightConcatMaskedInt32x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightConcatMaskedInt32x16 x y z mask) - // result: (VPSHRDVDMasked512 x y z (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - 
return true - } -} -func rewriteValueAMD64_OpShiftRightConcatMaskedInt32x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightConcatMaskedInt32x4 x y z mask) - // result: (VPSHRDVDMasked128 x y z (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightConcatMaskedInt32x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightConcatMaskedInt32x8 x y z mask) - // result: (VPSHRDVDMasked256 x y z (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightConcatMaskedInt64x2(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightConcatMaskedInt64x2 x y z mask) - // result: (VPSHRDVQMasked128 x y z (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightConcatMaskedInt64x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightConcatMaskedInt64x4 x y z mask) - // result: (VPSHRDVQMasked256 x y z (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightConcatMaskedInt64x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightConcatMaskedInt64x8 x y z mask) - // result: (VPSHRDVQMasked512 x y z (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightConcatMaskedUint16x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightConcatMaskedUint16x16 x y z mask) - // result: (VPSHRDVWMasked256 x y z (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightConcatMaskedUint16x32(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightConcatMaskedUint16x32 x y z mask) - // result: (VPSHRDVWMasked512 x y z (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} 
-func rewriteValueAMD64_OpShiftRightConcatMaskedUint16x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightConcatMaskedUint16x8 x y z mask) - // result: (VPSHRDVWMasked128 x y z (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightConcatMaskedUint32x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightConcatMaskedUint32x16 x y z mask) - // result: (VPSHRDVDMasked512 x y z (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightConcatMaskedUint32x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightConcatMaskedUint32x4 x y z mask) - // result: (VPSHRDVDMasked128 x y z (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightConcatMaskedUint32x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightConcatMaskedUint32x8 x y z mask) - // result: (VPSHRDVDMasked256 x y z (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightConcatMaskedUint64x2(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightConcatMaskedUint64x2 x y z mask) - // result: (VPSHRDVQMasked128 x y z (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightConcatMaskedUint64x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightConcatMaskedUint64x4 x y z mask) - // result: (VPSHRDVQMasked256 x y z (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightConcatMaskedUint64x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightConcatMaskedUint64x8 x y z mask) - // result: (VPSHRDVQMasked512 x y z (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func 
rewriteValueAMD64_OpShiftRightMaskedInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightMaskedInt16x16 x y mask) - // result: (VPSRAVWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightMaskedInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightMaskedInt16x32 x y mask) - // result: (VPSRAVWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightMaskedInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightMaskedInt16x8 x y mask) - // result: (VPSRAVWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightMaskedInt32x16 x y mask) - // result: (VPSRAVDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightMaskedInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightMaskedInt32x4 x y mask) - // result: (VPSRAVDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightMaskedInt32x8 x y mask) - // result: (VPSRAVDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightMaskedInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightMaskedInt64x2 x y mask) - // result: (VPSRAVQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightMaskedInt64x4 x y mask) - // result: (VPSRAVQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, 
types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightMaskedInt64x8 x y mask) - // result: (VPSRAVQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightMaskedUint16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightMaskedUint16x16 x y mask) - // result: (VPSRLVWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRLVWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightMaskedUint16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightMaskedUint16x32 x y mask) - // result: (VPSRLVWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRLVWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightMaskedUint16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightMaskedUint16x8 x y mask) - // result: (VPSRLVWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRLVWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightMaskedUint32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightMaskedUint32x16 x y mask) - // result: (VPSRLVDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRLVDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightMaskedUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightMaskedUint32x4 x y mask) - // result: (VPSRLVDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRLVDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightMaskedUint32x8 x y mask) - // result: (VPSRLVDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRLVDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightMaskedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightMaskedUint64x2 x y mask) - // result: (VPSRLVQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := 
v_2 - v.reset(OpAMD64VPSRLVQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightMaskedUint64x4 x y mask) - // result: (VPSRLVQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRLVQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightMaskedUint64x8 x y mask) - // result: (VPSRLVQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRLVQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} func rewriteValueAMD64_OpSlicemask(v *Value) bool { v_0 := v.Args[0] b := v.Block @@ -55040,102 +38054,6 @@ func rewriteValueAMD64_OpSpectreSliceIndex(v *Value) bool { return true } } -func rewriteValueAMD64_OpSqrtMaskedFloat32x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SqrtMaskedFloat32x16 x mask) - // result: (VSQRTPSMasked512 x (VPMOVVec32x16ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VSQRTPSMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpSqrtMaskedFloat32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SqrtMaskedFloat32x4 x mask) - // result: (VSQRTPSMasked128 x (VPMOVVec32x4ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VSQRTPSMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpSqrtMaskedFloat32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SqrtMaskedFloat32x8 x mask) - // result: (VSQRTPSMasked256 x (VPMOVVec32x8ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VSQRTPSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpSqrtMaskedFloat64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SqrtMaskedFloat64x2 x mask) - // result: (VSQRTPDMasked128 x (VPMOVVec64x2ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VSQRTPDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpSqrtMaskedFloat64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SqrtMaskedFloat64x4 x mask) - // result: (VSQRTPDMasked256 x (VPMOVVec64x4ToM mask)) - for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VSQRTPDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpSqrtMaskedFloat64x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SqrtMaskedFloat64x8 x mask) - // result: (VSQRTPDMasked512 x (VPMOVVec64x8ToM mask)) - for { - x := v_0 - mask := v_1 - 
v.reset(OpAMD64VSQRTPDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} func rewriteValueAMD64_OpStore(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] @@ -55673,762 +38591,6 @@ func rewriteValueAMD64_OpStoreMasked8(v *Value) bool { } return false } -func rewriteValueAMD64_OpSubMaskedFloat32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedFloat32x16 x y mask) - // result: (VSUBPSMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VSUBPSMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedFloat32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedFloat32x4 x y mask) - // result: (VSUBPSMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VSUBPSMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedFloat32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedFloat32x8 x y mask) - // result: (VSUBPSMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VSUBPSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedFloat64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedFloat64x2 x y mask) - // result: (VSUBPDMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VSUBPDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedFloat64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedFloat64x4 x y mask) - // result: (VSUBPDMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VSUBPDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedFloat64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedFloat64x8 x y mask) - // result: (VSUBPDMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VSUBPDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedInt16x16 x y mask) - // result: (VPSUBWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block 
- // match: (SubMaskedInt16x32 x y mask) - // result: (VPSUBWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedInt16x8 x y mask) - // result: (VPSUBWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedInt32x16 x y mask) - // result: (VPSUBDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedInt32x4 x y mask) - // result: (VPSUBDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedInt32x8 x y mask) - // result: (VPSUBDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedInt64x2 x y mask) - // result: (VPSUBQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedInt64x4 x y mask) - // result: (VPSUBQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedInt64x8 x y mask) - // result: (VPSUBQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedInt8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedInt8x16 x y mask) - // result: 
(VPSUBBMasked128 x y (VPMOVVec8x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedInt8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedInt8x32 x y mask) - // result: (VPSUBBMasked256 x y (VPMOVVec8x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedInt8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedInt8x64 x y mask) - // result: (VPSUBBMasked512 x y (VPMOVVec8x64ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedUint16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedUint16x16 x y mask) - // result: (VPSUBWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedUint16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedUint16x32 x y mask) - // result: (VPSUBWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedUint16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedUint16x8 x y mask) - // result: (VPSUBWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedUint32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedUint32x16 x y mask) - // result: (VPSUBDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedUint32x4 x y mask) - // result: (VPSUBDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedUint32x8 x y mask) - // result: (VPSUBDMasked256 x y (VPMOVVec32x8ToM 
mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedUint64x2 x y mask) - // result: (VPSUBQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedUint64x4 x y mask) - // result: (VPSUBQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedUint64x8 x y mask) - // result: (VPSUBQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedUint8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedUint8x16 x y mask) - // result: (VPSUBBMasked128 x y (VPMOVVec8x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedUint8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedUint8x32 x y mask) - // result: (VPSUBBMasked256 x y (VPMOVVec8x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubMaskedUint8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubMaskedUint8x64 x y mask) - // result: (VPSUBBMasked512 x y (VPMOVVec8x64ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubSaturatedMaskedInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubSaturatedMaskedInt16x16 x y mask) - // result: (VPSUBSWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBSWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubSaturatedMaskedInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubSaturatedMaskedInt16x32 x y mask) - // result: (VPSUBSWMasked512 x y (VPMOVVec16x32ToM mask)) - 
for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBSWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubSaturatedMaskedInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubSaturatedMaskedInt16x8 x y mask) - // result: (VPSUBSWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBSWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubSaturatedMaskedInt8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubSaturatedMaskedInt8x16 x y mask) - // result: (VPSUBSBMasked128 x y (VPMOVVec8x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBSBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubSaturatedMaskedInt8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubSaturatedMaskedInt8x32 x y mask) - // result: (VPSUBSBMasked256 x y (VPMOVVec8x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBSBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubSaturatedMaskedInt8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubSaturatedMaskedInt8x64 x y mask) - // result: (VPSUBSBMasked512 x y (VPMOVVec8x64ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBSBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubSaturatedMaskedUint16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubSaturatedMaskedUint16x16 x y mask) - // result: (VPSUBUSWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBUSWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubSaturatedMaskedUint16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubSaturatedMaskedUint16x32 x y mask) - // result: (VPSUBUSWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBUSWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubSaturatedMaskedUint16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubSaturatedMaskedUint16x8 x y mask) - // result: (VPSUBUSWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBUSWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubSaturatedMaskedUint8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := 
v.Block - // match: (SubSaturatedMaskedUint8x16 x y mask) - // result: (VPSUBUSBMasked128 x y (VPMOVVec8x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBUSBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubSaturatedMaskedUint8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubSaturatedMaskedUint8x32 x y mask) - // result: (VPSUBUSBMasked256 x y (VPMOVVec8x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBUSBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpSubSaturatedMaskedUint8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SubSaturatedMaskedUint8x64 x y mask) - // result: (VPSUBUSBMasked512 x y (VPMOVVec8x64ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBUSBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} func rewriteValueAMD64_OpTrunc(v *Value) bool { v_0 := v.Args[0] // match: (Trunc x) @@ -56567,114 +38729,6 @@ func rewriteValueAMD64_OpTruncScaledFloat64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpTruncScaledMaskedFloat32x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (TruncScaledMaskedFloat32x16 [a] x mask) - // result: (VRNDSCALEPSMasked512 [a+3] x (VPMOVVec32x16ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPSMasked512) - v.AuxInt = uint8ToAuxInt(a + 3) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpTruncScaledMaskedFloat32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (TruncScaledMaskedFloat32x4 [a] x mask) - // result: (VRNDSCALEPSMasked128 [a+3] x (VPMOVVec32x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPSMasked128) - v.AuxInt = uint8ToAuxInt(a + 3) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpTruncScaledMaskedFloat32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (TruncScaledMaskedFloat32x8 [a] x mask) - // result: (VRNDSCALEPSMasked256 [a+3] x (VPMOVVec32x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPSMasked256) - v.AuxInt = uint8ToAuxInt(a + 3) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpTruncScaledMaskedFloat64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (TruncScaledMaskedFloat64x2 [a] x mask) - // result: (VRNDSCALEPDMasked128 [a+3] x (VPMOVVec64x2ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPDMasked128) - v.AuxInt = uint8ToAuxInt(a + 3) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpTruncScaledMaskedFloat64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := 
v.Args[0] - b := v.Block - // match: (TruncScaledMaskedFloat64x4 [a] x mask) - // result: (VRNDSCALEPDMasked256 [a+3] x (VPMOVVec64x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPDMasked256) - v.AuxInt = uint8ToAuxInt(a + 3) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpTruncScaledMaskedFloat64x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (TruncScaledMaskedFloat64x8 [a] x mask) - // result: (VRNDSCALEPDMasked512 [a+3] x (VPMOVVec64x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPDMasked512) - v.AuxInt = uint8ToAuxInt(a + 3) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} func rewriteValueAMD64_OpTruncScaledResidueFloat32x16(v *Value) bool { v_0 := v.Args[0] // match: (TruncScaledResidueFloat32x16 [a] x) @@ -56753,330 +38807,6 @@ func rewriteValueAMD64_OpTruncScaledResidueFloat64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpTruncScaledResidueMaskedFloat32x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (TruncScaledResidueMaskedFloat32x16 [a] x mask) - // result: (VREDUCEPSMasked512 [a+3] x (VPMOVVec32x16ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPSMasked512) - v.AuxInt = uint8ToAuxInt(a + 3) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpTruncScaledResidueMaskedFloat32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (TruncScaledResidueMaskedFloat32x4 [a] x mask) - // result: (VREDUCEPSMasked128 [a+3] x (VPMOVVec32x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPSMasked128) - v.AuxInt = uint8ToAuxInt(a + 3) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpTruncScaledResidueMaskedFloat32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (TruncScaledResidueMaskedFloat32x8 [a] x mask) - // result: (VREDUCEPSMasked256 [a+3] x (VPMOVVec32x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPSMasked256) - v.AuxInt = uint8ToAuxInt(a + 3) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpTruncScaledResidueMaskedFloat64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (TruncScaledResidueMaskedFloat64x2 [a] x mask) - // result: (VREDUCEPDMasked128 [a+3] x (VPMOVVec64x2ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPDMasked128) - v.AuxInt = uint8ToAuxInt(a + 3) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpTruncScaledResidueMaskedFloat64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (TruncScaledResidueMaskedFloat64x4 [a] x mask) - // result: (VREDUCEPDMasked256 [a+3] x (VPMOVVec64x4ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - 
v.reset(OpAMD64VREDUCEPDMasked256) - v.AuxInt = uint8ToAuxInt(a + 3) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpTruncScaledResidueMaskedFloat64x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (TruncScaledResidueMaskedFloat64x8 [a] x mask) - // result: (VREDUCEPDMasked512 [a+3] x (VPMOVVec64x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPDMasked512) - v.AuxInt = uint8ToAuxInt(a + 3) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } -} -func rewriteValueAMD64_OpXorMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (XorMaskedInt32x16 x y mask) - // result: (VPXORDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPXORDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpXorMaskedInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (XorMaskedInt32x4 x y mask) - // result: (VPXORDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPXORDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpXorMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (XorMaskedInt32x8 x y mask) - // result: (VPXORDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPXORDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpXorMaskedInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (XorMaskedInt64x2 x y mask) - // result: (VPXORQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPXORQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpXorMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (XorMaskedInt64x4 x y mask) - // result: (VPXORQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPXORQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpXorMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (XorMaskedInt64x8 x y mask) - // result: (VPXORQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPXORQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpXorMaskedUint32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (XorMaskedUint32x16 x y mask) - // result: (VPXORDMasked512 x y 
(VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPXORDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpXorMaskedUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (XorMaskedUint32x4 x y mask) - // result: (VPXORDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPXORDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpXorMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (XorMaskedUint32x8 x y mask) - // result: (VPXORDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPXORDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpXorMaskedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (XorMaskedUint64x2 x y mask) - // result: (VPXORQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPXORQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpXorMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (XorMaskedUint64x4 x y mask) - // result: (VPXORQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPXORQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpXorMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (XorMaskedUint64x8 x y mask) - // result: (VPXORQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPXORQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} func rewriteValueAMD64_OpZero(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] diff --git a/src/cmd/compile/internal/ssagen/simdintrinsics.go b/src/cmd/compile/internal/ssagen/simdintrinsics.go index 90149300b2c..e6c6874bddc 100644 --- a/src/cmd/compile/internal/ssagen/simdintrinsics.go +++ b/src/cmd/compile/internal/ssagen/simdintrinsics.go @@ -24,18 +24,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Int64x2.Abs", opLen1(ssa.OpAbsInt64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int64x4.Abs", opLen1(ssa.OpAbsInt64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int64x8.Abs", opLen1(ssa.OpAbsInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.AbsMasked", opLen2(ssa.OpAbsMaskedInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.AbsMasked", opLen2(ssa.OpAbsMaskedInt8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.AbsMasked", opLen2(ssa.OpAbsMaskedInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.AbsMasked", opLen2(ssa.OpAbsMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.AbsMasked", opLen2(ssa.OpAbsMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.AbsMasked", opLen2(ssa.OpAbsMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.AbsMasked", opLen2(ssa.OpAbsMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.AbsMasked", opLen2(ssa.OpAbsMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.AbsMasked", opLen2(ssa.OpAbsMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.AbsMasked", opLen2(ssa.OpAbsMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.AbsMasked", opLen2(ssa.OpAbsMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.AbsMasked", opLen2(ssa.OpAbsMaskedInt64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.Add", opLen2(ssa.OpAddFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.Add", opLen2(ssa.OpAddFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.Add", opLen2(ssa.OpAddFloat32x16, types.TypeVec512), sys.AMD64) @@ -69,51 +57,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Int32x4.AddDotProdPairsSaturated", opLen3(ssa.OpAddDotProdPairsSaturatedInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int32x8.AddDotProdPairsSaturated", opLen3(ssa.OpAddDotProdPairsSaturatedInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int32x16.AddDotProdPairsSaturated", opLen3(ssa.OpAddDotProdPairsSaturatedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.AddDotProdPairsSaturatedMasked", opLen4(ssa.OpAddDotProdPairsSaturatedMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.AddDotProdPairsSaturatedMasked", opLen4(ssa.OpAddDotProdPairsSaturatedMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.AddDotProdPairsSaturatedMasked", opLen4(ssa.OpAddDotProdPairsSaturatedMaskedInt32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x16.AddDotProdQuadruple", opLen3_31(ssa.OpAddDotProdQuadrupleInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x32.AddDotProdQuadruple", opLen3_31(ssa.OpAddDotProdQuadrupleInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x64.AddDotProdQuadruple", opLen3_31(ssa.OpAddDotProdQuadrupleInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.AddDotProdQuadrupleMasked", opLen4_31(ssa.OpAddDotProdQuadrupleMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.AddDotProdQuadrupleMasked", opLen4_31(ssa.OpAddDotProdQuadrupleMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.AddDotProdQuadrupleMasked", opLen4_31(ssa.OpAddDotProdQuadrupleMaskedInt32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x16.AddDotProdQuadrupleSaturated", opLen3_31(ssa.OpAddDotProdQuadrupleSaturatedInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x32.AddDotProdQuadrupleSaturated", opLen3_31(ssa.OpAddDotProdQuadrupleSaturatedInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x64.AddDotProdQuadrupleSaturated", opLen3_31(ssa.OpAddDotProdQuadrupleSaturatedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.AddDotProdQuadrupleSaturatedMasked", opLen4_31(ssa.OpAddDotProdQuadrupleSaturatedMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.AddDotProdQuadrupleSaturatedMasked", opLen4_31(ssa.OpAddDotProdQuadrupleSaturatedMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.AddDotProdQuadrupleSaturatedMasked", opLen4_31(ssa.OpAddDotProdQuadrupleSaturatedMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.AddMasked", opLen3(ssa.OpAddMaskedFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.AddMasked", opLen3(ssa.OpAddMaskedFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.AddMasked", opLen3(ssa.OpAddMaskedFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.AddMasked", opLen3(ssa.OpAddMaskedFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.AddMasked", opLen3(ssa.OpAddMaskedFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.AddMasked", opLen3(ssa.OpAddMaskedFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.AddMasked", opLen3(ssa.OpAddMaskedInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.AddMasked", opLen3(ssa.OpAddMaskedInt8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.AddMasked", opLen3(ssa.OpAddMaskedInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.AddMasked", opLen3(ssa.OpAddMaskedInt16x8, types.TypeVec128), sys.AMD64) - 
addF(simdPackage, "Int16x16.AddMasked", opLen3(ssa.OpAddMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.AddMasked", opLen3(ssa.OpAddMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.AddMasked", opLen3(ssa.OpAddMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.AddMasked", opLen3(ssa.OpAddMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.AddMasked", opLen3(ssa.OpAddMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.AddMasked", opLen3(ssa.OpAddMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.AddMasked", opLen3(ssa.OpAddMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.AddMasked", opLen3(ssa.OpAddMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.AddMasked", opLen3(ssa.OpAddMaskedUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.AddMasked", opLen3(ssa.OpAddMaskedUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.AddMasked", opLen3(ssa.OpAddMaskedUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.AddMasked", opLen3(ssa.OpAddMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.AddMasked", opLen3(ssa.OpAddMaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.AddMasked", opLen3(ssa.OpAddMaskedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.AddMasked", opLen3(ssa.OpAddMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.AddMasked", opLen3(ssa.OpAddMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.AddMasked", opLen3(ssa.OpAddMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.AddMasked", opLen3(ssa.OpAddMaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.AddMasked", opLen3(ssa.OpAddMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.AddMasked", opLen3(ssa.OpAddMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.AddPairs", opLen2(ssa.OpAddPairsFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.AddPairs", opLen2(ssa.OpAddPairsFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x2.AddPairs", opLen2(ssa.OpAddPairsFloat64x2, types.TypeVec128), sys.AMD64) @@ -140,18 +89,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint16x8.AddSaturated", opLen2(ssa.OpAddSaturatedUint16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint16x16.AddSaturated", opLen2(ssa.OpAddSaturatedUint16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint16x32.AddSaturated", opLen2(ssa.OpAddSaturatedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedInt8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedUint16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.AddSub", opLen2(ssa.OpAddSubFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.AddSub", opLen2(ssa.OpAddSubFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x2.AddSub", opLen2(ssa.OpAddSubFloat64x2, types.TypeVec128), sys.AMD64) @@ -180,18 +117,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint64x2.And", opLen2(ssa.OpAndUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.And", opLen2(ssa.OpAndUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.And", opLen2(ssa.OpAndUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.AndMasked", opLen3(ssa.OpAndMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.AndMasked", opLen3(ssa.OpAndMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.AndMasked", opLen3(ssa.OpAndMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.AndMasked", opLen3(ssa.OpAndMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.AndMasked", opLen3(ssa.OpAndMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.AndMasked", opLen3(ssa.OpAndMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.AndMasked", opLen3(ssa.OpAndMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.AndMasked", opLen3(ssa.OpAndMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.AndMasked", opLen3(ssa.OpAndMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.AndMasked", opLen3(ssa.OpAndMaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.AndMasked", opLen3(ssa.OpAndMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.AndMasked", opLen3(ssa.OpAndMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x16.AndNot", opLen2_21(ssa.OpAndNotInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x32.AndNot", opLen2_21(ssa.OpAndNotInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x64.AndNot", opLen2_21(ssa.OpAndNotInt8x64, types.TypeVec512), sys.AMD64) @@ -216,30 +141,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint64x2.AndNot", opLen2_21(ssa.OpAndNotUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.AndNot", opLen2_21(ssa.OpAndNotUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.AndNot", opLen2_21(ssa.OpAndNotUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint8x16.Average", opLen2(ssa.OpAverageUint8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint8x32.Average", opLen2(ssa.OpAverageUint8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint8x64.Average", opLen2(ssa.OpAverageUint8x64, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint16x8.Average", opLen2(ssa.OpAverageUint16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint16x16.Average", opLen2(ssa.OpAverageUint16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint16x32.Average", opLen2(ssa.OpAverageUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.AverageMasked", opLen3(ssa.OpAverageMaskedUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.AverageMasked", opLen3(ssa.OpAverageMaskedUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.AverageMasked", opLen3(ssa.OpAverageMaskedUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.AverageMasked", opLen3(ssa.OpAverageMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.AverageMasked", opLen3(ssa.OpAverageMaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.AverageMasked", opLen3(ssa.OpAverageMaskedUint16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.Broadcast128", opLen1(ssa.OpBroadcast128Float32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x2.Broadcast128", opLen1(ssa.OpBroadcast128Float64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x16.Broadcast128", opLen1(ssa.OpBroadcast128Int8x16, types.TypeVec128), sys.AMD64) @@ -250,16 +157,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint16x8.Broadcast128", opLen1(ssa.OpBroadcast128Uint16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint32x4.Broadcast128", opLen1(ssa.OpBroadcast128Uint32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x2.Broadcast128", opLen1(ssa.OpBroadcast128Uint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x4.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x2.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x16.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x8.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x4.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x2.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x16.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x8.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x4.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x2.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x4.Broadcast256", opLen1(ssa.OpBroadcast256Float32x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x2.Broadcast256", opLen1(ssa.OpBroadcast256Float64x2, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x16.Broadcast256", opLen1(ssa.OpBroadcast256Int8x16, types.TypeVec256), sys.AMD64) @@ -270,16 +167,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint16x8.Broadcast256", opLen1(ssa.OpBroadcast256Uint16x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint32x4.Broadcast256", opLen1(ssa.OpBroadcast256Uint32x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x2.Broadcast256", opLen1(ssa.OpBroadcast256Uint64x2, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x4.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedFloat32x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x2.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedFloat64x2, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x16.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedInt8x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x8.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedInt16x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x4.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedInt32x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x2.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedInt64x2, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x16.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedUint8x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x8.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedUint16x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x4.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedUint32x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x2.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedUint64x2, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x4.Broadcast512", opLen1(ssa.OpBroadcast512Float32x4, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float64x2.Broadcast512", opLen1(ssa.OpBroadcast512Float64x2, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x16.Broadcast512", opLen1(ssa.OpBroadcast512Int8x16, types.TypeVec512), sys.AMD64) @@ -290,16 +177,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint16x8.Broadcast512", opLen1(ssa.OpBroadcast512Uint16x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint32x4.Broadcast512", opLen1(ssa.OpBroadcast512Uint32x4, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint64x2.Broadcast512", opLen1(ssa.OpBroadcast512Uint64x2, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedFloat32x4, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedFloat64x2, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedInt8x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedInt16x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedInt32x4, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedInt64x2, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedUint8x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedUint16x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedUint32x4, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedUint64x2, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.Ceil", opLen1(ssa.OpCeilFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.Ceil", opLen1(ssa.OpCeilFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x2.Ceil", opLen1(ssa.OpCeilFloat64x2, types.TypeVec128), sys.AMD64) @@ -310,24 +187,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Float64x2.CeilScaled", opLen1Imm8(ssa.OpCeilScaledFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.CeilScaled", opLen1Imm8(ssa.OpCeilScaledFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.CeilScaled", opLen1Imm8(ssa.OpCeilScaledFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x4.CeilScaledMasked", opLen2Imm8(ssa.OpCeilScaledMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64) - addF(simdPackage, "Float32x8.CeilScaledMasked", opLen2Imm8(ssa.OpCeilScaledMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64) - addF(simdPackage, "Float32x16.CeilScaledMasked", opLen2Imm8(ssa.OpCeilScaledMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float64x2.CeilScaledMasked", opLen2Imm8(ssa.OpCeilScaledMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64) - addF(simdPackage, "Float64x4.CeilScaledMasked", opLen2Imm8(ssa.OpCeilScaledMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64) - addF(simdPackage, "Float64x8.CeilScaledMasked", opLen2Imm8(ssa.OpCeilScaledMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.CeilScaledResidue", opLen1Imm8(ssa.OpCeilScaledResidueFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x8.CeilScaledResidue", opLen1Imm8(ssa.OpCeilScaledResidueFloat32x8, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float32x16.CeilScaledResidue", opLen1Imm8(ssa.OpCeilScaledResidueFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x2.CeilScaledResidue", opLen1Imm8(ssa.OpCeilScaledResidueFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.CeilScaledResidue", opLen1Imm8(ssa.OpCeilScaledResidueFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.CeilScaledResidue", opLen1Imm8(ssa.OpCeilScaledResidueFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x4.CeilScaledResidueMasked", opLen2Imm8(ssa.OpCeilScaledResidueMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64) - addF(simdPackage, "Float32x8.CeilScaledResidueMasked", opLen2Imm8(ssa.OpCeilScaledResidueMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64) - addF(simdPackage, "Float32x16.CeilScaledResidueMasked", opLen2Imm8(ssa.OpCeilScaledResidueMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float64x2.CeilScaledResidueMasked", opLen2Imm8(ssa.OpCeilScaledResidueMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64) - addF(simdPackage, "Float64x4.CeilScaledResidueMasked", opLen2Imm8(ssa.OpCeilScaledResidueMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64) - addF(simdPackage, "Float64x8.CeilScaledResidueMasked", opLen2Imm8(ssa.OpCeilScaledResidueMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.Compress", opLen2(ssa.OpCompressFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.Compress", opLen2(ssa.OpCompressFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.Compress", opLen2(ssa.OpCompressFloat32x16, types.TypeVec512), sys.AMD64) @@ -361,15 +226,9 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Float32x4.ConvertToInt32", opLen1(ssa.OpConvertToInt32Float32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.ConvertToInt32", opLen1(ssa.OpConvertToInt32Float32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.ConvertToInt32", opLen1(ssa.OpConvertToInt32Float32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.ConvertToInt32Masked", opLen2(ssa.OpConvertToInt32MaskedFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.ConvertToInt32Masked", opLen2(ssa.OpConvertToInt32MaskedFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.ConvertToInt32Masked", opLen2(ssa.OpConvertToInt32MaskedFloat32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.ConvertToUint32", opLen1(ssa.OpConvertToUint32Float32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.ConvertToUint32", opLen1(ssa.OpConvertToUint32Float32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.ConvertToUint32", opLen1(ssa.OpConvertToUint32Float32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.ConvertToUint32Masked", opLen2(ssa.OpConvertToUint32MaskedFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.ConvertToUint32Masked", opLen2(ssa.OpConvertToUint32MaskedFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.ConvertToUint32Masked", opLen2(ssa.OpConvertToUint32MaskedFloat32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x16.CopySign", opLen2(ssa.OpCopySignInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x32.CopySign", opLen2(ssa.OpCopySignInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x8.CopySign", opLen2(ssa.OpCopySignInt16x8, types.TypeVec128), sys.AMD64) @@ -382,24 +241,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Float64x2.Div", opLen2(ssa.OpDivFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x4.Div", opLen2(ssa.OpDivFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x8.Div", opLen2(ssa.OpDivFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.DivMasked", opLen3(ssa.OpDivMaskedFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.DivMasked", opLen3(ssa.OpDivMaskedFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.DivMasked", opLen3(ssa.OpDivMaskedFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.DivMasked", opLen3(ssa.OpDivMaskedFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.DivMasked", opLen3(ssa.OpDivMaskedFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.DivMasked", opLen3(ssa.OpDivMaskedFloat64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x8.DotProdPairs", opLen2(ssa.OpDotProdPairsInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x16.DotProdPairs", opLen2(ssa.OpDotProdPairsInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x32.DotProdPairs", opLen2(ssa.OpDotProdPairsInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.DotProdPairsMasked", opLen3(ssa.OpDotProdPairsMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.DotProdPairsMasked", opLen3(ssa.OpDotProdPairsMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.DotProdPairsMasked", opLen3(ssa.OpDotProdPairsMaskedInt16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint8x16.DotProdPairsSaturated", opLen2(ssa.OpDotProdPairsSaturatedUint8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint8x32.DotProdPairsSaturated", opLen2(ssa.OpDotProdPairsSaturatedUint8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint8x64.DotProdPairsSaturated", opLen2(ssa.OpDotProdPairsSaturatedUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.DotProdPairsSaturatedMasked", opLen3(ssa.OpDotProdPairsSaturatedMaskedUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.DotProdPairsSaturatedMasked", opLen3(ssa.OpDotProdPairsSaturatedMaskedUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.DotProdPairsSaturatedMasked", opLen3(ssa.OpDotProdPairsSaturatedMaskedUint8x64, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x16.Equal", opLen2(ssa.OpEqualInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x32.Equal", opLen2(ssa.OpEqualInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x64.Equal", opLen2(ssa.OpEqualInt8x64, types.TypeVec512), sys.AMD64) @@ -430,36 +277,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Float64x2.Equal", opLen2(ssa.OpEqualFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x4.Equal", opLen2(ssa.OpEqualFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x8.Equal", opLen2(ssa.OpEqualFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.EqualMasked", opLen3(ssa.OpEqualMaskedFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.EqualMasked", opLen3(ssa.OpEqualMaskedFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.EqualMasked", opLen3(ssa.OpEqualMaskedFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.EqualMasked", opLen3(ssa.OpEqualMaskedFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.EqualMasked", opLen3(ssa.OpEqualMaskedFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.EqualMasked", opLen3(ssa.OpEqualMaskedFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.EqualMasked", opLen3(ssa.OpEqualMaskedInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.EqualMasked", opLen3(ssa.OpEqualMaskedInt8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.EqualMasked", opLen3(ssa.OpEqualMaskedInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.EqualMasked", opLen3(ssa.OpEqualMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.EqualMasked", opLen3(ssa.OpEqualMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.EqualMasked", opLen3(ssa.OpEqualMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.EqualMasked", opLen3(ssa.OpEqualMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.EqualMasked", opLen3(ssa.OpEqualMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.EqualMasked", opLen3(ssa.OpEqualMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.EqualMasked", opLen3(ssa.OpEqualMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.EqualMasked", opLen3(ssa.OpEqualMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.EqualMasked", opLen3(ssa.OpEqualMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.EqualMasked", opLen3(ssa.OpEqualMaskedUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.EqualMasked", opLen3(ssa.OpEqualMaskedUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.EqualMasked", opLen3(ssa.OpEqualMaskedUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.EqualMasked", opLen3(ssa.OpEqualMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.EqualMasked", opLen3(ssa.OpEqualMaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.EqualMasked", opLen3(ssa.OpEqualMaskedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.EqualMasked", opLen3(ssa.OpEqualMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.EqualMasked", opLen3(ssa.OpEqualMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.EqualMasked", opLen3(ssa.OpEqualMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.EqualMasked", opLen3(ssa.OpEqualMaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.EqualMasked", opLen3(ssa.OpEqualMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.EqualMasked", opLen3(ssa.OpEqualMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, 
"Float32x4.Expand", opLen2(ssa.OpExpandFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.Expand", opLen2(ssa.OpExpandFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.Expand", opLen2(ssa.OpExpandFloat32x16, types.TypeVec512), sys.AMD64) @@ -500,42 +317,21 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Float64x2.FloorScaled", opLen1Imm8(ssa.OpFloorScaledFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.FloorScaled", opLen1Imm8(ssa.OpFloorScaledFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.FloorScaled", opLen1Imm8(ssa.OpFloorScaledFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x4.FloorScaledMasked", opLen2Imm8(ssa.OpFloorScaledMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64) - addF(simdPackage, "Float32x8.FloorScaledMasked", opLen2Imm8(ssa.OpFloorScaledMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64) - addF(simdPackage, "Float32x16.FloorScaledMasked", opLen2Imm8(ssa.OpFloorScaledMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float64x2.FloorScaledMasked", opLen2Imm8(ssa.OpFloorScaledMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64) - addF(simdPackage, "Float64x4.FloorScaledMasked", opLen2Imm8(ssa.OpFloorScaledMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64) - addF(simdPackage, "Float64x8.FloorScaledMasked", opLen2Imm8(ssa.OpFloorScaledMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.FloorScaledResidue", opLen1Imm8(ssa.OpFloorScaledResidueFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x8.FloorScaledResidue", opLen1Imm8(ssa.OpFloorScaledResidueFloat32x8, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float32x16.FloorScaledResidue", opLen1Imm8(ssa.OpFloorScaledResidueFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x2.FloorScaledResidue", opLen1Imm8(ssa.OpFloorScaledResidueFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.FloorScaledResidue", opLen1Imm8(ssa.OpFloorScaledResidueFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.FloorScaledResidue", opLen1Imm8(ssa.OpFloorScaledResidueFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x4.FloorScaledResidueMasked", opLen2Imm8(ssa.OpFloorScaledResidueMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64) - addF(simdPackage, "Float32x8.FloorScaledResidueMasked", opLen2Imm8(ssa.OpFloorScaledResidueMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64) - addF(simdPackage, "Float32x16.FloorScaledResidueMasked", opLen2Imm8(ssa.OpFloorScaledResidueMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float64x2.FloorScaledResidueMasked", opLen2Imm8(ssa.OpFloorScaledResidueMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64) - addF(simdPackage, "Float64x4.FloorScaledResidueMasked", opLen2Imm8(ssa.OpFloorScaledResidueMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64) - addF(simdPackage, "Float64x8.FloorScaledResidueMasked", opLen2Imm8(ssa.OpFloorScaledResidueMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Uint8x16.GaloisFieldAffineTransform", opLen2Imm8_2I(ssa.OpGaloisFieldAffineTransformUint8x16, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Uint8x32.GaloisFieldAffineTransform", opLen2Imm8_2I(ssa.OpGaloisFieldAffineTransformUint8x32, types.TypeVec256, 0), sys.AMD64) addF(simdPackage, "Uint8x64.GaloisFieldAffineTransform", opLen2Imm8_2I(ssa.OpGaloisFieldAffineTransformUint8x64, 
types.TypeVec512, 0), sys.AMD64) addF(simdPackage, "Uint8x16.GaloisFieldAffineTransformInverse", opLen2Imm8_2I(ssa.OpGaloisFieldAffineTransformInverseUint8x16, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Uint8x32.GaloisFieldAffineTransformInverse", opLen2Imm8_2I(ssa.OpGaloisFieldAffineTransformInverseUint8x32, types.TypeVec256, 0), sys.AMD64) addF(simdPackage, "Uint8x64.GaloisFieldAffineTransformInverse", opLen2Imm8_2I(ssa.OpGaloisFieldAffineTransformInverseUint8x64, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Uint8x16.GaloisFieldAffineTransformInverseMasked", opLen3Imm8_2I(ssa.OpGaloisFieldAffineTransformInverseMaskedUint8x16, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint8x32.GaloisFieldAffineTransformInverseMasked", opLen3Imm8_2I(ssa.OpGaloisFieldAffineTransformInverseMaskedUint8x32, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint8x64.GaloisFieldAffineTransformInverseMasked", opLen3Imm8_2I(ssa.OpGaloisFieldAffineTransformInverseMaskedUint8x64, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Uint8x16.GaloisFieldAffineTransformMasked", opLen3Imm8_2I(ssa.OpGaloisFieldAffineTransformMaskedUint8x16, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint8x32.GaloisFieldAffineTransformMasked", opLen3Imm8_2I(ssa.OpGaloisFieldAffineTransformMaskedUint8x32, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint8x64.GaloisFieldAffineTransformMasked", opLen3Imm8_2I(ssa.OpGaloisFieldAffineTransformMaskedUint8x64, types.TypeVec512, 0), sys.AMD64) addF(simdPackage, "Uint8x16.GaloisFieldMul", opLen2(ssa.OpGaloisFieldMulUint8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint8x32.GaloisFieldMul", opLen2(ssa.OpGaloisFieldMulUint8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint8x64.GaloisFieldMul", opLen2(ssa.OpGaloisFieldMulUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.GaloisFieldMulMasked", opLen3(ssa.OpGaloisFieldMulMaskedUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.GaloisFieldMulMasked", opLen3(ssa.OpGaloisFieldMulMaskedUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.GaloisFieldMulMasked", opLen3(ssa.OpGaloisFieldMulMaskedUint8x64, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.GetElem", opLen1Imm8(ssa.OpGetElemFloat32x4, types.Types[types.TFLOAT32], 0), sys.AMD64) addF(simdPackage, "Float64x2.GetElem", opLen1Imm8(ssa.OpGetElemFloat64x2, types.Types[types.TFLOAT64], 0), sys.AMD64) addF(simdPackage, "Int8x16.GetElem", opLen1Imm8(ssa.OpGetElemInt8x16, types.Types[types.TINT8], 0), sys.AMD64) @@ -622,78 +418,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint16x32.GreaterEqual", opLen2(ssa.OpGreaterEqualUint16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint32x16.GreaterEqual", opLen2(ssa.OpGreaterEqualUint32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint64x8.GreaterEqual", opLen2(ssa.OpGreaterEqualUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.GreaterEqualMasked", 
opLen3(ssa.OpGreaterEqualMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.GreaterMasked", opLen3(ssa.OpGreaterMaskedFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.GreaterMasked", opLen3(ssa.OpGreaterMaskedFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.GreaterMasked", opLen3(ssa.OpGreaterMaskedFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.GreaterMasked", opLen3(ssa.OpGreaterMaskedFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.GreaterMasked", opLen3(ssa.OpGreaterMaskedFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.GreaterMasked", opLen3(ssa.OpGreaterMaskedFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint64x2, 
types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.IsNan", opLen2(ssa.OpIsNanFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.IsNan", opLen2(ssa.OpIsNanFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.IsNan", opLen2(ssa.OpIsNanFloat32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float64x2.IsNan", opLen2(ssa.OpIsNanFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x4.IsNan", opLen2(ssa.OpIsNanFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x8.IsNan", opLen2(ssa.OpIsNanFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.IsNanMasked", opLen3(ssa.OpIsNanMaskedFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.IsNanMasked", opLen3(ssa.OpIsNanMaskedFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.IsNanMasked", opLen3(ssa.OpIsNanMaskedFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.IsNanMasked", opLen3(ssa.OpIsNanMaskedFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.IsNanMasked", opLen3(ssa.OpIsNanMaskedFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.IsNanMasked", opLen3(ssa.OpIsNanMaskedFloat64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.Less", opLen2(ssa.OpLessFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.Less", opLen2(ssa.OpLessFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.Less", opLen2(ssa.OpLessFloat32x16, types.TypeVec512), sys.AMD64) @@ -722,66 +452,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint16x32.LessEqual", opLen2(ssa.OpLessEqualUint16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint32x16.LessEqual", opLen2(ssa.OpLessEqualUint32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint64x8.LessEqual", opLen2(ssa.OpLessEqualUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint64x2, types.TypeVec128), 
sys.AMD64) - addF(simdPackage, "Uint64x4.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.LessMasked", opLen3(ssa.OpLessMaskedFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.LessMasked", opLen3(ssa.OpLessMaskedFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.LessMasked", opLen3(ssa.OpLessMaskedFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.LessMasked", opLen3(ssa.OpLessMaskedFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.LessMasked", opLen3(ssa.OpLessMaskedFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.LessMasked", opLen3(ssa.OpLessMaskedFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.LessMasked", opLen3(ssa.OpLessMaskedInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.LessMasked", opLen3(ssa.OpLessMaskedInt8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.LessMasked", opLen3(ssa.OpLessMaskedInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.LessMasked", opLen3(ssa.OpLessMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.LessMasked", opLen3(ssa.OpLessMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.LessMasked", opLen3(ssa.OpLessMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.LessMasked", opLen3(ssa.OpLessMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.LessMasked", opLen3(ssa.OpLessMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.LessMasked", opLen3(ssa.OpLessMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.LessMasked", opLen3(ssa.OpLessMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.LessMasked", opLen3(ssa.OpLessMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.LessMasked", opLen3(ssa.OpLessMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.LessMasked", opLen3(ssa.OpLessMaskedUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.LessMasked", opLen3(ssa.OpLessMaskedUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.LessMasked", opLen3(ssa.OpLessMaskedUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.LessMasked", opLen3(ssa.OpLessMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.LessMasked", opLen3(ssa.OpLessMaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.LessMasked", opLen3(ssa.OpLessMaskedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.LessMasked", opLen3(ssa.OpLessMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.LessMasked", opLen3(ssa.OpLessMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.LessMasked", opLen3(ssa.OpLessMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.LessMasked", opLen3(ssa.OpLessMaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.LessMasked", opLen3(ssa.OpLessMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.LessMasked", opLen3(ssa.OpLessMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.Max", opLen2(ssa.OpMaxFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.Max", 
opLen2(ssa.OpMaxFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.Max", opLen2(ssa.OpMaxFloat32x16, types.TypeVec512), sys.AMD64) @@ -812,36 +482,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Uint64x2.Max", opLen2(ssa.OpMaxUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.Max", opLen2(ssa.OpMaxUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.Max", opLen2(ssa.OpMaxUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.MaxMasked", opLen3(ssa.OpMaxMaskedFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.MaxMasked", opLen3(ssa.OpMaxMaskedFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.MaxMasked", opLen3(ssa.OpMaxMaskedFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.MaxMasked", opLen3(ssa.OpMaxMaskedFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.MaxMasked", opLen3(ssa.OpMaxMaskedFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.MaxMasked", opLen3(ssa.OpMaxMaskedFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.MaxMasked", opLen3(ssa.OpMaxMaskedInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.MaxMasked", opLen3(ssa.OpMaxMaskedInt8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.MaxMasked", opLen3(ssa.OpMaxMaskedInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.MaxMasked", opLen3(ssa.OpMaxMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.MaxMasked", opLen3(ssa.OpMaxMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.MaxMasked", opLen3(ssa.OpMaxMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.MaxMasked", opLen3(ssa.OpMaxMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.MaxMasked", opLen3(ssa.OpMaxMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.MaxMasked", opLen3(ssa.OpMaxMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.MaxMasked", opLen3(ssa.OpMaxMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.MaxMasked", opLen3(ssa.OpMaxMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.MaxMasked", opLen3(ssa.OpMaxMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.MaxMasked", opLen3(ssa.OpMaxMaskedUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.MaxMasked", opLen3(ssa.OpMaxMaskedUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.MaxMasked", opLen3(ssa.OpMaxMaskedUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.MaxMasked", opLen3(ssa.OpMaxMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.MaxMasked", opLen3(ssa.OpMaxMaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.MaxMasked", opLen3(ssa.OpMaxMaskedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.MaxMasked", opLen3(ssa.OpMaxMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.MaxMasked", opLen3(ssa.OpMaxMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.MaxMasked", opLen3(ssa.OpMaxMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.MaxMasked", opLen3(ssa.OpMaxMaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.MaxMasked", opLen3(ssa.OpMaxMaskedUint64x4, types.TypeVec256), sys.AMD64) - 
addF(simdPackage, "Uint64x8.MaxMasked", opLen3(ssa.OpMaxMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.Min", opLen2(ssa.OpMinFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.Min", opLen2(ssa.OpMinFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.Min", opLen2(ssa.OpMinFloat32x16, types.TypeVec512), sys.AMD64) @@ -872,36 +512,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Uint64x2.Min", opLen2(ssa.OpMinUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.Min", opLen2(ssa.OpMinUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.Min", opLen2(ssa.OpMinUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.MinMasked", opLen3(ssa.OpMinMaskedFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.MinMasked", opLen3(ssa.OpMinMaskedFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.MinMasked", opLen3(ssa.OpMinMaskedFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.MinMasked", opLen3(ssa.OpMinMaskedFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.MinMasked", opLen3(ssa.OpMinMaskedFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.MinMasked", opLen3(ssa.OpMinMaskedFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.MinMasked", opLen3(ssa.OpMinMaskedInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.MinMasked", opLen3(ssa.OpMinMaskedInt8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.MinMasked", opLen3(ssa.OpMinMaskedInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.MinMasked", opLen3(ssa.OpMinMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.MinMasked", opLen3(ssa.OpMinMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.MinMasked", opLen3(ssa.OpMinMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.MinMasked", opLen3(ssa.OpMinMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.MinMasked", opLen3(ssa.OpMinMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.MinMasked", opLen3(ssa.OpMinMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.MinMasked", opLen3(ssa.OpMinMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.MinMasked", opLen3(ssa.OpMinMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.MinMasked", opLen3(ssa.OpMinMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.MinMasked", opLen3(ssa.OpMinMaskedUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.MinMasked", opLen3(ssa.OpMinMaskedUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.MinMasked", opLen3(ssa.OpMinMaskedUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.MinMasked", opLen3(ssa.OpMinMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.MinMasked", opLen3(ssa.OpMinMaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.MinMasked", opLen3(ssa.OpMinMaskedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.MinMasked", opLen3(ssa.OpMinMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.MinMasked", opLen3(ssa.OpMinMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.MinMasked", opLen3(ssa.OpMinMaskedUint32x16, types.TypeVec512), 
sys.AMD64) - addF(simdPackage, "Uint64x2.MinMasked", opLen3(ssa.OpMinMaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.MinMasked", opLen3(ssa.OpMinMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.MinMasked", opLen3(ssa.OpMinMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.Mul", opLen2(ssa.OpMulFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.Mul", opLen2(ssa.OpMulFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.Mul", opLen2(ssa.OpMulFloat32x16, types.TypeVec512), sys.AMD64) @@ -932,24 +542,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Float64x2.MulAdd", opLen3(ssa.OpMulAddFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x4.MulAdd", opLen3(ssa.OpMulAddFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x8.MulAdd", opLen3(ssa.OpMulAddFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.MulAddMasked", opLen4(ssa.OpMulAddMaskedFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.MulAddMasked", opLen4(ssa.OpMulAddMaskedFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.MulAddMasked", opLen4(ssa.OpMulAddMaskedFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.MulAddMasked", opLen4(ssa.OpMulAddMaskedFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.MulAddMasked", opLen4(ssa.OpMulAddMaskedFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.MulAddMasked", opLen4(ssa.OpMulAddMaskedFloat64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.MulAddSub", opLen3(ssa.OpMulAddSubFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.MulAddSub", opLen3(ssa.OpMulAddSubFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.MulAddSub", opLen3(ssa.OpMulAddSubFloat32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float64x2.MulAddSub", opLen3(ssa.OpMulAddSubFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x4.MulAddSub", opLen3(ssa.OpMulAddSubFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x8.MulAddSub", opLen3(ssa.OpMulAddSubFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.MulAddSubMasked", opLen4(ssa.OpMulAddSubMaskedFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.MulAddSubMasked", opLen4(ssa.OpMulAddSubMaskedFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.MulAddSubMasked", opLen4(ssa.OpMulAddSubMaskedFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.MulAddSubMasked", opLen4(ssa.OpMulAddSubMaskedFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.MulAddSubMasked", opLen4(ssa.OpMulAddSubMaskedFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.MulAddSubMasked", opLen4(ssa.OpMulAddSubMaskedFloat64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int32x4.MulEvenWiden", opLen2(ssa.OpMulEvenWidenInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int32x8.MulEvenWiden", opLen2(ssa.OpMulEvenWidenInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint32x4.MulEvenWiden", opLen2(ssa.OpMulEvenWidenUint32x4, types.TypeVec128), sys.AMD64) @@ -960,48 +558,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint16x8.MulHigh", opLen2(ssa.OpMulHighUint16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint16x16.MulHigh", opLen2(ssa.OpMulHighUint16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint16x32.MulHigh", opLen2(ssa.OpMulHighUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.MulHighMasked", opLen3(ssa.OpMulHighMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.MulHighMasked", opLen3(ssa.OpMulHighMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.MulHighMasked", opLen3(ssa.OpMulHighMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.MulHighMasked", opLen3(ssa.OpMulHighMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.MulHighMasked", opLen3(ssa.OpMulHighMaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.MulHighMasked", opLen3(ssa.OpMulHighMaskedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.MulMasked", opLen3(ssa.OpMulMaskedFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.MulMasked", opLen3(ssa.OpMulMaskedFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.MulMasked", opLen3(ssa.OpMulMaskedFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.MulMasked", opLen3(ssa.OpMulMaskedFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.MulMasked", opLen3(ssa.OpMulMaskedFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.MulMasked", opLen3(ssa.OpMulMaskedFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.MulMasked", opLen3(ssa.OpMulMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.MulMasked", opLen3(ssa.OpMulMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.MulMasked", opLen3(ssa.OpMulMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.MulMasked", opLen3(ssa.OpMulMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.MulMasked", opLen3(ssa.OpMulMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.MulMasked", opLen3(ssa.OpMulMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.MulMasked", opLen3(ssa.OpMulMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.MulMasked", opLen3(ssa.OpMulMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.MulMasked", opLen3(ssa.OpMulMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.MulMasked", opLen3(ssa.OpMulMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.MulMasked", opLen3(ssa.OpMulMaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.MulMasked", opLen3(ssa.OpMulMaskedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.MulMasked", opLen3(ssa.OpMulMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.MulMasked", opLen3(ssa.OpMulMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.MulMasked", opLen3(ssa.OpMulMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.MulMasked", opLen3(ssa.OpMulMaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.MulMasked", opLen3(ssa.OpMulMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.MulMasked", opLen3(ssa.OpMulMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.MulSubAdd", opLen3(ssa.OpMulSubAddFloat32x4, 
types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.MulSubAdd", opLen3(ssa.OpMulSubAddFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.MulSubAdd", opLen3(ssa.OpMulSubAddFloat32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float64x2.MulSubAdd", opLen3(ssa.OpMulSubAddFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x4.MulSubAdd", opLen3(ssa.OpMulSubAddFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x8.MulSubAdd", opLen3(ssa.OpMulSubAddFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.MulSubAddMasked", opLen4(ssa.OpMulSubAddMaskedFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.MulSubAddMasked", opLen4(ssa.OpMulSubAddMaskedFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.MulSubAddMasked", opLen4(ssa.OpMulSubAddMaskedFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.MulSubAddMasked", opLen4(ssa.OpMulSubAddMaskedFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.MulSubAddMasked", opLen4(ssa.OpMulSubAddMaskedFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.MulSubAddMasked", opLen4(ssa.OpMulSubAddMaskedFloat64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.NotEqual", opLen2(ssa.OpNotEqualFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.NotEqual", opLen2(ssa.OpNotEqualFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.NotEqual", opLen2(ssa.OpNotEqualFloat32x16, types.TypeVec512), sys.AMD64) @@ -1016,36 +578,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Uint16x32.NotEqual", opLen2(ssa.OpNotEqualUint16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint32x16.NotEqual", opLen2(ssa.OpNotEqualUint32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint64x8.NotEqual", opLen2(ssa.OpNotEqualUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt32x8, 
types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x16.OnesCount", opLen1(ssa.OpOnesCountInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x32.OnesCount", opLen1(ssa.OpOnesCountInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x64.OnesCount", opLen1(ssa.OpOnesCountInt8x64, types.TypeVec512), sys.AMD64) @@ -1070,30 +602,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint64x2.OnesCount", opLen1(ssa.OpOnesCountUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.OnesCount", opLen1(ssa.OpOnesCountUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.OnesCount", opLen1(ssa.OpOnesCountUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x16.Or", opLen2(ssa.OpOrInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x32.Or", opLen2(ssa.OpOrInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x64.Or", opLen2(ssa.OpOrInt8x64, types.TypeVec512), sys.AMD64) @@ -1118,18 +626,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint64x2.Or", opLen2(ssa.OpOrUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.Or", opLen2(ssa.OpOrUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.Or", opLen2(ssa.OpOrUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.OrMasked", opLen3(ssa.OpOrMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.OrMasked", opLen3(ssa.OpOrMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.OrMasked", opLen3(ssa.OpOrMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.OrMasked", opLen3(ssa.OpOrMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.OrMasked", opLen3(ssa.OpOrMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.OrMasked", opLen3(ssa.OpOrMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.OrMasked", opLen3(ssa.OpOrMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.OrMasked", opLen3(ssa.OpOrMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.OrMasked", opLen3(ssa.OpOrMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.OrMasked", opLen3(ssa.OpOrMaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.OrMasked", opLen3(ssa.OpOrMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.OrMasked", opLen3(ssa.OpOrMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x16.Permute", opLen2_21(ssa.OpPermuteInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint8x16.Permute", opLen2_21(ssa.OpPermuteUint8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x32.Permute", opLen2_21(ssa.OpPermuteInt8x32, types.TypeVec256), sys.AMD64) @@ -1184,84 +680,18 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Float64x8.Permute2", opLen3_231(ssa.OpPermute2Float64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int64x8.Permute2", opLen3_231(ssa.OpPermute2Int64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint64x8.Permute2", opLen3_231(ssa.OpPermute2Uint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x16.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x32.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x64.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x8.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x16.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x32.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x4.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x4.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x8.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x8.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x16.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x16.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x2.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x2.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x4.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x4.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.Permute2Masked", 
opLen4_231(ssa.OpPermute2MaskedFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x8.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x8.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x16.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x32.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x64.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x8.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x16.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x32.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x8.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x8.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x8.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x16.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x16.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x4.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x4.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x4.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x8.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x8.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.Reciprocal", opLen1(ssa.OpReciprocalFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.Reciprocal", opLen1(ssa.OpReciprocalFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.Reciprocal", opLen1(ssa.OpReciprocalFloat32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float64x2.Reciprocal", opLen1(ssa.OpReciprocalFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x4.Reciprocal", 
opLen1(ssa.OpReciprocalFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x8.Reciprocal", opLen1(ssa.OpReciprocalFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.ReciprocalMasked", opLen2(ssa.OpReciprocalMaskedFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.ReciprocalMasked", opLen2(ssa.OpReciprocalMaskedFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.ReciprocalMasked", opLen2(ssa.OpReciprocalMaskedFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.ReciprocalMasked", opLen2(ssa.OpReciprocalMaskedFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.ReciprocalMasked", opLen2(ssa.OpReciprocalMaskedFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.ReciprocalMasked", opLen2(ssa.OpReciprocalMaskedFloat64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.ReciprocalSqrt", opLen1(ssa.OpReciprocalSqrtFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.ReciprocalSqrt", opLen1(ssa.OpReciprocalSqrtFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.ReciprocalSqrt", opLen1(ssa.OpReciprocalSqrtFloat32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float64x2.ReciprocalSqrt", opLen1(ssa.OpReciprocalSqrtFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x4.ReciprocalSqrt", opLen1(ssa.OpReciprocalSqrtFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x8.ReciprocalSqrt", opLen1(ssa.OpReciprocalSqrtFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.ReciprocalSqrtMasked", opLen2(ssa.OpReciprocalSqrtMaskedFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.ReciprocalSqrtMasked", opLen2(ssa.OpReciprocalSqrtMaskedFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.ReciprocalSqrtMasked", opLen2(ssa.OpReciprocalSqrtMaskedFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.ReciprocalSqrtMasked", opLen2(ssa.OpReciprocalSqrtMaskedFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.ReciprocalSqrtMasked", opLen2(ssa.OpReciprocalSqrtMaskedFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.ReciprocalSqrtMasked", opLen2(ssa.OpReciprocalSqrtMaskedFloat64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int32x4.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftInt32x4, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Int32x8.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftInt32x8, types.TypeVec256, 0), sys.AMD64) addF(simdPackage, "Int32x16.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftInt32x16, types.TypeVec512, 0), sys.AMD64) @@ -1274,18 +704,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint64x2.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftUint64x2, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Uint64x4.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftUint64x4, types.TypeVec256, 0), sys.AMD64) addF(simdPackage, "Uint64x8.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftUint64x8, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Int32x4.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedInt32x4, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Int32x8.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedInt32x8, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Int32x16.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedInt32x16, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Int64x2.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedInt64x2, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Int64x4.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedInt64x4, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Int64x8.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedInt64x8, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Uint32x4.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedUint32x4, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint32x8.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedUint32x8, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint32x16.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedUint32x16, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Uint64x2.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedUint64x2, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint64x4.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedUint64x4, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint64x8.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedUint64x8, types.TypeVec512, 0), sys.AMD64) addF(simdPackage, "Int32x4.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightInt32x4, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Int32x8.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightInt32x8, types.TypeVec256, 0), sys.AMD64) addF(simdPackage, "Int32x16.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightInt32x16, types.TypeVec512, 0), sys.AMD64) @@ -1298,18 +716,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint64x2.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightUint64x2, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Uint64x4.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightUint64x4, types.TypeVec256, 0), sys.AMD64) addF(simdPackage, "Uint64x8.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightUint64x8, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Int32x4.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedInt32x4, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Int32x8.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedInt32x8, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Int32x16.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedInt32x16, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Int64x2.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedInt64x2, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Int64x4.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedInt64x4, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Int64x8.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedInt64x8, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Uint32x4.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedUint32x4, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint32x8.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedUint32x8, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint32x16.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedUint32x16, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Uint64x2.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedUint64x2, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint64x4.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedUint64x4, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint64x8.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedUint64x8, types.TypeVec512, 0), sys.AMD64) addF(simdPackage, "Int32x4.RotateLeft", opLen2(ssa.OpRotateLeftInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int32x8.RotateLeft", opLen2(ssa.OpRotateLeftInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int32x16.RotateLeft", opLen2(ssa.OpRotateLeftInt32x16, types.TypeVec512), sys.AMD64) @@ -1322,18 +728,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint64x2.RotateLeft", opLen2(ssa.OpRotateLeftUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.RotateLeft", opLen2(ssa.OpRotateLeftUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.RotateLeft", opLen2(ssa.OpRotateLeftUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int32x4.RotateRight", opLen2(ssa.OpRotateRightInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int32x8.RotateRight", opLen2(ssa.OpRotateRightInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int32x16.RotateRight", opLen2(ssa.OpRotateRightInt32x16, types.TypeVec512), sys.AMD64) @@ -1346,18 +740,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint64x2.RotateRight", opLen2(ssa.OpRotateRightUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.RotateRight", opLen2(ssa.OpRotateRightUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.RotateRight", opLen2(ssa.OpRotateRightUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.RoundToEven", opLen1(ssa.OpRoundToEvenFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.RoundToEven", opLen1(ssa.OpRoundToEvenFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x2.RoundToEven", opLen1(ssa.OpRoundToEvenFloat64x2, types.TypeVec128), sys.AMD64) @@ -1368,36 +750,18 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Float64x2.RoundToEvenScaled", opLen1Imm8(ssa.OpRoundToEvenScaledFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.RoundToEvenScaled", opLen1Imm8(ssa.OpRoundToEvenScaledFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.RoundToEvenScaled", opLen1Imm8(ssa.OpRoundToEvenScaledFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x4.RoundToEvenScaledMasked", opLen2Imm8(ssa.OpRoundToEvenScaledMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64) - addF(simdPackage, "Float32x8.RoundToEvenScaledMasked", opLen2Imm8(ssa.OpRoundToEvenScaledMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64) - addF(simdPackage, "Float32x16.RoundToEvenScaledMasked", opLen2Imm8(ssa.OpRoundToEvenScaledMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float64x2.RoundToEvenScaledMasked", opLen2Imm8(ssa.OpRoundToEvenScaledMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64) - addF(simdPackage, "Float64x4.RoundToEvenScaledMasked", opLen2Imm8(ssa.OpRoundToEvenScaledMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64) - addF(simdPackage, "Float64x8.RoundToEvenScaledMasked", opLen2Imm8(ssa.OpRoundToEvenScaledMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.RoundToEvenScaledResidue", opLen1Imm8(ssa.OpRoundToEvenScaledResidueFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x8.RoundToEvenScaledResidue", opLen1Imm8(ssa.OpRoundToEvenScaledResidueFloat32x8, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float32x16.RoundToEvenScaledResidue", opLen1Imm8(ssa.OpRoundToEvenScaledResidueFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x2.RoundToEvenScaledResidue", opLen1Imm8(ssa.OpRoundToEvenScaledResidueFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.RoundToEvenScaledResidue", opLen1Imm8(ssa.OpRoundToEvenScaledResidueFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.RoundToEvenScaledResidue", opLen1Imm8(ssa.OpRoundToEvenScaledResidueFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x4.RoundToEvenScaledResidueMasked", opLen2Imm8(ssa.OpRoundToEvenScaledResidueMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64) - addF(simdPackage, "Float32x8.RoundToEvenScaledResidueMasked", opLen2Imm8(ssa.OpRoundToEvenScaledResidueMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64) - addF(simdPackage, "Float32x16.RoundToEvenScaledResidueMasked", opLen2Imm8(ssa.OpRoundToEvenScaledResidueMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float64x2.RoundToEvenScaledResidueMasked", opLen2Imm8(ssa.OpRoundToEvenScaledResidueMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64) - addF(simdPackage, "Float64x4.RoundToEvenScaledResidueMasked", opLen2Imm8(ssa.OpRoundToEvenScaledResidueMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64) - addF(simdPackage, "Float64x8.RoundToEvenScaledResidueMasked", opLen2Imm8(ssa.OpRoundToEvenScaledResidueMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.Scale", opLen2(ssa.OpScaleFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.Scale", opLen2(ssa.OpScaleFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.Scale", opLen2(ssa.OpScaleFloat32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float64x2.Scale", opLen2(ssa.OpScaleFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x4.Scale", opLen2(ssa.OpScaleFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x8.Scale", 
opLen2(ssa.OpScaleFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.SetElem", opLen2Imm8(ssa.OpSetElemFloat32x4, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Float64x2.SetElem", opLen2Imm8(ssa.OpSetElemFloat64x2, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Int8x16.SetElem", opLen2Imm8(ssa.OpSetElemInt8x16, types.TypeVec128, 0), sys.AMD64) @@ -1484,42 +848,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Uint64x2.ShiftAllLeftConcat", opLen2Imm8(ssa.OpShiftAllLeftConcatUint64x2, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Uint64x4.ShiftAllLeftConcat", opLen2Imm8(ssa.OpShiftAllLeftConcatUint64x4, types.TypeVec256, 0), sys.AMD64) addF(simdPackage, "Uint64x8.ShiftAllLeftConcat", opLen2Imm8(ssa.OpShiftAllLeftConcatUint64x8, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Int16x8.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedInt16x8, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Int16x16.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedInt16x16, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Int16x32.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedInt16x32, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Int32x4.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedInt32x4, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Int32x8.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedInt32x8, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Int32x16.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedInt32x16, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Int64x2.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedInt64x2, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Int64x4.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedInt64x4, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Int64x8.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedInt64x8, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Uint16x8.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedUint16x8, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint16x16.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedUint16x16, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint16x32.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedUint16x32, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Uint32x4.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedUint32x4, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint32x8.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedUint32x8, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint32x16.ShiftAllLeftConcatMasked", 
opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedUint32x16, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Uint64x2.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedUint64x2, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint64x4.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedUint64x4, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint64x8.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedUint64x8, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Int16x8.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x8.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x16.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x32.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt16x32, types.TypeVec512), sys.AMD64) @@ -1556,42 +884,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint64x2.ShiftAllRightConcat", opLen2Imm8(ssa.OpShiftAllRightConcatUint64x2, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Uint64x4.ShiftAllRightConcat", opLen2Imm8(ssa.OpShiftAllRightConcatUint64x4, types.TypeVec256, 0), sys.AMD64) addF(simdPackage, "Uint64x8.ShiftAllRightConcat", opLen2Imm8(ssa.OpShiftAllRightConcatUint64x8, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Int16x8.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedInt16x8, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Int16x16.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedInt16x16, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Int16x32.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedInt16x32, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Int32x4.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedInt32x4, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Int32x8.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedInt32x8, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Int32x16.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedInt32x16, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Int64x2.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedInt64x2, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Int64x4.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedInt64x4, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Int64x8.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedInt64x8, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Uint16x8.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedUint16x8, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint16x16.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedUint16x16, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint16x32.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedUint16x32, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Uint32x4.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedUint32x4, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint32x8.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedUint32x8, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint32x16.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedUint32x16, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Uint64x2.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedUint64x2, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint64x4.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedUint64x4, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint64x8.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedUint64x8, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Int16x8.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt32x8, 
types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x8.ShiftLeft", opLen2(ssa.OpShiftLeftInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x16.ShiftLeft", opLen2(ssa.OpShiftLeftInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x32.ShiftLeft", opLen2(ssa.OpShiftLeftInt16x32, types.TypeVec512), sys.AMD64) @@ -1628,42 +920,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint64x2.ShiftLeftConcat", opLen3(ssa.OpShiftLeftConcatUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.ShiftLeftConcat", opLen3(ssa.OpShiftLeftConcatUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.ShiftLeftConcat", opLen3(ssa.OpShiftLeftConcatUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedInt64x4, types.TypeVec256), 
sys.AMD64) - addF(simdPackage, "Int64x8.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x8.ShiftRight", opLen2(ssa.OpShiftRightInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x16.ShiftRight", opLen2(ssa.OpShiftRightInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x32.ShiftRight", opLen2(ssa.OpShiftRightInt16x32, types.TypeVec512), sys.AMD64) @@ -1700,54 +956,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Uint64x2.ShiftRightConcat", opLen3(ssa.OpShiftRightConcatUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.ShiftRightConcat", opLen3(ssa.OpShiftRightConcatUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.ShiftRightConcat", opLen3(ssa.OpShiftRightConcatUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.ShiftRightConcatMasked", 
opLen4(ssa.OpShiftRightConcatMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.Sqrt", opLen1(ssa.OpSqrtFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.Sqrt", opLen1(ssa.OpSqrtFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.Sqrt", opLen1(ssa.OpSqrtFloat32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float64x2.Sqrt", opLen1(ssa.OpSqrtFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x4.Sqrt", opLen1(ssa.OpSqrtFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x8.Sqrt", opLen1(ssa.OpSqrtFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.SqrtMasked", opLen2(ssa.OpSqrtMaskedFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, 
"Float32x8.SqrtMasked", opLen2(ssa.OpSqrtMaskedFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.SqrtMasked", opLen2(ssa.OpSqrtMaskedFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.SqrtMasked", opLen2(ssa.OpSqrtMaskedFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.SqrtMasked", opLen2(ssa.OpSqrtMaskedFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.SqrtMasked", opLen2(ssa.OpSqrtMaskedFloat64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.Sub", opLen2(ssa.OpSubFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.Sub", opLen2(ssa.OpSubFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.Sub", opLen2(ssa.OpSubFloat32x16, types.TypeVec512), sys.AMD64) @@ -1778,36 +992,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Uint64x2.Sub", opLen2(ssa.OpSubUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.Sub", opLen2(ssa.OpSubUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.Sub", opLen2(ssa.OpSubUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.SubMasked", opLen3(ssa.OpSubMaskedFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.SubMasked", opLen3(ssa.OpSubMaskedFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.SubMasked", opLen3(ssa.OpSubMaskedFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.SubMasked", opLen3(ssa.OpSubMaskedFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.SubMasked", opLen3(ssa.OpSubMaskedFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.SubMasked", opLen3(ssa.OpSubMaskedFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.SubMasked", opLen3(ssa.OpSubMaskedInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.SubMasked", opLen3(ssa.OpSubMaskedInt8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.SubMasked", opLen3(ssa.OpSubMaskedInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.SubMasked", opLen3(ssa.OpSubMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.SubMasked", opLen3(ssa.OpSubMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.SubMasked", opLen3(ssa.OpSubMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.SubMasked", opLen3(ssa.OpSubMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.SubMasked", opLen3(ssa.OpSubMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.SubMasked", opLen3(ssa.OpSubMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.SubMasked", opLen3(ssa.OpSubMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.SubMasked", opLen3(ssa.OpSubMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.SubMasked", opLen3(ssa.OpSubMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.SubMasked", opLen3(ssa.OpSubMaskedUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.SubMasked", opLen3(ssa.OpSubMaskedUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.SubMasked", opLen3(ssa.OpSubMaskedUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.SubMasked", opLen3(ssa.OpSubMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.SubMasked", opLen3(ssa.OpSubMaskedUint16x16, types.TypeVec256), 
sys.AMD64) - addF(simdPackage, "Uint16x32.SubMasked", opLen3(ssa.OpSubMaskedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.SubMasked", opLen3(ssa.OpSubMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.SubMasked", opLen3(ssa.OpSubMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.SubMasked", opLen3(ssa.OpSubMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.SubMasked", opLen3(ssa.OpSubMaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.SubMasked", opLen3(ssa.OpSubMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.SubMasked", opLen3(ssa.OpSubMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.SubPairs", opLen2(ssa.OpSubPairsFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.SubPairs", opLen2(ssa.OpSubPairsFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x2.SubPairs", opLen2(ssa.OpSubPairsFloat64x2, types.TypeVec128), sys.AMD64) @@ -1834,18 +1018,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Uint16x8.SubSaturated", opLen2(ssa.OpSubSaturatedUint16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint16x16.SubSaturated", opLen2(ssa.OpSubSaturatedUint16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint16x32.SubSaturated", opLen2(ssa.OpSubSaturatedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedInt8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedUint16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.Trunc", opLen1(ssa.OpTruncFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.Trunc", opLen1(ssa.OpTruncFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x2.Trunc", opLen1(ssa.OpTruncFloat64x2, types.TypeVec128), sys.AMD64) @@ -1856,24 +1028,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Float64x2.TruncScaled", opLen1Imm8(ssa.OpTruncScaledFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.TruncScaled", opLen1Imm8(ssa.OpTruncScaledFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.TruncScaled", opLen1Imm8(ssa.OpTruncScaledFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x4.TruncScaledMasked", opLen2Imm8(ssa.OpTruncScaledMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64) - addF(simdPackage, "Float32x8.TruncScaledMasked", opLen2Imm8(ssa.OpTruncScaledMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64) - addF(simdPackage, "Float32x16.TruncScaledMasked", opLen2Imm8(ssa.OpTruncScaledMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float64x2.TruncScaledMasked", opLen2Imm8(ssa.OpTruncScaledMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64) - addF(simdPackage, "Float64x4.TruncScaledMasked", opLen2Imm8(ssa.OpTruncScaledMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64) - addF(simdPackage, "Float64x8.TruncScaledMasked", opLen2Imm8(ssa.OpTruncScaledMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.TruncScaledResidue", opLen1Imm8(ssa.OpTruncScaledResidueFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x8.TruncScaledResidue", opLen1Imm8(ssa.OpTruncScaledResidueFloat32x8, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float32x16.TruncScaledResidue", opLen1Imm8(ssa.OpTruncScaledResidueFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x2.TruncScaledResidue", opLen1Imm8(ssa.OpTruncScaledResidueFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.TruncScaledResidue", opLen1Imm8(ssa.OpTruncScaledResidueFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.TruncScaledResidue", opLen1Imm8(ssa.OpTruncScaledResidueFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x4.TruncScaledResidueMasked", opLen2Imm8(ssa.OpTruncScaledResidueMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64) - addF(simdPackage, "Float32x8.TruncScaledResidueMasked", opLen2Imm8(ssa.OpTruncScaledResidueMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64) - addF(simdPackage, "Float32x16.TruncScaledResidueMasked", opLen2Imm8(ssa.OpTruncScaledResidueMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float64x2.TruncScaledResidueMasked", opLen2Imm8(ssa.OpTruncScaledResidueMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64) - addF(simdPackage, "Float64x4.TruncScaledResidueMasked", opLen2Imm8(ssa.OpTruncScaledResidueMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64) - addF(simdPackage, "Float64x8.TruncScaledResidueMasked", opLen2Imm8(ssa.OpTruncScaledResidueMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Int8x16.Xor", opLen2(ssa.OpXorInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x32.Xor", opLen2(ssa.OpXorInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x64.Xor", opLen2(ssa.OpXorInt8x64, types.TypeVec512), sys.AMD64) @@ -1898,18 +1058,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint64x2.Xor", opLen2(ssa.OpXorUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.Xor", opLen2(ssa.OpXorUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.Xor", opLen2(ssa.OpXorUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.XorMasked", opLen3(ssa.OpXorMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.XorMasked", opLen3(ssa.OpXorMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.XorMasked", opLen3(ssa.OpXorMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.XorMasked", opLen3(ssa.OpXorMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.XorMasked", opLen3(ssa.OpXorMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.XorMasked", opLen3(ssa.OpXorMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.XorMasked", opLen3(ssa.OpXorMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.XorMasked", opLen3(ssa.OpXorMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.XorMasked", opLen3(ssa.OpXorMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.XorMasked", opLen3(ssa.OpXorMaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.XorMasked", opLen3(ssa.OpXorMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.XorMasked", opLen3(ssa.OpXorMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x16.blend", opLen3(ssa.OpblendInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x32.blend", opLen3(ssa.OpblendInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x64.blendMasked", opLen3(ssa.OpblendMaskedInt8x64, types.TypeVec512), sys.AMD64) diff --git a/src/simd/_gen/simdgen/godefs.go b/src/simd/_gen/simdgen/godefs.go index 22decb9d7e6..4044addd8c1 100644 --- a/src/simd/_gen/simdgen/godefs.go +++ b/src/simd/_gen/simdgen/godefs.go @@ -11,6 +11,7 @@ import ( "slices" "strconv" "strings" + "unicode" "simd/_gen/unify" ) @@ -100,6 +101,11 @@ func (o *Operation) DecodeUnified(v *unify.Value) error { o.Documentation = regexp.MustCompile(`\bNAME\b`).ReplaceAllString(o.Documentation, o.Go) if isMasked { o.Documentation += "\n//\n// This operation is applied selectively under a write mask." + if unicode.IsUpper([]rune(o.Go)[0]) { + trueVal := "true" + o.NoGenericOps = &trueVal + o.NoTypes = &trueVal + } } o.In = append(o.rawOperation.In, o.rawOperation.InVariant...) 
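The godefs.go change above is the switch that drives the deletions in the rest of this patch: when a masked operation's Go name is exported, the generator now tags it so that neither a generic SSA op nor a public API method is emitted for it. What follows is a minimal, standalone sketch of that gating, not the simdgen code itself; the Operation struct, its field names, and the Masked field standing in for the generator's isMasked flag are simplified assumptions for illustration.

package main

import (
	"fmt"
	"unicode"
)

// Operation is a simplified stand-in for simdgen's operation record.
type Operation struct {
	Go           string  // Go-level name, e.g. "AddMasked" or "blendMasked"
	Masked       bool    // stand-in for the generator's isMasked flag
	NoGenericOps *string // "true" => skip generic-op generation
	NoTypes      *string // "true" => skip public type/method generation
}

// markMachineOnly mirrors the added check: an exported masked op is tagged
// so that only the machine-level op is generated for it.
func markMachineOnly(o *Operation) {
	if o.Masked && unicode.IsUpper([]rune(o.Go)[0]) {
		trueVal := "true"
		o.NoGenericOps = &trueVal
		o.NoTypes = &trueVal
	}
}

func main() {
	ops := []Operation{
		{Go: "AddMasked", Masked: true},   // exported masked: machine op only
		{Go: "blendMasked", Masked: true}, // unexported masked: still generated
		{Go: "Add", Masked: false},        // unmasked: unaffected
	}
	for i := range ops {
		markMachineOnly(&ops[i])
		fmt.Printf("%-12s machine-only=%v\n", ops[i].Go, ops[i].NoGenericOps != nil)
	}
}

This matches what the rest of the diff shows: exported masked methods such as AddMasked and their intrinsic registrations are removed, while unexported masked helpers like Int8x64.blendMasked keep their registration.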
diff --git a/src/simd/compare_test.go b/src/simd/compare_test.go index 7fd20cf5d79..f8526d27e98 100644 --- a/src/simd/compare_test.go +++ b/src/simd/compare_test.go @@ -15,44 +15,6 @@ import ( // from > and = var comparisonFixed bool = simd.HasAVX512() -func TestLessMasked(t *testing.T) { - if simd.HasAVX512() { - testFloat32x4CompareMasked(t, simd.Float32x4.LessMasked, lessSlice[float32]) - testFloat32x8CompareMasked(t, simd.Float32x8.LessMasked, lessSlice[float32]) - testFloat64x2CompareMasked(t, simd.Float64x2.LessMasked, lessSlice[float64]) - testFloat64x4CompareMasked(t, simd.Float64x4.LessMasked, lessSlice[float64]) - - testInt16x16CompareMasked(t, simd.Int16x16.LessMasked, lessSlice[int16]) - testInt16x8CompareMasked(t, simd.Int16x8.LessMasked, lessSlice[int16]) - testInt32x4CompareMasked(t, simd.Int32x4.LessMasked, lessSlice[int32]) - testInt32x8CompareMasked(t, simd.Int32x8.LessMasked, lessSlice[int32]) - testInt64x2CompareMasked(t, simd.Int64x2.LessMasked, lessSlice[int64]) - testInt64x4CompareMasked(t, simd.Int64x4.LessMasked, lessSlice[int64]) - testInt8x16CompareMasked(t, simd.Int8x16.LessMasked, lessSlice[int8]) - testInt8x32CompareMasked(t, simd.Int8x32.LessMasked, lessSlice[int8]) - - testUint16x16CompareMasked(t, simd.Uint16x16.LessMasked, lessSlice[uint16]) - testUint16x8CompareMasked(t, simd.Uint16x8.LessMasked, lessSlice[uint16]) - testUint32x4CompareMasked(t, simd.Uint32x4.LessMasked, lessSlice[uint32]) - testUint32x8CompareMasked(t, simd.Uint32x8.LessMasked, lessSlice[uint32]) - testUint64x2CompareMasked(t, simd.Uint64x2.LessMasked, lessSlice[uint64]) - testUint64x4CompareMasked(t, simd.Uint64x4.LessMasked, lessSlice[uint64]) - testUint8x16CompareMasked(t, simd.Uint8x16.LessMasked, lessSlice[uint8]) - testUint8x32CompareMasked(t, simd.Uint8x32.LessMasked, lessSlice[uint8]) - - testFloat32x16CompareMasked(t, simd.Float32x16.LessMasked, lessSlice[float32]) - testFloat64x8CompareMasked(t, simd.Float64x8.LessMasked, lessSlice[float64]) - testInt8x64CompareMasked(t, simd.Int8x64.LessMasked, lessSlice[int8]) - testInt16x32CompareMasked(t, simd.Int16x32.LessMasked, lessSlice[int16]) - testInt32x16CompareMasked(t, simd.Int32x16.LessMasked, lessSlice[int32]) - testInt64x8CompareMasked(t, simd.Int64x8.LessMasked, lessSlice[int64]) - testUint8x64CompareMasked(t, simd.Uint8x64.LessMasked, lessSlice[uint8]) - testUint16x32CompareMasked(t, simd.Uint16x32.LessMasked, lessSlice[uint16]) - testUint32x16CompareMasked(t, simd.Uint32x16.LessMasked, lessSlice[uint32]) - testUint64x8CompareMasked(t, simd.Uint64x8.LessMasked, lessSlice[uint64]) - } -} - func TestLess(t *testing.T) { testFloat32x4Compare(t, simd.Float32x4.Less, lessSlice[float32]) testFloat32x8Compare(t, simd.Float32x8.Less, lessSlice[float32]) diff --git a/src/simd/ops_amd64.go b/src/simd/ops_amd64.go index d6fcd065bbb..76bbf738cb1 100644 --- a/src/simd/ops_amd64.go +++ b/src/simd/ops_amd64.go @@ -66,92 +66,6 @@ func (x Int64x4) Abs() Int64x4 // Asm: VPABSQ, CPU Feature: AVX512 func (x Int64x8) Abs() Int64x8 -/* AbsMasked */ - -// AbsMasked computes the absolute value of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPABSB, CPU Feature: AVX512 -func (x Int8x16) AbsMasked(mask Mask8x16) Int8x16 - -// AbsMasked computes the absolute value of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPABSB, CPU Feature: AVX512 -func (x Int8x32) AbsMasked(mask Mask8x32) Int8x32 - -// AbsMasked computes the absolute value of each element. 
-// -// This operation is applied selectively under a write mask. -// -// Asm: VPABSB, CPU Feature: AVX512 -func (x Int8x64) AbsMasked(mask Mask8x64) Int8x64 - -// AbsMasked computes the absolute value of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPABSW, CPU Feature: AVX512 -func (x Int16x8) AbsMasked(mask Mask16x8) Int16x8 - -// AbsMasked computes the absolute value of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPABSW, CPU Feature: AVX512 -func (x Int16x16) AbsMasked(mask Mask16x16) Int16x16 - -// AbsMasked computes the absolute value of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPABSW, CPU Feature: AVX512 -func (x Int16x32) AbsMasked(mask Mask16x32) Int16x32 - -// AbsMasked computes the absolute value of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPABSD, CPU Feature: AVX512 -func (x Int32x4) AbsMasked(mask Mask32x4) Int32x4 - -// AbsMasked computes the absolute value of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPABSD, CPU Feature: AVX512 -func (x Int32x8) AbsMasked(mask Mask32x8) Int32x8 - -// AbsMasked computes the absolute value of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPABSD, CPU Feature: AVX512 -func (x Int32x16) AbsMasked(mask Mask32x16) Int32x16 - -// AbsMasked computes the absolute value of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPABSQ, CPU Feature: AVX512 -func (x Int64x2) AbsMasked(mask Mask64x2) Int64x2 - -// AbsMasked computes the absolute value of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPABSQ, CPU Feature: AVX512 -func (x Int64x4) AbsMasked(mask Mask64x4) Int64x4 - -// AbsMasked computes the absolute value of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPABSQ, CPU Feature: AVX512 -func (x Int64x8) AbsMasked(mask Mask64x8) Int64x8 - /* Add */ // Add adds corresponding elements of two vectors. @@ -321,29 +235,6 @@ func (x Int32x8) AddDotProdPairsSaturated(y Int16x16, z Int16x16) Int32x8 // Asm: VPDPWSSDS, CPU Feature: AVX512VNNI func (x Int32x16) AddDotProdPairsSaturated(y Int16x32, z Int16x32) Int32x16 -/* AddDotProdPairsSaturatedMasked */ - -// AddDotProdPairsSaturatedMasked performs dot products on pairs of elements of y and z and then adds x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI -func (x Int32x4) AddDotProdPairsSaturatedMasked(y Int16x8, z Int16x8, mask Mask32x4) Int32x4 - -// AddDotProdPairsSaturatedMasked performs dot products on pairs of elements of y and z and then adds x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI -func (x Int32x8) AddDotProdPairsSaturatedMasked(y Int16x16, z Int16x16, mask Mask32x8) Int32x8 - -// AddDotProdPairsSaturatedMasked performs dot products on pairs of elements of y and z and then adds x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI -func (x Int32x16) AddDotProdPairsSaturatedMasked(y Int16x32, z Int16x32, mask Mask32x16) Int32x16 - /* AddDotProdQuadruple */ // AddDotProdQuadruple performs dot products on groups of 4 elements of x and y and then adds z. 
@@ -361,29 +252,6 @@ func (x Int8x32) AddDotProdQuadruple(y Uint8x32, z Int32x8) Int32x8 // Asm: VPDPBUSD, CPU Feature: AVX512VNNI func (x Int8x64) AddDotProdQuadruple(y Uint8x64, z Int32x16) Int32x16 -/* AddDotProdQuadrupleMasked */ - -// AddDotProdQuadrupleMasked performs dot products on groups of 4 elements of x and y and then adds z. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPDPBUSD, CPU Feature: AVX512VNNI -func (x Int8x16) AddDotProdQuadrupleMasked(y Uint8x16, z Int32x4, mask Mask32x4) Int32x4 - -// AddDotProdQuadrupleMasked performs dot products on groups of 4 elements of x and y and then adds z. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPDPBUSD, CPU Feature: AVX512VNNI -func (x Int8x32) AddDotProdQuadrupleMasked(y Uint8x32, z Int32x8, mask Mask32x8) Int32x8 - -// AddDotProdQuadrupleMasked performs dot products on groups of 4 elements of x and y and then adds z. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPDPBUSD, CPU Feature: AVX512VNNI -func (x Int8x64) AddDotProdQuadrupleMasked(y Uint8x64, z Int32x16, mask Mask32x16) Int32x16 - /* AddDotProdQuadrupleSaturated */ // AddDotProdQuadrupleSaturated multiplies performs dot products on groups of 4 elements of x and y and then adds z. @@ -401,241 +269,6 @@ func (x Int8x32) AddDotProdQuadrupleSaturated(y Uint8x32, z Int32x8) Int32x8 // Asm: VPDPBUSDS, CPU Feature: AVX512VNNI func (x Int8x64) AddDotProdQuadrupleSaturated(y Uint8x64, z Int32x16) Int32x16 -/* AddDotProdQuadrupleSaturatedMasked */ - -// AddDotProdQuadrupleSaturatedMasked multiplies performs dot products on groups of 4 elements of x and y and then adds z. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI -func (x Int8x16) AddDotProdQuadrupleSaturatedMasked(y Uint8x16, z Int32x4, mask Mask32x4) Int32x4 - -// AddDotProdQuadrupleSaturatedMasked multiplies performs dot products on groups of 4 elements of x and y and then adds z. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI -func (x Int8x32) AddDotProdQuadrupleSaturatedMasked(y Uint8x32, z Int32x8, mask Mask32x8) Int32x8 - -// AddDotProdQuadrupleSaturatedMasked multiplies performs dot products on groups of 4 elements of x and y and then adds z. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI -func (x Int8x64) AddDotProdQuadrupleSaturatedMasked(y Uint8x64, z Int32x16, mask Mask32x16) Int32x16 - -/* AddMasked */ - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VADDPS, CPU Feature: AVX512 -func (x Float32x4) AddMasked(y Float32x4, mask Mask32x4) Float32x4 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VADDPS, CPU Feature: AVX512 -func (x Float32x8) AddMasked(y Float32x8, mask Mask32x8) Float32x8 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VADDPS, CPU Feature: AVX512 -func (x Float32x16) AddMasked(y Float32x16, mask Mask32x16) Float32x16 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VADDPD, CPU Feature: AVX512 -func (x Float64x2) AddMasked(y Float64x2, mask Mask64x2) Float64x2 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VADDPD, CPU Feature: AVX512 -func (x Float64x4) AddMasked(y Float64x4, mask Mask64x4) Float64x4 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VADDPD, CPU Feature: AVX512 -func (x Float64x8) AddMasked(y Float64x8, mask Mask64x8) Float64x8 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDB, CPU Feature: AVX512 -func (x Int8x16) AddMasked(y Int8x16, mask Mask8x16) Int8x16 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDB, CPU Feature: AVX512 -func (x Int8x32) AddMasked(y Int8x32, mask Mask8x32) Int8x32 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDB, CPU Feature: AVX512 -func (x Int8x64) AddMasked(y Int8x64, mask Mask8x64) Int8x64 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDW, CPU Feature: AVX512 -func (x Int16x8) AddMasked(y Int16x8, mask Mask16x8) Int16x8 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDW, CPU Feature: AVX512 -func (x Int16x16) AddMasked(y Int16x16, mask Mask16x16) Int16x16 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDW, CPU Feature: AVX512 -func (x Int16x32) AddMasked(y Int16x32, mask Mask16x32) Int16x32 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDD, CPU Feature: AVX512 -func (x Int32x4) AddMasked(y Int32x4, mask Mask32x4) Int32x4 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDD, CPU Feature: AVX512 -func (x Int32x8) AddMasked(y Int32x8, mask Mask32x8) Int32x8 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDD, CPU Feature: AVX512 -func (x Int32x16) AddMasked(y Int32x16, mask Mask32x16) Int32x16 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDQ, CPU Feature: AVX512 -func (x Int64x2) AddMasked(y Int64x2, mask Mask64x2) Int64x2 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDQ, CPU Feature: AVX512 -func (x Int64x4) AddMasked(y Int64x4, mask Mask64x4) Int64x4 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDQ, CPU Feature: AVX512 -func (x Int64x8) AddMasked(y Int64x8, mask Mask64x8) Int64x8 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPADDB, CPU Feature: AVX512 -func (x Uint8x16) AddMasked(y Uint8x16, mask Mask8x16) Uint8x16 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDB, CPU Feature: AVX512 -func (x Uint8x32) AddMasked(y Uint8x32, mask Mask8x32) Uint8x32 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDB, CPU Feature: AVX512 -func (x Uint8x64) AddMasked(y Uint8x64, mask Mask8x64) Uint8x64 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDW, CPU Feature: AVX512 -func (x Uint16x8) AddMasked(y Uint16x8, mask Mask16x8) Uint16x8 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDW, CPU Feature: AVX512 -func (x Uint16x16) AddMasked(y Uint16x16, mask Mask16x16) Uint16x16 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDW, CPU Feature: AVX512 -func (x Uint16x32) AddMasked(y Uint16x32, mask Mask16x32) Uint16x32 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDD, CPU Feature: AVX512 -func (x Uint32x4) AddMasked(y Uint32x4, mask Mask32x4) Uint32x4 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDD, CPU Feature: AVX512 -func (x Uint32x8) AddMasked(y Uint32x8, mask Mask32x8) Uint32x8 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDD, CPU Feature: AVX512 -func (x Uint32x16) AddMasked(y Uint32x16, mask Mask32x16) Uint32x16 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDQ, CPU Feature: AVX512 -func (x Uint64x2) AddMasked(y Uint64x2, mask Mask64x2) Uint64x2 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDQ, CPU Feature: AVX512 -func (x Uint64x4) AddMasked(y Uint64x4, mask Mask64x4) Uint64x4 - -// AddMasked adds corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDQ, CPU Feature: AVX512 -func (x Uint64x8) AddMasked(y Uint64x8, mask Mask64x8) Uint64x8 - /* AddPairs */ // AddPairs horizontally adds adjacent pairs of elements. @@ -786,92 +419,6 @@ func (x Uint16x16) AddSaturated(y Uint16x16) Uint16x16 // Asm: VPADDUSW, CPU Feature: AVX512 func (x Uint16x32) AddSaturated(y Uint16x32) Uint16x32 -/* AddSaturatedMasked */ - -// AddSaturatedMasked adds corresponding elements of two vectors with saturation. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDSB, CPU Feature: AVX512 -func (x Int8x16) AddSaturatedMasked(y Int8x16, mask Mask8x16) Int8x16 - -// AddSaturatedMasked adds corresponding elements of two vectors with saturation. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDSB, CPU Feature: AVX512 -func (x Int8x32) AddSaturatedMasked(y Int8x32, mask Mask8x32) Int8x32 - -// AddSaturatedMasked adds corresponding elements of two vectors with saturation. 
-// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDSB, CPU Feature: AVX512 -func (x Int8x64) AddSaturatedMasked(y Int8x64, mask Mask8x64) Int8x64 - -// AddSaturatedMasked adds corresponding elements of two vectors with saturation. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDSW, CPU Feature: AVX512 -func (x Int16x8) AddSaturatedMasked(y Int16x8, mask Mask16x8) Int16x8 - -// AddSaturatedMasked adds corresponding elements of two vectors with saturation. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDSW, CPU Feature: AVX512 -func (x Int16x16) AddSaturatedMasked(y Int16x16, mask Mask16x16) Int16x16 - -// AddSaturatedMasked adds corresponding elements of two vectors with saturation. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDSW, CPU Feature: AVX512 -func (x Int16x32) AddSaturatedMasked(y Int16x32, mask Mask16x32) Int16x32 - -// AddSaturatedMasked adds corresponding elements of two vectors with saturation. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDUSB, CPU Feature: AVX512 -func (x Uint8x16) AddSaturatedMasked(y Uint8x16, mask Mask8x16) Uint8x16 - -// AddSaturatedMasked adds corresponding elements of two vectors with saturation. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDUSB, CPU Feature: AVX512 -func (x Uint8x32) AddSaturatedMasked(y Uint8x32, mask Mask8x32) Uint8x32 - -// AddSaturatedMasked adds corresponding elements of two vectors with saturation. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDUSB, CPU Feature: AVX512 -func (x Uint8x64) AddSaturatedMasked(y Uint8x64, mask Mask8x64) Uint8x64 - -// AddSaturatedMasked adds corresponding elements of two vectors with saturation. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDUSW, CPU Feature: AVX512 -func (x Uint16x8) AddSaturatedMasked(y Uint16x8, mask Mask16x8) Uint16x8 - -// AddSaturatedMasked adds corresponding elements of two vectors with saturation. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDUSW, CPU Feature: AVX512 -func (x Uint16x16) AddSaturatedMasked(y Uint16x16, mask Mask16x16) Uint16x16 - -// AddSaturatedMasked adds corresponding elements of two vectors with saturation. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPADDUSW, CPU Feature: AVX512 -func (x Uint16x32) AddSaturatedMasked(y Uint16x32, mask Mask16x32) Uint16x32 - /* AddSub */ // AddSub subtracts even elements and adds odd elements of two vectors. @@ -1016,92 +563,6 @@ func (x Uint64x4) And(y Uint64x4) Uint64x4 // Asm: VPANDQ, CPU Feature: AVX512 func (x Uint64x8) And(y Uint64x8) Uint64x8 -/* AndMasked */ - -// AndMasked performs a bitwise AND operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPANDD, CPU Feature: AVX512 -func (x Int32x4) AndMasked(y Int32x4, mask Mask32x4) Int32x4 - -// AndMasked performs a bitwise AND operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPANDD, CPU Feature: AVX512 -func (x Int32x8) AndMasked(y Int32x8, mask Mask32x8) Int32x8 - -// AndMasked performs a bitwise AND operation between two vectors. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPANDD, CPU Feature: AVX512 -func (x Int32x16) AndMasked(y Int32x16, mask Mask32x16) Int32x16 - -// AndMasked performs a bitwise AND operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPANDQ, CPU Feature: AVX512 -func (x Int64x2) AndMasked(y Int64x2, mask Mask64x2) Int64x2 - -// AndMasked performs a bitwise AND operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPANDQ, CPU Feature: AVX512 -func (x Int64x4) AndMasked(y Int64x4, mask Mask64x4) Int64x4 - -// AndMasked performs a bitwise AND operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPANDQ, CPU Feature: AVX512 -func (x Int64x8) AndMasked(y Int64x8, mask Mask64x8) Int64x8 - -// AndMasked performs a bitwise AND operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPANDD, CPU Feature: AVX512 -func (x Uint32x4) AndMasked(y Uint32x4, mask Mask32x4) Uint32x4 - -// AndMasked performs a bitwise AND operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPANDD, CPU Feature: AVX512 -func (x Uint32x8) AndMasked(y Uint32x8, mask Mask32x8) Uint32x8 - -// AndMasked performs a bitwise AND operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPANDD, CPU Feature: AVX512 -func (x Uint32x16) AndMasked(y Uint32x16, mask Mask32x16) Uint32x16 - -// AndMasked performs a bitwise AND operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPANDQ, CPU Feature: AVX512 -func (x Uint64x2) AndMasked(y Uint64x2, mask Mask64x2) Uint64x2 - -// AndMasked performs a bitwise AND operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPANDQ, CPU Feature: AVX512 -func (x Uint64x4) AndMasked(y Uint64x4, mask Mask64x4) Uint64x4 - -// AndMasked performs a bitwise AND operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPANDQ, CPU Feature: AVX512 -func (x Uint64x8) AndMasked(y Uint64x8, mask Mask64x8) Uint64x8 - /* AndNot */ // AndNot performs a bitwise x &^ y. @@ -1224,92 +685,6 @@ func (x Uint64x4) AndNot(y Uint64x4) Uint64x4 // Asm: VPANDNQ, CPU Feature: AVX512 func (x Uint64x8) AndNot(y Uint64x8) Uint64x8 -/* AndNotMasked */ - -// AndNotMasked performs a bitwise x &^ y. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPANDND, CPU Feature: AVX512 -func (x Int32x4) AndNotMasked(y Int32x4, mask Mask32x4) Int32x4 - -// AndNotMasked performs a bitwise x &^ y. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPANDND, CPU Feature: AVX512 -func (x Int32x8) AndNotMasked(y Int32x8, mask Mask32x8) Int32x8 - -// AndNotMasked performs a bitwise x &^ y. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPANDND, CPU Feature: AVX512 -func (x Int32x16) AndNotMasked(y Int32x16, mask Mask32x16) Int32x16 - -// AndNotMasked performs a bitwise x &^ y. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPANDNQ, CPU Feature: AVX512 -func (x Int64x2) AndNotMasked(y Int64x2, mask Mask64x2) Int64x2 - -// AndNotMasked performs a bitwise x &^ y. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPANDNQ, CPU Feature: AVX512 -func (x Int64x4) AndNotMasked(y Int64x4, mask Mask64x4) Int64x4 - -// AndNotMasked performs a bitwise x &^ y. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPANDNQ, CPU Feature: AVX512 -func (x Int64x8) AndNotMasked(y Int64x8, mask Mask64x8) Int64x8 - -// AndNotMasked performs a bitwise x &^ y. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPANDND, CPU Feature: AVX512 -func (x Uint32x4) AndNotMasked(y Uint32x4, mask Mask32x4) Uint32x4 - -// AndNotMasked performs a bitwise x &^ y. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPANDND, CPU Feature: AVX512 -func (x Uint32x8) AndNotMasked(y Uint32x8, mask Mask32x8) Uint32x8 - -// AndNotMasked performs a bitwise x &^ y. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPANDND, CPU Feature: AVX512 -func (x Uint32x16) AndNotMasked(y Uint32x16, mask Mask32x16) Uint32x16 - -// AndNotMasked performs a bitwise x &^ y. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPANDNQ, CPU Feature: AVX512 -func (x Uint64x2) AndNotMasked(y Uint64x2, mask Mask64x2) Uint64x2 - -// AndNotMasked performs a bitwise x &^ y. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPANDNQ, CPU Feature: AVX512 -func (x Uint64x4) AndNotMasked(y Uint64x4, mask Mask64x4) Uint64x4 - -// AndNotMasked performs a bitwise x &^ y. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPANDNQ, CPU Feature: AVX512 -func (x Uint64x8) AndNotMasked(y Uint64x8, mask Mask64x8) Uint64x8 - /* Average */ // Average computes the rounded average of corresponding elements. @@ -1342,50 +717,6 @@ func (x Uint16x16) Average(y Uint16x16) Uint16x16 // Asm: VPAVGW, CPU Feature: AVX512 func (x Uint16x32) Average(y Uint16x32) Uint16x32 -/* AverageMasked */ - -// AverageMasked computes the rounded average of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPAVGB, CPU Feature: AVX512 -func (x Uint8x16) AverageMasked(y Uint8x16, mask Mask8x16) Uint8x16 - -// AverageMasked computes the rounded average of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPAVGB, CPU Feature: AVX512 -func (x Uint8x32) AverageMasked(y Uint8x32, mask Mask8x32) Uint8x32 - -// AverageMasked computes the rounded average of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPAVGB, CPU Feature: AVX512 -func (x Uint8x64) AverageMasked(y Uint8x64, mask Mask8x64) Uint8x64 - -// AverageMasked computes the rounded average of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPAVGW, CPU Feature: AVX512 -func (x Uint16x8) AverageMasked(y Uint16x8, mask Mask16x8) Uint16x8 - -// AverageMasked computes the rounded average of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPAVGW, CPU Feature: AVX512 -func (x Uint16x16) AverageMasked(y Uint16x16, mask Mask16x16) Uint16x16 - -// AverageMasked computes the rounded average of corresponding elements. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPAVGW, CPU Feature: AVX512 -func (x Uint16x32) AverageMasked(y Uint16x32, mask Mask16x32) Uint16x32 - /* Broadcast128 */ // Broadcast128 copies element zero of its (128-bit) input to all elements of @@ -1448,88 +779,6 @@ func (x Uint32x4) Broadcast128() Uint32x4 // Asm: VPBROADCASTQ, CPU Feature: AVX2 func (x Uint64x2) Broadcast128() Uint64x2 -/* Broadcast128Masked */ - -// Broadcast128Masked copies element zero of its (128-bit) input to all elements of -// the 128-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VBROADCASTSS, CPU Feature: AVX512 -func (x Float32x4) Broadcast128Masked(mask Mask32x4) Float32x4 - -// Broadcast128Masked copies element zero of its (128-bit) input to all elements of -// the 128-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPBROADCASTQ, CPU Feature: AVX512 -func (x Float64x2) Broadcast128Masked(mask Mask64x2) Float64x2 - -// Broadcast128Masked copies element zero of its (128-bit) input to all elements of -// the 128-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPBROADCASTB, CPU Feature: AVX512 -func (x Int8x16) Broadcast128Masked(mask Mask8x16) Int8x16 - -// Broadcast128Masked copies element zero of its (128-bit) input to all elements of -// the 128-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPBROADCASTW, CPU Feature: AVX512 -func (x Int16x8) Broadcast128Masked(mask Mask16x8) Int16x8 - -// Broadcast128Masked copies element zero of its (128-bit) input to all elements of -// the 128-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPBROADCASTD, CPU Feature: AVX512 -func (x Int32x4) Broadcast128Masked(mask Mask32x4) Int32x4 - -// Broadcast128Masked copies element zero of its (128-bit) input to all elements of -// the 128-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPBROADCASTQ, CPU Feature: AVX512 -func (x Int64x2) Broadcast128Masked(mask Mask64x2) Int64x2 - -// Broadcast128Masked copies element zero of its (128-bit) input to all elements of -// the 128-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPBROADCASTB, CPU Feature: AVX512 -func (x Uint8x16) Broadcast128Masked(mask Mask8x16) Uint8x16 - -// Broadcast128Masked copies element zero of its (128-bit) input to all elements of -// the 128-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPBROADCASTW, CPU Feature: AVX512 -func (x Uint16x8) Broadcast128Masked(mask Mask16x8) Uint16x8 - -// Broadcast128Masked copies element zero of its (128-bit) input to all elements of -// the 128-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPBROADCASTD, CPU Feature: AVX512 -func (x Uint32x4) Broadcast128Masked(mask Mask32x4) Uint32x4 - -// Broadcast128Masked copies element zero of its (128-bit) input to all elements of -// the 128-bit output vector. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPBROADCASTQ, CPU Feature: AVX512 -func (x Uint64x2) Broadcast128Masked(mask Mask64x2) Uint64x2 - /* Broadcast256 */ // Broadcast256 copies element zero of its (128-bit) input to all elements of @@ -1592,88 +841,6 @@ func (x Uint32x4) Broadcast256() Uint32x8 // Asm: VPBROADCASTQ, CPU Feature: AVX2 func (x Uint64x2) Broadcast256() Uint64x4 -/* Broadcast256Masked */ - -// Broadcast256Masked copies element zero of its (128-bit) input to all elements of -// the 256-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VBROADCASTSS, CPU Feature: AVX512 -func (x Float32x4) Broadcast256Masked(mask Mask32x4) Float32x8 - -// Broadcast256Masked copies element zero of its (128-bit) input to all elements of -// the 256-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VBROADCASTSD, CPU Feature: AVX512 -func (x Float64x2) Broadcast256Masked(mask Mask64x2) Float64x4 - -// Broadcast256Masked copies element zero of its (128-bit) input to all elements of -// the 256-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPBROADCASTB, CPU Feature: AVX512 -func (x Int8x16) Broadcast256Masked(mask Mask8x16) Int8x32 - -// Broadcast256Masked copies element zero of its (128-bit) input to all elements of -// the 256-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPBROADCASTW, CPU Feature: AVX512 -func (x Int16x8) Broadcast256Masked(mask Mask16x8) Int16x16 - -// Broadcast256Masked copies element zero of its (128-bit) input to all elements of -// the 256-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPBROADCASTD, CPU Feature: AVX512 -func (x Int32x4) Broadcast256Masked(mask Mask32x4) Int32x8 - -// Broadcast256Masked copies element zero of its (128-bit) input to all elements of -// the 256-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPBROADCASTQ, CPU Feature: AVX512 -func (x Int64x2) Broadcast256Masked(mask Mask64x2) Int64x4 - -// Broadcast256Masked copies element zero of its (128-bit) input to all elements of -// the 256-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPBROADCASTB, CPU Feature: AVX512 -func (x Uint8x16) Broadcast256Masked(mask Mask8x16) Uint8x32 - -// Broadcast256Masked copies element zero of its (128-bit) input to all elements of -// the 256-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPBROADCASTW, CPU Feature: AVX512 -func (x Uint16x8) Broadcast256Masked(mask Mask16x8) Uint16x16 - -// Broadcast256Masked copies element zero of its (128-bit) input to all elements of -// the 256-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPBROADCASTD, CPU Feature: AVX512 -func (x Uint32x4) Broadcast256Masked(mask Mask32x4) Uint32x8 - -// Broadcast256Masked copies element zero of its (128-bit) input to all elements of -// the 256-bit output vector. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPBROADCASTQ, CPU Feature: AVX512 -func (x Uint64x2) Broadcast256Masked(mask Mask64x2) Uint64x4 - /* Broadcast512 */ // Broadcast512 copies element zero of its (128-bit) input to all elements of @@ -1736,88 +903,6 @@ func (x Uint32x4) Broadcast512() Uint32x16 // Asm: VPBROADCASTQ, CPU Feature: AVX512 func (x Uint64x2) Broadcast512() Uint64x8 -/* Broadcast512Masked */ - -// Broadcast512Masked copies element zero of its (128-bit) input to all elements of -// the 512-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VBROADCASTSS, CPU Feature: AVX512 -func (x Float32x4) Broadcast512Masked(mask Mask32x4) Float32x16 - -// Broadcast512Masked copies element zero of its (128-bit) input to all elements of -// the 512-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VBROADCASTSD, CPU Feature: AVX512 -func (x Float64x2) Broadcast512Masked(mask Mask64x2) Float64x8 - -// Broadcast512Masked copies element zero of its (128-bit) input to all elements of -// the 512-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPBROADCASTB, CPU Feature: AVX512 -func (x Int8x16) Broadcast512Masked(mask Mask8x16) Int8x64 - -// Broadcast512Masked copies element zero of its (128-bit) input to all elements of -// the 512-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPBROADCASTW, CPU Feature: AVX512 -func (x Int16x8) Broadcast512Masked(mask Mask16x8) Int16x32 - -// Broadcast512Masked copies element zero of its (128-bit) input to all elements of -// the 512-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPBROADCASTD, CPU Feature: AVX512 -func (x Int32x4) Broadcast512Masked(mask Mask32x4) Int32x16 - -// Broadcast512Masked copies element zero of its (128-bit) input to all elements of -// the 512-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPBROADCASTQ, CPU Feature: AVX512 -func (x Int64x2) Broadcast512Masked(mask Mask64x2) Int64x8 - -// Broadcast512Masked copies element zero of its (128-bit) input to all elements of -// the 512-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPBROADCASTB, CPU Feature: AVX512 -func (x Uint8x16) Broadcast512Masked(mask Mask8x16) Uint8x64 - -// Broadcast512Masked copies element zero of its (128-bit) input to all elements of -// the 512-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPBROADCASTW, CPU Feature: AVX512 -func (x Uint16x8) Broadcast512Masked(mask Mask16x8) Uint16x32 - -// Broadcast512Masked copies element zero of its (128-bit) input to all elements of -// the 512-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPBROADCASTD, CPU Feature: AVX512 -func (x Uint32x4) Broadcast512Masked(mask Mask32x4) Uint32x16 - -// Broadcast512Masked copies element zero of its (128-bit) input to all elements of -// the 512-bit output vector. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPBROADCASTQ, CPU Feature: AVX512 -func (x Uint64x2) Broadcast512Masked(mask Mask64x2) Uint64x8 - /* Ceil */ // Ceil rounds elements up to the nearest integer. 
@@ -1884,62 +969,6 @@ func (x Float64x4) CeilScaled(prec uint8) Float64x4 // Asm: VRNDSCALEPD, CPU Feature: AVX512 func (x Float64x8) CeilScaled(prec uint8) Float64x8 -/* CeilScaledMasked */ - -// CeilScaledMasked rounds elements up with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VRNDSCALEPS, CPU Feature: AVX512 -func (x Float32x4) CeilScaledMasked(prec uint8, mask Mask32x4) Float32x4 - -// CeilScaledMasked rounds elements up with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VRNDSCALEPS, CPU Feature: AVX512 -func (x Float32x8) CeilScaledMasked(prec uint8, mask Mask32x8) Float32x8 - -// CeilScaledMasked rounds elements up with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VRNDSCALEPS, CPU Feature: AVX512 -func (x Float32x16) CeilScaledMasked(prec uint8, mask Mask32x16) Float32x16 - -// CeilScaledMasked rounds elements up with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VRNDSCALEPD, CPU Feature: AVX512 -func (x Float64x2) CeilScaledMasked(prec uint8, mask Mask64x2) Float64x2 - -// CeilScaledMasked rounds elements up with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VRNDSCALEPD, CPU Feature: AVX512 -func (x Float64x4) CeilScaledMasked(prec uint8, mask Mask64x4) Float64x4 - -// CeilScaledMasked rounds elements up with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VRNDSCALEPD, CPU Feature: AVX512 -func (x Float64x8) CeilScaledMasked(prec uint8, mask Mask64x8) Float64x8 - /* CeilScaledResidue */ // CeilScaledResidue computes the difference after ceiling with specified precision. @@ -1984,62 +1013,6 @@ func (x Float64x4) CeilScaledResidue(prec uint8) Float64x4 // Asm: VREDUCEPD, CPU Feature: AVX512 func (x Float64x8) CeilScaledResidue(prec uint8) Float64x8 -/* CeilScaledResidueMasked */ - -// CeilScaledResidueMasked computes the difference after ceiling with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VREDUCEPS, CPU Feature: AVX512 -func (x Float32x4) CeilScaledResidueMasked(prec uint8, mask Mask32x4) Float32x4 - -// CeilScaledResidueMasked computes the difference after ceiling with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. 
-// -// Asm: VREDUCEPS, CPU Feature: AVX512 -func (x Float32x8) CeilScaledResidueMasked(prec uint8, mask Mask32x8) Float32x8 - -// CeilScaledResidueMasked computes the difference after ceiling with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VREDUCEPS, CPU Feature: AVX512 -func (x Float32x16) CeilScaledResidueMasked(prec uint8, mask Mask32x16) Float32x16 - -// CeilScaledResidueMasked computes the difference after ceiling with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VREDUCEPD, CPU Feature: AVX512 -func (x Float64x2) CeilScaledResidueMasked(prec uint8, mask Mask64x2) Float64x2 - -// CeilScaledResidueMasked computes the difference after ceiling with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VREDUCEPD, CPU Feature: AVX512 -func (x Float64x4) CeilScaledResidueMasked(prec uint8, mask Mask64x4) Float64x4 - -// CeilScaledResidueMasked computes the difference after ceiling with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VREDUCEPD, CPU Feature: AVX512 -func (x Float64x8) CeilScaledResidueMasked(prec uint8, mask Mask64x8) Float64x8 - /* Compress */ // Compress performs a compression on vector x using mask by @@ -2239,29 +1212,6 @@ func (x Float32x8) ConvertToInt32() Int32x8 // Asm: VCVTTPS2DQ, CPU Feature: AVX512 func (x Float32x16) ConvertToInt32() Int32x16 -/* ConvertToInt32Masked */ - -// ConvertToInt32 converts element values to int32. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCVTTPS2DQ, CPU Feature: AVX512 -func (x Float32x4) ConvertToInt32Masked(mask Mask32x4) Int32x4 - -// ConvertToInt32 converts element values to int32. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCVTTPS2DQ, CPU Feature: AVX512 -func (x Float32x8) ConvertToInt32Masked(mask Mask32x8) Int32x8 - -// ConvertToInt32 converts element values to int32. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCVTTPS2DQ, CPU Feature: AVX512 -func (x Float32x16) ConvertToInt32Masked(mask Mask32x16) Int32x16 - /* ConvertToUint32 */ // ConvertToUint32Masked converts element values to uint32. @@ -2279,29 +1229,6 @@ func (x Float32x8) ConvertToUint32() Uint32x8 // Asm: VCVTPS2UDQ, CPU Feature: AVX512 func (x Float32x16) ConvertToUint32() Uint32x16 -/* ConvertToUint32Masked */ - -// ConvertToUint32Masked converts element values to uint32. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCVTPS2UDQ, CPU Feature: AVX512 -func (x Float32x4) ConvertToUint32Masked(mask Mask32x4) Uint32x4 - -// ConvertToUint32Masked converts element values to uint32. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCVTPS2UDQ, CPU Feature: AVX512 -func (x Float32x8) ConvertToUint32Masked(mask Mask32x8) Uint32x8 - -// ConvertToUint32Masked converts element values to uint32. 
-// -// This operation is applied selectively under a write mask. -// -// Asm: VCVTPS2UDQ, CPU Feature: AVX512 -func (x Float32x16) ConvertToUint32Masked(mask Mask32x16) Uint32x16 - /* CopySign */ // CopySign returns the product of the first operand with -1, 0, or 1, @@ -2372,50 +1299,6 @@ func (x Float64x4) Div(y Float64x4) Float64x4 // Asm: VDIVPD, CPU Feature: AVX512 func (x Float64x8) Div(y Float64x8) Float64x8 -/* DivMasked */ - -// DivMasked divides elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VDIVPS, CPU Feature: AVX512 -func (x Float32x4) DivMasked(y Float32x4, mask Mask32x4) Float32x4 - -// DivMasked divides elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VDIVPS, CPU Feature: AVX512 -func (x Float32x8) DivMasked(y Float32x8, mask Mask32x8) Float32x8 - -// DivMasked divides elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VDIVPS, CPU Feature: AVX512 -func (x Float32x16) DivMasked(y Float32x16, mask Mask32x16) Float32x16 - -// DivMasked divides elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VDIVPD, CPU Feature: AVX512 -func (x Float64x2) DivMasked(y Float64x2, mask Mask64x2) Float64x2 - -// DivMasked divides elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VDIVPD, CPU Feature: AVX512 -func (x Float64x4) DivMasked(y Float64x4, mask Mask64x4) Float64x4 - -// DivMasked divides elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VDIVPD, CPU Feature: AVX512 -func (x Float64x8) DivMasked(y Float64x8, mask Mask64x8) Float64x8 - /* DotProdPairs */ // DotProdPairs multiplies the elements and add the pairs together, @@ -2436,32 +1319,6 @@ func (x Int16x16) DotProdPairs(y Int16x16) Int32x8 // Asm: VPMADDWD, CPU Feature: AVX512 func (x Int16x32) DotProdPairs(y Int16x32) Int32x16 -/* DotProdPairsMasked */ - -// DotProdPairsMasked multiplies the elements and add the pairs together, -// yielding a vector of half as many elements with twice the input element size. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMADDWD, CPU Feature: AVX512 -func (x Int16x8) DotProdPairsMasked(y Int16x8, mask Mask16x8) Int32x4 - -// DotProdPairsMasked multiplies the elements and add the pairs together, -// yielding a vector of half as many elements with twice the input element size. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMADDWD, CPU Feature: AVX512 -func (x Int16x16) DotProdPairsMasked(y Int16x16, mask Mask16x16) Int32x8 - -// DotProdPairsMasked multiplies the elements and add the pairs together, -// yielding a vector of half as many elements with twice the input element size. -// -// This operation is applied selectively under a write mask. 
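As an aside: the ConvertToInt32 / ConvertToUint32 families shown in this hunk are plain lane-wise conversions. A minimal scalar sketch, assuming in-range inputs (out-of-range and NaN lanes behave differently in hardware and are not modeled here; the helper name is invented for illustration):

// convertSliceToInt32 applies a truncating float32-to-int32 conversion to each
// lane, which is what the vector op does across 4, 8, or 16 lanes at once.
// Go's conversion truncates toward zero for in-range values, matching the
// truncating VCVTTPS2DQ behavior described above.
func convertSliceToInt32(xs []float32) []int32 {
	out := make([]int32, len(xs))
	for i, x := range xs {
		out[i] = int32(x) // assumption: x is representable as int32
	}
	return out
}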
-// -// Asm: VPMADDWD, CPU Feature: AVX512 -func (x Int16x32) DotProdPairsMasked(y Int16x32, mask Mask16x32) Int32x16 - /* DotProdPairsSaturated */ // DotProdPairsSaturated multiplies the elements and add the pairs together with saturation, @@ -2482,32 +1339,6 @@ func (x Uint8x32) DotProdPairsSaturated(y Int8x32) Int16x16 // Asm: VPMADDUBSW, CPU Feature: AVX512 func (x Uint8x64) DotProdPairsSaturated(y Int8x64) Int16x32 -/* DotProdPairsSaturatedMasked */ - -// DotProdPairsSaturatedMasked multiplies the elements and add the pairs together with saturation, -// yielding a vector of half as many elements with twice the input element size. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMADDUBSW, CPU Feature: AVX512 -func (x Uint8x16) DotProdPairsSaturatedMasked(y Int8x16, mask Mask16x8) Int16x8 - -// DotProdPairsSaturatedMasked multiplies the elements and add the pairs together with saturation, -// yielding a vector of half as many elements with twice the input element size. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMADDUBSW, CPU Feature: AVX512 -func (x Uint8x32) DotProdPairsSaturatedMasked(y Int8x32, mask Mask16x16) Int16x16 - -// DotProdPairsSaturatedMasked multiplies the elements and add the pairs together with saturation, -// yielding a vector of half as many elements with twice the input element size. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMADDUBSW, CPU Feature: AVX512 -func (x Uint8x64) DotProdPairsSaturatedMasked(y Int8x64, mask Mask16x32) Int16x32 - /* Equal */ // Equal compares for equality. @@ -2660,218 +1491,6 @@ func (x Float64x4) Equal(y Float64x4) Mask64x4 // Asm: VCMPPD, CPU Feature: AVX512 func (x Float64x8) Equal(y Float64x8) Mask64x8 -/* EqualMasked */ - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPS, CPU Feature: AVX512 -func (x Float32x4) EqualMasked(y Float32x4, mask Mask32x4) Mask32x4 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPS, CPU Feature: AVX512 -func (x Float32x8) EqualMasked(y Float32x8, mask Mask32x8) Mask32x8 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPS, CPU Feature: AVX512 -func (x Float32x16) EqualMasked(y Float32x16, mask Mask32x16) Mask32x16 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPD, CPU Feature: AVX512 -func (x Float64x2) EqualMasked(y Float64x2, mask Mask64x2) Mask64x2 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPD, CPU Feature: AVX512 -func (x Float64x4) EqualMasked(y Float64x4, mask Mask64x4) Mask64x4 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPD, CPU Feature: AVX512 -func (x Float64x8) EqualMasked(y Float64x8, mask Mask64x8) Mask64x8 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPB, CPU Feature: AVX512 -func (x Int8x16) EqualMasked(y Int8x16, mask Mask8x16) Mask8x16 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. 
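As an aside: "multiplies the elements and add the pairs together" is the classic pairwise multiply-add. A scalar sketch of the Int16 form documented above (helper name invented; slices stand in for the fixed-width vectors):

// dotProdPairs multiplies adjacent int16 pairs and sums each pair into an
// int32, yielding half as many elements at twice the input element size.
func dotProdPairs(x, y []int16) []int32 {
	out := make([]int32, len(x)/2)
	for i := range out {
		out[i] = int32(x[2*i])*int32(y[2*i]) + int32(x[2*i+1])*int32(y[2*i+1])
	}
	return out
}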
-// -// Asm: VPCMPB, CPU Feature: AVX512 -func (x Int8x32) EqualMasked(y Int8x32, mask Mask8x32) Mask8x32 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPB, CPU Feature: AVX512 -func (x Int8x64) EqualMasked(y Int8x64, mask Mask8x64) Mask8x64 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPW, CPU Feature: AVX512 -func (x Int16x8) EqualMasked(y Int16x8, mask Mask16x8) Mask16x8 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPW, CPU Feature: AVX512 -func (x Int16x16) EqualMasked(y Int16x16, mask Mask16x16) Mask16x16 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPW, CPU Feature: AVX512 -func (x Int16x32) EqualMasked(y Int16x32, mask Mask16x32) Mask16x32 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPD, CPU Feature: AVX512 -func (x Int32x4) EqualMasked(y Int32x4, mask Mask32x4) Mask32x4 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPD, CPU Feature: AVX512 -func (x Int32x8) EqualMasked(y Int32x8, mask Mask32x8) Mask32x8 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPD, CPU Feature: AVX512 -func (x Int32x16) EqualMasked(y Int32x16, mask Mask32x16) Mask32x16 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPQ, CPU Feature: AVX512 -func (x Int64x2) EqualMasked(y Int64x2, mask Mask64x2) Mask64x2 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPQ, CPU Feature: AVX512 -func (x Int64x4) EqualMasked(y Int64x4, mask Mask64x4) Mask64x4 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPQ, CPU Feature: AVX512 -func (x Int64x8) EqualMasked(y Int64x8, mask Mask64x8) Mask64x8 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUB, CPU Feature: AVX512 -func (x Uint8x16) EqualMasked(y Uint8x16, mask Mask8x16) Mask8x16 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUB, CPU Feature: AVX512 -func (x Uint8x32) EqualMasked(y Uint8x32, mask Mask8x32) Mask8x32 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUB, CPU Feature: AVX512 -func (x Uint8x64) EqualMasked(y Uint8x64, mask Mask8x64) Mask8x64 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUW, CPU Feature: AVX512 -func (x Uint16x8) EqualMasked(y Uint16x8, mask Mask16x8) Mask16x8 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUW, CPU Feature: AVX512 -func (x Uint16x16) EqualMasked(y Uint16x16, mask Mask16x16) Mask16x16 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. 
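As an aside: every comparison in this hunk returns a Mask type rather than a vector. A scalar mental model, with one bool per lane standing in for one mask bit (helper name invented for illustration):

// equalMask models Equal over int8 lanes: one bool per lane, corresponding to
// one bit of a Mask8xN value.
func equalMask(x, y []int8) []bool {
	m := make([]bool, len(x))
	for i := range m {
		m[i] = x[i] == y[i]
	}
	return m
}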
-// -// Asm: VPCMPUW, CPU Feature: AVX512 -func (x Uint16x32) EqualMasked(y Uint16x32, mask Mask16x32) Mask16x32 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUD, CPU Feature: AVX512 -func (x Uint32x4) EqualMasked(y Uint32x4, mask Mask32x4) Mask32x4 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUD, CPU Feature: AVX512 -func (x Uint32x8) EqualMasked(y Uint32x8, mask Mask32x8) Mask32x8 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUD, CPU Feature: AVX512 -func (x Uint32x16) EqualMasked(y Uint32x16, mask Mask32x16) Mask32x16 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUQ, CPU Feature: AVX512 -func (x Uint64x2) EqualMasked(y Uint64x2, mask Mask64x2) Mask64x2 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUQ, CPU Feature: AVX512 -func (x Uint64x4) EqualMasked(y Uint64x4, mask Mask64x4) Mask64x4 - -// EqualMasked compares for equality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUQ, CPU Feature: AVX512 -func (x Uint64x8) EqualMasked(y Uint64x8, mask Mask64x8) Mask64x8 - /* Expand */ // Expand performs an expansion on a vector x whose elements are packed to lower parts. @@ -3120,62 +1739,6 @@ func (x Float64x4) FloorScaled(prec uint8) Float64x4 // Asm: VRNDSCALEPD, CPU Feature: AVX512 func (x Float64x8) FloorScaled(prec uint8) Float64x8 -/* FloorScaledMasked */ - -// FloorScaledMasked rounds elements down with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VRNDSCALEPS, CPU Feature: AVX512 -func (x Float32x4) FloorScaledMasked(prec uint8, mask Mask32x4) Float32x4 - -// FloorScaledMasked rounds elements down with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VRNDSCALEPS, CPU Feature: AVX512 -func (x Float32x8) FloorScaledMasked(prec uint8, mask Mask32x8) Float32x8 - -// FloorScaledMasked rounds elements down with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VRNDSCALEPS, CPU Feature: AVX512 -func (x Float32x16) FloorScaledMasked(prec uint8, mask Mask32x16) Float32x16 - -// FloorScaledMasked rounds elements down with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VRNDSCALEPD, CPU Feature: AVX512 -func (x Float64x2) FloorScaledMasked(prec uint8, mask Mask64x2) Float64x2 - -// FloorScaledMasked rounds elements down with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. 
-// -// Asm: VRNDSCALEPD, CPU Feature: AVX512 -func (x Float64x4) FloorScaledMasked(prec uint8, mask Mask64x4) Float64x4 - -// FloorScaledMasked rounds elements down with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VRNDSCALEPD, CPU Feature: AVX512 -func (x Float64x8) FloorScaledMasked(prec uint8, mask Mask64x8) Float64x8 - /* FloorScaledResidue */ // FloorScaledResidue computes the difference after flooring with specified precision. @@ -3220,62 +1783,6 @@ func (x Float64x4) FloorScaledResidue(prec uint8) Float64x4 // Asm: VREDUCEPD, CPU Feature: AVX512 func (x Float64x8) FloorScaledResidue(prec uint8) Float64x8 -/* FloorScaledResidueMasked */ - -// FloorScaledResidueMasked computes the difference after flooring with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VREDUCEPS, CPU Feature: AVX512 -func (x Float32x4) FloorScaledResidueMasked(prec uint8, mask Mask32x4) Float32x4 - -// FloorScaledResidueMasked computes the difference after flooring with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VREDUCEPS, CPU Feature: AVX512 -func (x Float32x8) FloorScaledResidueMasked(prec uint8, mask Mask32x8) Float32x8 - -// FloorScaledResidueMasked computes the difference after flooring with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VREDUCEPS, CPU Feature: AVX512 -func (x Float32x16) FloorScaledResidueMasked(prec uint8, mask Mask32x16) Float32x16 - -// FloorScaledResidueMasked computes the difference after flooring with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VREDUCEPD, CPU Feature: AVX512 -func (x Float64x2) FloorScaledResidueMasked(prec uint8, mask Mask64x2) Float64x2 - -// FloorScaledResidueMasked computes the difference after flooring with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VREDUCEPD, CPU Feature: AVX512 -func (x Float64x4) FloorScaledResidueMasked(prec uint8, mask Mask64x4) Float64x4 - -// FloorScaledResidueMasked computes the difference after flooring with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. 
-// -// Asm: VREDUCEPD, CPU Feature: AVX512 -func (x Float64x8) FloorScaledResidueMasked(prec uint8, mask Mask64x8) Float64x8 - /* GaloisFieldAffineTransform */ // GaloisFieldAffineTransform computes an affine transformation in GF(2^8): @@ -3343,85 +1850,6 @@ func (x Uint8x32) GaloisFieldAffineTransformInverse(y Uint64x4, b uint8) Uint8x3 // Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512GFNI func (x Uint8x64) GaloisFieldAffineTransformInverse(y Uint64x8, b uint8) Uint8x64 -/* GaloisFieldAffineTransformInverseMasked */ - -// GaloisFieldAffineTransformInverseMasked computes an affine transformation in GF(2^8), -// with x inverted with respect to reduction polynomial x^8 + x^4 + x^3 + x + 1: -// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; -// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y -// corresponding to a group of 8 elements in x. -// -// This operation is applied selectively under a write mask. -// -// b results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512GFNI -func (x Uint8x16) GaloisFieldAffineTransformInverseMasked(y Uint64x2, b uint8, mask Mask8x16) Uint8x16 - -// GaloisFieldAffineTransformInverseMasked computes an affine transformation in GF(2^8), -// with x inverted with respect to reduction polynomial x^8 + x^4 + x^3 + x + 1: -// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; -// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y -// corresponding to a group of 8 elements in x. -// -// This operation is applied selectively under a write mask. -// -// b results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512GFNI -func (x Uint8x32) GaloisFieldAffineTransformInverseMasked(y Uint64x4, b uint8, mask Mask8x32) Uint8x32 - -// GaloisFieldAffineTransformInverseMasked computes an affine transformation in GF(2^8), -// with x inverted with respect to reduction polynomial x^8 + x^4 + x^3 + x + 1: -// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; -// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y -// corresponding to a group of 8 elements in x. -// -// This operation is applied selectively under a write mask. -// -// b results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512GFNI -func (x Uint8x64) GaloisFieldAffineTransformInverseMasked(y Uint64x8, b uint8, mask Mask8x64) Uint8x64 - -/* GaloisFieldAffineTransformMasked */ - -// GaloisFieldAffineTransformMasked computes an affine transformation in GF(2^8): -// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; -// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y -// corresponding to a group of 8 elements in x. -// -// This operation is applied selectively under a write mask. -// -// b results in better performance when it's a constant, a non-constant value will be translated into a jump table. 
-// -// Asm: VGF2P8AFFINEQB, CPU Feature: AVX512GFNI -func (x Uint8x16) GaloisFieldAffineTransformMasked(y Uint64x2, b uint8, mask Mask8x16) Uint8x16 - -// GaloisFieldAffineTransformMasked computes an affine transformation in GF(2^8): -// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; -// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y -// corresponding to a group of 8 elements in x. -// -// This operation is applied selectively under a write mask. -// -// b results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VGF2P8AFFINEQB, CPU Feature: AVX512GFNI -func (x Uint8x32) GaloisFieldAffineTransformMasked(y Uint64x4, b uint8, mask Mask8x32) Uint8x32 - -// GaloisFieldAffineTransformMasked computes an affine transformation in GF(2^8): -// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; -// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y -// corresponding to a group of 8 elements in x. -// -// This operation is applied selectively under a write mask. -// -// b results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VGF2P8AFFINEQB, CPU Feature: AVX512GFNI -func (x Uint8x64) GaloisFieldAffineTransformMasked(y Uint64x8, b uint8, mask Mask8x64) Uint8x64 - /* GaloisFieldMul */ // GaloisFieldMul computes element-wise GF(2^8) multiplication with @@ -3442,32 +1870,6 @@ func (x Uint8x32) GaloisFieldMul(y Uint8x32) Uint8x32 // Asm: VGF2P8MULB, CPU Feature: AVX512GFNI func (x Uint8x64) GaloisFieldMul(y Uint8x64) Uint8x64 -/* GaloisFieldMulMasked */ - -// GaloisFieldMulMasked computes element-wise GF(2^8) multiplication with -// reduction polynomial x^8 + x^4 + x^3 + x + 1. -// -// This operation is applied selectively under a write mask. -// -// Asm: VGF2P8MULB, CPU Feature: AVX512GFNI -func (x Uint8x16) GaloisFieldMulMasked(y Uint8x16, mask Mask8x16) Uint8x16 - -// GaloisFieldMulMasked computes element-wise GF(2^8) multiplication with -// reduction polynomial x^8 + x^4 + x^3 + x + 1. -// -// This operation is applied selectively under a write mask. -// -// Asm: VGF2P8MULB, CPU Feature: AVX512GFNI -func (x Uint8x32) GaloisFieldMulMasked(y Uint8x32, mask Mask8x32) Uint8x32 - -// GaloisFieldMulMasked computes element-wise GF(2^8) multiplication with -// reduction polynomial x^8 + x^4 + x^3 + x + 1. -// -// This operation is applied selectively under a write mask. -// -// Asm: VGF2P8MULB, CPU Feature: AVX512GFNI -func (x Uint8x64) GaloisFieldMulMasked(y Uint8x64, mask Mask8x64) Uint8x64 - /* GetElem */ // GetElem retrieves a single constant-indexed element's value. @@ -3928,430 +2330,6 @@ func (x Uint32x16) GreaterEqual(y Uint32x16) Mask32x16 // Asm: VPCMPUQ, CPU Feature: AVX512 func (x Uint64x8) GreaterEqual(y Uint64x8) Mask64x8 -/* GreaterEqualMasked */ - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPS, CPU Feature: AVX512 -func (x Float32x4) GreaterEqualMasked(y Float32x4, mask Mask32x4) Mask32x4 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPS, CPU Feature: AVX512 -func (x Float32x8) GreaterEqualMasked(y Float32x8, mask Mask32x8) Mask32x8 - -// GreaterEqualMasked compares for greater than or equal. 
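As an aside: the GaloisFieldMul doc comment above pins down the reduction polynomial (x^8 + x^4 + x^3 + x + 1, i.e. 0x11B), so a one-byte scalar reference is easy to state; the vector op applies this per 8-bit lane. The helper name is invented for illustration:

// gfMul multiplies two GF(2^8) elements reduced by x^8 + x^4 + x^3 + x + 1.
func gfMul(a, b byte) byte {
	var p byte
	for i := 0; i < 8; i++ {
		if b&1 != 0 {
			p ^= a
		}
		hi := a & 0x80
		a <<= 1
		if hi != 0 {
			a ^= 0x1B // reduce: x^8 is congruent to x^4 + x^3 + x + 1
		}
		b >>= 1
	}
	return p
}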
-// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPS, CPU Feature: AVX512 -func (x Float32x16) GreaterEqualMasked(y Float32x16, mask Mask32x16) Mask32x16 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPD, CPU Feature: AVX512 -func (x Float64x2) GreaterEqualMasked(y Float64x2, mask Mask64x2) Mask64x2 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPD, CPU Feature: AVX512 -func (x Float64x4) GreaterEqualMasked(y Float64x4, mask Mask64x4) Mask64x4 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPD, CPU Feature: AVX512 -func (x Float64x8) GreaterEqualMasked(y Float64x8, mask Mask64x8) Mask64x8 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPB, CPU Feature: AVX512 -func (x Int8x16) GreaterEqualMasked(y Int8x16, mask Mask8x16) Mask8x16 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPB, CPU Feature: AVX512 -func (x Int8x32) GreaterEqualMasked(y Int8x32, mask Mask8x32) Mask8x32 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPB, CPU Feature: AVX512 -func (x Int8x64) GreaterEqualMasked(y Int8x64, mask Mask8x64) Mask8x64 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPW, CPU Feature: AVX512 -func (x Int16x8) GreaterEqualMasked(y Int16x8, mask Mask16x8) Mask16x8 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPW, CPU Feature: AVX512 -func (x Int16x16) GreaterEqualMasked(y Int16x16, mask Mask16x16) Mask16x16 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPW, CPU Feature: AVX512 -func (x Int16x32) GreaterEqualMasked(y Int16x32, mask Mask16x32) Mask16x32 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPD, CPU Feature: AVX512 -func (x Int32x4) GreaterEqualMasked(y Int32x4, mask Mask32x4) Mask32x4 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPD, CPU Feature: AVX512 -func (x Int32x8) GreaterEqualMasked(y Int32x8, mask Mask32x8) Mask32x8 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPD, CPU Feature: AVX512 -func (x Int32x16) GreaterEqualMasked(y Int32x16, mask Mask32x16) Mask32x16 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPQ, CPU Feature: AVX512 -func (x Int64x2) GreaterEqualMasked(y Int64x2, mask Mask64x2) Mask64x2 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPCMPQ, CPU Feature: AVX512 -func (x Int64x4) GreaterEqualMasked(y Int64x4, mask Mask64x4) Mask64x4 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPQ, CPU Feature: AVX512 -func (x Int64x8) GreaterEqualMasked(y Int64x8, mask Mask64x8) Mask64x8 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUB, CPU Feature: AVX512 -func (x Uint8x16) GreaterEqualMasked(y Uint8x16, mask Mask8x16) Mask8x16 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUB, CPU Feature: AVX512 -func (x Uint8x32) GreaterEqualMasked(y Uint8x32, mask Mask8x32) Mask8x32 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUB, CPU Feature: AVX512 -func (x Uint8x64) GreaterEqualMasked(y Uint8x64, mask Mask8x64) Mask8x64 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUW, CPU Feature: AVX512 -func (x Uint16x8) GreaterEqualMasked(y Uint16x8, mask Mask16x8) Mask16x8 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUW, CPU Feature: AVX512 -func (x Uint16x16) GreaterEqualMasked(y Uint16x16, mask Mask16x16) Mask16x16 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUW, CPU Feature: AVX512 -func (x Uint16x32) GreaterEqualMasked(y Uint16x32, mask Mask16x32) Mask16x32 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUD, CPU Feature: AVX512 -func (x Uint32x4) GreaterEqualMasked(y Uint32x4, mask Mask32x4) Mask32x4 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUD, CPU Feature: AVX512 -func (x Uint32x8) GreaterEqualMasked(y Uint32x8, mask Mask32x8) Mask32x8 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUD, CPU Feature: AVX512 -func (x Uint32x16) GreaterEqualMasked(y Uint32x16, mask Mask32x16) Mask32x16 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUQ, CPU Feature: AVX512 -func (x Uint64x2) GreaterEqualMasked(y Uint64x2, mask Mask64x2) Mask64x2 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUQ, CPU Feature: AVX512 -func (x Uint64x4) GreaterEqualMasked(y Uint64x4, mask Mask64x4) Mask64x4 - -// GreaterEqualMasked compares for greater than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUQ, CPU Feature: AVX512 -func (x Uint64x8) GreaterEqualMasked(y Uint64x8, mask Mask64x8) Mask64x8 - -/* GreaterMasked */ - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VCMPPS, CPU Feature: AVX512 -func (x Float32x4) GreaterMasked(y Float32x4, mask Mask32x4) Mask32x4 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPS, CPU Feature: AVX512 -func (x Float32x8) GreaterMasked(y Float32x8, mask Mask32x8) Mask32x8 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPS, CPU Feature: AVX512 -func (x Float32x16) GreaterMasked(y Float32x16, mask Mask32x16) Mask32x16 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPD, CPU Feature: AVX512 -func (x Float64x2) GreaterMasked(y Float64x2, mask Mask64x2) Mask64x2 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPD, CPU Feature: AVX512 -func (x Float64x4) GreaterMasked(y Float64x4, mask Mask64x4) Mask64x4 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPD, CPU Feature: AVX512 -func (x Float64x8) GreaterMasked(y Float64x8, mask Mask64x8) Mask64x8 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPB, CPU Feature: AVX512 -func (x Int8x16) GreaterMasked(y Int8x16, mask Mask8x16) Mask8x16 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPB, CPU Feature: AVX512 -func (x Int8x32) GreaterMasked(y Int8x32, mask Mask8x32) Mask8x32 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPB, CPU Feature: AVX512 -func (x Int8x64) GreaterMasked(y Int8x64, mask Mask8x64) Mask8x64 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPW, CPU Feature: AVX512 -func (x Int16x8) GreaterMasked(y Int16x8, mask Mask16x8) Mask16x8 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPW, CPU Feature: AVX512 -func (x Int16x16) GreaterMasked(y Int16x16, mask Mask16x16) Mask16x16 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPW, CPU Feature: AVX512 -func (x Int16x32) GreaterMasked(y Int16x32, mask Mask16x32) Mask16x32 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPD, CPU Feature: AVX512 -func (x Int32x4) GreaterMasked(y Int32x4, mask Mask32x4) Mask32x4 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPD, CPU Feature: AVX512 -func (x Int32x8) GreaterMasked(y Int32x8, mask Mask32x8) Mask32x8 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPD, CPU Feature: AVX512 -func (x Int32x16) GreaterMasked(y Int32x16, mask Mask32x16) Mask32x16 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPQ, CPU Feature: AVX512 -func (x Int64x2) GreaterMasked(y Int64x2, mask Mask64x2) Mask64x2 - -// GreaterMasked compares for greater than. 
-// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPQ, CPU Feature: AVX512 -func (x Int64x4) GreaterMasked(y Int64x4, mask Mask64x4) Mask64x4 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPQ, CPU Feature: AVX512 -func (x Int64x8) GreaterMasked(y Int64x8, mask Mask64x8) Mask64x8 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUB, CPU Feature: AVX512 -func (x Uint8x16) GreaterMasked(y Uint8x16, mask Mask8x16) Mask8x16 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUB, CPU Feature: AVX512 -func (x Uint8x32) GreaterMasked(y Uint8x32, mask Mask8x32) Mask8x32 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUB, CPU Feature: AVX512 -func (x Uint8x64) GreaterMasked(y Uint8x64, mask Mask8x64) Mask8x64 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUW, CPU Feature: AVX512 -func (x Uint16x8) GreaterMasked(y Uint16x8, mask Mask16x8) Mask16x8 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUW, CPU Feature: AVX512 -func (x Uint16x16) GreaterMasked(y Uint16x16, mask Mask16x16) Mask16x16 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUW, CPU Feature: AVX512 -func (x Uint16x32) GreaterMasked(y Uint16x32, mask Mask16x32) Mask16x32 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUD, CPU Feature: AVX512 -func (x Uint32x4) GreaterMasked(y Uint32x4, mask Mask32x4) Mask32x4 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUD, CPU Feature: AVX512 -func (x Uint32x8) GreaterMasked(y Uint32x8, mask Mask32x8) Mask32x8 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUD, CPU Feature: AVX512 -func (x Uint32x16) GreaterMasked(y Uint32x16, mask Mask32x16) Mask32x16 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUQ, CPU Feature: AVX512 -func (x Uint64x2) GreaterMasked(y Uint64x2, mask Mask64x2) Mask64x2 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUQ, CPU Feature: AVX512 -func (x Uint64x4) GreaterMasked(y Uint64x4, mask Mask64x4) Mask64x4 - -// GreaterMasked compares for greater than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUQ, CPU Feature: AVX512 -func (x Uint64x8) GreaterMasked(y Uint64x8, mask Mask64x8) Mask64x8 - /* IsNan */ // IsNan checks if elements are NaN. Use as x.IsNan(x). @@ -4384,50 +2362,6 @@ func (x Float64x4) IsNan(y Float64x4) Mask64x4 // Asm: VCMPPD, CPU Feature: AVX512 func (x Float64x8) IsNan(y Float64x8) Mask64x8 -/* IsNanMasked */ - -// IsNanMasked checks if elements are NaN. Use as x.IsNan(x). -// -// This operation is applied selectively under a write mask. 
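As an aside: IsNan is documented above as a self-comparison (x.IsNan(x)), which works because NaN is unordered with itself. A scalar sketch (helper name invented):

// nanMask flags exactly the NaN lanes: a NaN never compares equal to itself.
// Each bool stands in for one bit of the resulting MaskNxM value.
func nanMask(xs []float32) []bool {
	m := make([]bool, len(xs))
	for i, x := range xs {
		m[i] = x != x
	}
	return m
}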
-// -// Asm: VCMPPS, CPU Feature: AVX512 -func (x Float32x4) IsNanMasked(y Float32x4, mask Mask32x4) Mask32x4 - -// IsNanMasked checks if elements are NaN. Use as x.IsNan(x). -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPS, CPU Feature: AVX512 -func (x Float32x8) IsNanMasked(y Float32x8, mask Mask32x8) Mask32x8 - -// IsNanMasked checks if elements are NaN. Use as x.IsNan(x). -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPS, CPU Feature: AVX512 -func (x Float32x16) IsNanMasked(y Float32x16, mask Mask32x16) Mask32x16 - -// IsNanMasked checks if elements are NaN. Use as x.IsNan(x). -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPD, CPU Feature: AVX512 -func (x Float64x2) IsNanMasked(y Float64x2, mask Mask64x2) Mask64x2 - -// IsNanMasked checks if elements are NaN. Use as x.IsNan(x). -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPD, CPU Feature: AVX512 -func (x Float64x4) IsNanMasked(y Float64x4, mask Mask64x4) Mask64x4 - -// IsNanMasked checks if elements are NaN. Use as x.IsNan(x). -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPD, CPU Feature: AVX512 -func (x Float64x8) IsNanMasked(y Float64x8, mask Mask64x8) Mask64x8 - /* Less */ // Less compares for less than. @@ -4572,430 +2506,6 @@ func (x Uint32x16) LessEqual(y Uint32x16) Mask32x16 // Asm: VPCMPUQ, CPU Feature: AVX512 func (x Uint64x8) LessEqual(y Uint64x8) Mask64x8 -/* LessEqualMasked */ - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPS, CPU Feature: AVX512 -func (x Float32x4) LessEqualMasked(y Float32x4, mask Mask32x4) Mask32x4 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPS, CPU Feature: AVX512 -func (x Float32x8) LessEqualMasked(y Float32x8, mask Mask32x8) Mask32x8 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPS, CPU Feature: AVX512 -func (x Float32x16) LessEqualMasked(y Float32x16, mask Mask32x16) Mask32x16 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPD, CPU Feature: AVX512 -func (x Float64x2) LessEqualMasked(y Float64x2, mask Mask64x2) Mask64x2 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPD, CPU Feature: AVX512 -func (x Float64x4) LessEqualMasked(y Float64x4, mask Mask64x4) Mask64x4 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPD, CPU Feature: AVX512 -func (x Float64x8) LessEqualMasked(y Float64x8, mask Mask64x8) Mask64x8 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPB, CPU Feature: AVX512 -func (x Int8x16) LessEqualMasked(y Int8x16, mask Mask8x16) Mask8x16 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPB, CPU Feature: AVX512 -func (x Int8x32) LessEqualMasked(y Int8x32, mask Mask8x32) Mask8x32 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPCMPB, CPU Feature: AVX512 -func (x Int8x64) LessEqualMasked(y Int8x64, mask Mask8x64) Mask8x64 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPW, CPU Feature: AVX512 -func (x Int16x8) LessEqualMasked(y Int16x8, mask Mask16x8) Mask16x8 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPW, CPU Feature: AVX512 -func (x Int16x16) LessEqualMasked(y Int16x16, mask Mask16x16) Mask16x16 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPW, CPU Feature: AVX512 -func (x Int16x32) LessEqualMasked(y Int16x32, mask Mask16x32) Mask16x32 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPD, CPU Feature: AVX512 -func (x Int32x4) LessEqualMasked(y Int32x4, mask Mask32x4) Mask32x4 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPD, CPU Feature: AVX512 -func (x Int32x8) LessEqualMasked(y Int32x8, mask Mask32x8) Mask32x8 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPD, CPU Feature: AVX512 -func (x Int32x16) LessEqualMasked(y Int32x16, mask Mask32x16) Mask32x16 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPQ, CPU Feature: AVX512 -func (x Int64x2) LessEqualMasked(y Int64x2, mask Mask64x2) Mask64x2 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPQ, CPU Feature: AVX512 -func (x Int64x4) LessEqualMasked(y Int64x4, mask Mask64x4) Mask64x4 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPQ, CPU Feature: AVX512 -func (x Int64x8) LessEqualMasked(y Int64x8, mask Mask64x8) Mask64x8 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUB, CPU Feature: AVX512 -func (x Uint8x16) LessEqualMasked(y Uint8x16, mask Mask8x16) Mask8x16 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUB, CPU Feature: AVX512 -func (x Uint8x32) LessEqualMasked(y Uint8x32, mask Mask8x32) Mask8x32 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUB, CPU Feature: AVX512 -func (x Uint8x64) LessEqualMasked(y Uint8x64, mask Mask8x64) Mask8x64 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUW, CPU Feature: AVX512 -func (x Uint16x8) LessEqualMasked(y Uint16x8, mask Mask16x8) Mask16x8 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUW, CPU Feature: AVX512 -func (x Uint16x16) LessEqualMasked(y Uint16x16, mask Mask16x16) Mask16x16 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPCMPUW, CPU Feature: AVX512 -func (x Uint16x32) LessEqualMasked(y Uint16x32, mask Mask16x32) Mask16x32 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUD, CPU Feature: AVX512 -func (x Uint32x4) LessEqualMasked(y Uint32x4, mask Mask32x4) Mask32x4 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUD, CPU Feature: AVX512 -func (x Uint32x8) LessEqualMasked(y Uint32x8, mask Mask32x8) Mask32x8 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUD, CPU Feature: AVX512 -func (x Uint32x16) LessEqualMasked(y Uint32x16, mask Mask32x16) Mask32x16 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUQ, CPU Feature: AVX512 -func (x Uint64x2) LessEqualMasked(y Uint64x2, mask Mask64x2) Mask64x2 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUQ, CPU Feature: AVX512 -func (x Uint64x4) LessEqualMasked(y Uint64x4, mask Mask64x4) Mask64x4 - -// LessEqualMasked compares for less than or equal. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUQ, CPU Feature: AVX512 -func (x Uint64x8) LessEqualMasked(y Uint64x8, mask Mask64x8) Mask64x8 - -/* LessMasked */ - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPS, CPU Feature: AVX512 -func (x Float32x4) LessMasked(y Float32x4, mask Mask32x4) Mask32x4 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPS, CPU Feature: AVX512 -func (x Float32x8) LessMasked(y Float32x8, mask Mask32x8) Mask32x8 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPS, CPU Feature: AVX512 -func (x Float32x16) LessMasked(y Float32x16, mask Mask32x16) Mask32x16 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPD, CPU Feature: AVX512 -func (x Float64x2) LessMasked(y Float64x2, mask Mask64x2) Mask64x2 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPD, CPU Feature: AVX512 -func (x Float64x4) LessMasked(y Float64x4, mask Mask64x4) Mask64x4 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPD, CPU Feature: AVX512 -func (x Float64x8) LessMasked(y Float64x8, mask Mask64x8) Mask64x8 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPB, CPU Feature: AVX512 -func (x Int8x16) LessMasked(y Int8x16, mask Mask8x16) Mask8x16 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPB, CPU Feature: AVX512 -func (x Int8x32) LessMasked(y Int8x32, mask Mask8x32) Mask8x32 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPB, CPU Feature: AVX512 -func (x Int8x64) LessMasked(y Int8x64, mask Mask8x64) Mask8x64 - -// LessMasked compares for less than. 
-// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPW, CPU Feature: AVX512 -func (x Int16x8) LessMasked(y Int16x8, mask Mask16x8) Mask16x8 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPW, CPU Feature: AVX512 -func (x Int16x16) LessMasked(y Int16x16, mask Mask16x16) Mask16x16 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPW, CPU Feature: AVX512 -func (x Int16x32) LessMasked(y Int16x32, mask Mask16x32) Mask16x32 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPD, CPU Feature: AVX512 -func (x Int32x4) LessMasked(y Int32x4, mask Mask32x4) Mask32x4 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPD, CPU Feature: AVX512 -func (x Int32x8) LessMasked(y Int32x8, mask Mask32x8) Mask32x8 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPD, CPU Feature: AVX512 -func (x Int32x16) LessMasked(y Int32x16, mask Mask32x16) Mask32x16 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPQ, CPU Feature: AVX512 -func (x Int64x2) LessMasked(y Int64x2, mask Mask64x2) Mask64x2 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPQ, CPU Feature: AVX512 -func (x Int64x4) LessMasked(y Int64x4, mask Mask64x4) Mask64x4 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPQ, CPU Feature: AVX512 -func (x Int64x8) LessMasked(y Int64x8, mask Mask64x8) Mask64x8 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUB, CPU Feature: AVX512 -func (x Uint8x16) LessMasked(y Uint8x16, mask Mask8x16) Mask8x16 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUB, CPU Feature: AVX512 -func (x Uint8x32) LessMasked(y Uint8x32, mask Mask8x32) Mask8x32 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUB, CPU Feature: AVX512 -func (x Uint8x64) LessMasked(y Uint8x64, mask Mask8x64) Mask8x64 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUW, CPU Feature: AVX512 -func (x Uint16x8) LessMasked(y Uint16x8, mask Mask16x8) Mask16x8 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUW, CPU Feature: AVX512 -func (x Uint16x16) LessMasked(y Uint16x16, mask Mask16x16) Mask16x16 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUW, CPU Feature: AVX512 -func (x Uint16x32) LessMasked(y Uint16x32, mask Mask16x32) Mask16x32 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUD, CPU Feature: AVX512 -func (x Uint32x4) LessMasked(y Uint32x4, mask Mask32x4) Mask32x4 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPCMPUD, CPU Feature: AVX512 -func (x Uint32x8) LessMasked(y Uint32x8, mask Mask32x8) Mask32x8 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUD, CPU Feature: AVX512 -func (x Uint32x16) LessMasked(y Uint32x16, mask Mask32x16) Mask32x16 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUQ, CPU Feature: AVX512 -func (x Uint64x2) LessMasked(y Uint64x2, mask Mask64x2) Mask64x2 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUQ, CPU Feature: AVX512 -func (x Uint64x4) LessMasked(y Uint64x4, mask Mask64x4) Mask64x4 - -// LessMasked compares for less than. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUQ, CPU Feature: AVX512 -func (x Uint64x8) LessMasked(y Uint64x8, mask Mask64x8) Mask64x8 - /* Max */ // Max computes the maximum of corresponding elements. @@ -5148,218 +2658,6 @@ func (x Uint64x4) Max(y Uint64x4) Uint64x4 // Asm: VPMAXUQ, CPU Feature: AVX512 func (x Uint64x8) Max(y Uint64x8) Uint64x8 -/* MaxMasked */ - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VMAXPS, CPU Feature: AVX512 -func (x Float32x4) MaxMasked(y Float32x4, mask Mask32x4) Float32x4 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VMAXPS, CPU Feature: AVX512 -func (x Float32x8) MaxMasked(y Float32x8, mask Mask32x8) Float32x8 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VMAXPS, CPU Feature: AVX512 -func (x Float32x16) MaxMasked(y Float32x16, mask Mask32x16) Float32x16 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VMAXPD, CPU Feature: AVX512 -func (x Float64x2) MaxMasked(y Float64x2, mask Mask64x2) Float64x2 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VMAXPD, CPU Feature: AVX512 -func (x Float64x4) MaxMasked(y Float64x4, mask Mask64x4) Float64x4 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VMAXPD, CPU Feature: AVX512 -func (x Float64x8) MaxMasked(y Float64x8, mask Mask64x8) Float64x8 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMAXSB, CPU Feature: AVX512 -func (x Int8x16) MaxMasked(y Int8x16, mask Mask8x16) Int8x16 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMAXSB, CPU Feature: AVX512 -func (x Int8x32) MaxMasked(y Int8x32, mask Mask8x32) Int8x32 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMAXSB, CPU Feature: AVX512 -func (x Int8x64) MaxMasked(y Int8x64, mask Mask8x64) Int8x64 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPMAXSW, CPU Feature: AVX512 -func (x Int16x8) MaxMasked(y Int16x8, mask Mask16x8) Int16x8 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMAXSW, CPU Feature: AVX512 -func (x Int16x16) MaxMasked(y Int16x16, mask Mask16x16) Int16x16 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMAXSW, CPU Feature: AVX512 -func (x Int16x32) MaxMasked(y Int16x32, mask Mask16x32) Int16x32 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMAXSD, CPU Feature: AVX512 -func (x Int32x4) MaxMasked(y Int32x4, mask Mask32x4) Int32x4 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMAXSD, CPU Feature: AVX512 -func (x Int32x8) MaxMasked(y Int32x8, mask Mask32x8) Int32x8 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMAXSD, CPU Feature: AVX512 -func (x Int32x16) MaxMasked(y Int32x16, mask Mask32x16) Int32x16 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMAXSQ, CPU Feature: AVX512 -func (x Int64x2) MaxMasked(y Int64x2, mask Mask64x2) Int64x2 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMAXSQ, CPU Feature: AVX512 -func (x Int64x4) MaxMasked(y Int64x4, mask Mask64x4) Int64x4 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMAXSQ, CPU Feature: AVX512 -func (x Int64x8) MaxMasked(y Int64x8, mask Mask64x8) Int64x8 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMAXUB, CPU Feature: AVX512 -func (x Uint8x16) MaxMasked(y Uint8x16, mask Mask8x16) Uint8x16 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMAXUB, CPU Feature: AVX512 -func (x Uint8x32) MaxMasked(y Uint8x32, mask Mask8x32) Uint8x32 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMAXUB, CPU Feature: AVX512 -func (x Uint8x64) MaxMasked(y Uint8x64, mask Mask8x64) Uint8x64 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMAXUW, CPU Feature: AVX512 -func (x Uint16x8) MaxMasked(y Uint16x8, mask Mask16x8) Uint16x8 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMAXUW, CPU Feature: AVX512 -func (x Uint16x16) MaxMasked(y Uint16x16, mask Mask16x16) Uint16x16 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMAXUW, CPU Feature: AVX512 -func (x Uint16x32) MaxMasked(y Uint16x32, mask Mask16x32) Uint16x32 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPMAXUD, CPU Feature: AVX512 -func (x Uint32x4) MaxMasked(y Uint32x4, mask Mask32x4) Uint32x4 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMAXUD, CPU Feature: AVX512 -func (x Uint32x8) MaxMasked(y Uint32x8, mask Mask32x8) Uint32x8 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMAXUD, CPU Feature: AVX512 -func (x Uint32x16) MaxMasked(y Uint32x16, mask Mask32x16) Uint32x16 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMAXUQ, CPU Feature: AVX512 -func (x Uint64x2) MaxMasked(y Uint64x2, mask Mask64x2) Uint64x2 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMAXUQ, CPU Feature: AVX512 -func (x Uint64x4) MaxMasked(y Uint64x4, mask Mask64x4) Uint64x4 - -// MaxMasked computes the maximum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMAXUQ, CPU Feature: AVX512 -func (x Uint64x8) MaxMasked(y Uint64x8, mask Mask64x8) Uint64x8 - /* Min */ // Min computes the minimum of corresponding elements. @@ -5512,218 +2810,6 @@ func (x Uint64x4) Min(y Uint64x4) Uint64x4 // Asm: VPMINUQ, CPU Feature: AVX512 func (x Uint64x8) Min(y Uint64x8) Uint64x8 -/* MinMasked */ - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VMINPS, CPU Feature: AVX512 -func (x Float32x4) MinMasked(y Float32x4, mask Mask32x4) Float32x4 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VMINPS, CPU Feature: AVX512 -func (x Float32x8) MinMasked(y Float32x8, mask Mask32x8) Float32x8 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VMINPS, CPU Feature: AVX512 -func (x Float32x16) MinMasked(y Float32x16, mask Mask32x16) Float32x16 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VMINPD, CPU Feature: AVX512 -func (x Float64x2) MinMasked(y Float64x2, mask Mask64x2) Float64x2 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VMINPD, CPU Feature: AVX512 -func (x Float64x4) MinMasked(y Float64x4, mask Mask64x4) Float64x4 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VMINPD, CPU Feature: AVX512 -func (x Float64x8) MinMasked(y Float64x8, mask Mask64x8) Float64x8 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMINSB, CPU Feature: AVX512 -func (x Int8x16) MinMasked(y Int8x16, mask Mask8x16) Int8x16 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMINSB, CPU Feature: AVX512 -func (x Int8x32) MinMasked(y Int8x32, mask Mask8x32) Int8x32 - -// MinMasked computes the minimum of corresponding elements. 
-// -// This operation is applied selectively under a write mask. -// -// Asm: VPMINSB, CPU Feature: AVX512 -func (x Int8x64) MinMasked(y Int8x64, mask Mask8x64) Int8x64 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMINSW, CPU Feature: AVX512 -func (x Int16x8) MinMasked(y Int16x8, mask Mask16x8) Int16x8 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMINSW, CPU Feature: AVX512 -func (x Int16x16) MinMasked(y Int16x16, mask Mask16x16) Int16x16 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMINSW, CPU Feature: AVX512 -func (x Int16x32) MinMasked(y Int16x32, mask Mask16x32) Int16x32 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMINSD, CPU Feature: AVX512 -func (x Int32x4) MinMasked(y Int32x4, mask Mask32x4) Int32x4 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMINSD, CPU Feature: AVX512 -func (x Int32x8) MinMasked(y Int32x8, mask Mask32x8) Int32x8 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMINSD, CPU Feature: AVX512 -func (x Int32x16) MinMasked(y Int32x16, mask Mask32x16) Int32x16 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMINSQ, CPU Feature: AVX512 -func (x Int64x2) MinMasked(y Int64x2, mask Mask64x2) Int64x2 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMINSQ, CPU Feature: AVX512 -func (x Int64x4) MinMasked(y Int64x4, mask Mask64x4) Int64x4 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMINSQ, CPU Feature: AVX512 -func (x Int64x8) MinMasked(y Int64x8, mask Mask64x8) Int64x8 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMINUB, CPU Feature: AVX512 -func (x Uint8x16) MinMasked(y Uint8x16, mask Mask8x16) Uint8x16 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMINUB, CPU Feature: AVX512 -func (x Uint8x32) MinMasked(y Uint8x32, mask Mask8x32) Uint8x32 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMINUB, CPU Feature: AVX512 -func (x Uint8x64) MinMasked(y Uint8x64, mask Mask8x64) Uint8x64 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMINUW, CPU Feature: AVX512 -func (x Uint16x8) MinMasked(y Uint16x8, mask Mask16x8) Uint16x8 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMINUW, CPU Feature: AVX512 -func (x Uint16x16) MinMasked(y Uint16x16, mask Mask16x16) Uint16x16 - -// MinMasked computes the minimum of corresponding elements. 
-// -// This operation is applied selectively under a write mask. -// -// Asm: VPMINUW, CPU Feature: AVX512 -func (x Uint16x32) MinMasked(y Uint16x32, mask Mask16x32) Uint16x32 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMINUD, CPU Feature: AVX512 -func (x Uint32x4) MinMasked(y Uint32x4, mask Mask32x4) Uint32x4 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMINUD, CPU Feature: AVX512 -func (x Uint32x8) MinMasked(y Uint32x8, mask Mask32x8) Uint32x8 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMINUD, CPU Feature: AVX512 -func (x Uint32x16) MinMasked(y Uint32x16, mask Mask32x16) Uint32x16 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMINUQ, CPU Feature: AVX512 -func (x Uint64x2) MinMasked(y Uint64x2, mask Mask64x2) Uint64x2 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMINUQ, CPU Feature: AVX512 -func (x Uint64x4) MinMasked(y Uint64x4, mask Mask64x4) Uint64x4 - -// MinMasked computes the minimum of corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMINUQ, CPU Feature: AVX512 -func (x Uint64x8) MinMasked(y Uint64x8, mask Mask64x8) Uint64x8 - /* Mul */ // Mul multiplies corresponding elements of two vectors. @@ -5878,50 +2964,6 @@ func (x Float64x4) MulAdd(y Float64x4, z Float64x4) Float64x4 // Asm: VFMADD213PD, CPU Feature: AVX512 func (x Float64x8) MulAdd(y Float64x8, z Float64x8) Float64x8 -/* MulAddMasked */ - -// MulAddMasked performs a fused (x * y) + z. -// -// This operation is applied selectively under a write mask. -// -// Asm: VFMADD213PS, CPU Feature: AVX512 -func (x Float32x4) MulAddMasked(y Float32x4, z Float32x4, mask Mask32x4) Float32x4 - -// MulAddMasked performs a fused (x * y) + z. -// -// This operation is applied selectively under a write mask. -// -// Asm: VFMADD213PS, CPU Feature: AVX512 -func (x Float32x8) MulAddMasked(y Float32x8, z Float32x8, mask Mask32x8) Float32x8 - -// MulAddMasked performs a fused (x * y) + z. -// -// This operation is applied selectively under a write mask. -// -// Asm: VFMADD213PS, CPU Feature: AVX512 -func (x Float32x16) MulAddMasked(y Float32x16, z Float32x16, mask Mask32x16) Float32x16 - -// MulAddMasked performs a fused (x * y) + z. -// -// This operation is applied selectively under a write mask. -// -// Asm: VFMADD213PD, CPU Feature: AVX512 -func (x Float64x2) MulAddMasked(y Float64x2, z Float64x2, mask Mask64x2) Float64x2 - -// MulAddMasked performs a fused (x * y) + z. -// -// This operation is applied selectively under a write mask. -// -// Asm: VFMADD213PD, CPU Feature: AVX512 -func (x Float64x4) MulAddMasked(y Float64x4, z Float64x4, mask Mask64x4) Float64x4 - -// MulAddMasked performs a fused (x * y) + z. -// -// This operation is applied selectively under a write mask. -// -// Asm: VFMADD213PD, CPU Feature: AVX512 -func (x Float64x8) MulAddMasked(y Float64x8, z Float64x8, mask Mask64x8) Float64x8 - /* MulAddSub */ // MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. 
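
The MulAdd / MulAddMasked declarations deleted in the hunk above all state the same element-wise contract: a fused (x * y) + z. A minimal scalar sketch of that contract follows, using plain Go slices rather than the simd vector types; the helper name mulAddRef is made up for illustration, and whether the scalar expression is itself fused (single rounding) depends on the Go compiler and target, so treat this purely as a shape-and-semantics sketch.

	// mulAddRef is a scalar reference for the fused (x * y) + z contract
	// described in the doc comments above: one result element per input
	// element. A hardware FMA rounds once; this loop may or may not fuse
	// depending on the target, so bit-exact agreement is not guaranteed.
	func mulAddRef(x, y, z []float32) []float32 {
		r := make([]float32, len(x))
		for i := range x {
			r[i] = x[i]*y[i] + z[i]
		}
		return r
	}
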
@@ -5954,50 +2996,6 @@ func (x Float64x4) MulAddSub(y Float64x4, z Float64x4) Float64x4 // Asm: VFMADDSUB213PD, CPU Feature: AVX512 func (x Float64x8) MulAddSub(y Float64x8, z Float64x8) Float64x8 -/* MulAddSubMasked */ - -// MulAddSubMasked performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VFMADDSUB213PS, CPU Feature: AVX512 -func (x Float32x4) MulAddSubMasked(y Float32x4, z Float32x4, mask Mask32x4) Float32x4 - -// MulAddSubMasked performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VFMADDSUB213PS, CPU Feature: AVX512 -func (x Float32x8) MulAddSubMasked(y Float32x8, z Float32x8, mask Mask32x8) Float32x8 - -// MulAddSubMasked performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VFMADDSUB213PS, CPU Feature: AVX512 -func (x Float32x16) MulAddSubMasked(y Float32x16, z Float32x16, mask Mask32x16) Float32x16 - -// MulAddSubMasked performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VFMADDSUB213PD, CPU Feature: AVX512 -func (x Float64x2) MulAddSubMasked(y Float64x2, z Float64x2, mask Mask64x2) Float64x2 - -// MulAddSubMasked performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VFMADDSUB213PD, CPU Feature: AVX512 -func (x Float64x4) MulAddSubMasked(y Float64x4, z Float64x4, mask Mask64x4) Float64x4 - -// MulAddSubMasked performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VFMADDSUB213PD, CPU Feature: AVX512 -func (x Float64x8) MulAddSubMasked(y Float64x8, z Float64x8, mask Mask64x8) Float64x8 - /* MulEvenWiden */ // MulEvenWiden multiplies even-indexed elements, widening the result. @@ -6056,220 +3054,6 @@ func (x Uint16x16) MulHigh(y Uint16x16) Uint16x16 // Asm: VPMULHUW, CPU Feature: AVX512 func (x Uint16x32) MulHigh(y Uint16x32) Uint16x32 -/* MulHighMasked */ - -// MulHighMasked multiplies elements and stores the high part of the result. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMULHW, CPU Feature: AVX512 -func (x Int16x8) MulHighMasked(y Int16x8, mask Mask16x8) Int16x8 - -// MulHighMasked multiplies elements and stores the high part of the result. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMULHW, CPU Feature: AVX512 -func (x Int16x16) MulHighMasked(y Int16x16, mask Mask16x16) Int16x16 - -// MulHighMasked multiplies elements and stores the high part of the result. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMULHW, CPU Feature: AVX512 -func (x Int16x32) MulHighMasked(y Int16x32, mask Mask16x32) Int16x32 - -// MulHighMasked multiplies elements and stores the high part of the result. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMULHUW, CPU Feature: AVX512 -func (x Uint16x8) MulHighMasked(y Uint16x8, mask Mask16x8) Uint16x8 - -// MulHighMasked multiplies elements and stores the high part of the result. 
-// -// This operation is applied selectively under a write mask. -// -// Asm: VPMULHUW, CPU Feature: AVX512 -func (x Uint16x16) MulHighMasked(y Uint16x16, mask Mask16x16) Uint16x16 - -// MulHighMasked multiplies elements and stores the high part of the result. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMULHUW, CPU Feature: AVX512 -func (x Uint16x32) MulHighMasked(y Uint16x32, mask Mask16x32) Uint16x32 - -/* MulMasked */ - -// MulMasked multiplies corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VMULPS, CPU Feature: AVX512 -func (x Float32x4) MulMasked(y Float32x4, mask Mask32x4) Float32x4 - -// MulMasked multiplies corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VMULPS, CPU Feature: AVX512 -func (x Float32x8) MulMasked(y Float32x8, mask Mask32x8) Float32x8 - -// MulMasked multiplies corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VMULPS, CPU Feature: AVX512 -func (x Float32x16) MulMasked(y Float32x16, mask Mask32x16) Float32x16 - -// MulMasked multiplies corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VMULPD, CPU Feature: AVX512 -func (x Float64x2) MulMasked(y Float64x2, mask Mask64x2) Float64x2 - -// MulMasked multiplies corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VMULPD, CPU Feature: AVX512 -func (x Float64x4) MulMasked(y Float64x4, mask Mask64x4) Float64x4 - -// MulMasked multiplies corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VMULPD, CPU Feature: AVX512 -func (x Float64x8) MulMasked(y Float64x8, mask Mask64x8) Float64x8 - -// MulMasked multiplies corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMULLW, CPU Feature: AVX512 -func (x Int16x8) MulMasked(y Int16x8, mask Mask16x8) Int16x8 - -// MulMasked multiplies corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMULLW, CPU Feature: AVX512 -func (x Int16x16) MulMasked(y Int16x16, mask Mask16x16) Int16x16 - -// MulMasked multiplies corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMULLW, CPU Feature: AVX512 -func (x Int16x32) MulMasked(y Int16x32, mask Mask16x32) Int16x32 - -// MulMasked multiplies corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMULLD, CPU Feature: AVX512 -func (x Int32x4) MulMasked(y Int32x4, mask Mask32x4) Int32x4 - -// MulMasked multiplies corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMULLD, CPU Feature: AVX512 -func (x Int32x8) MulMasked(y Int32x8, mask Mask32x8) Int32x8 - -// MulMasked multiplies corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMULLD, CPU Feature: AVX512 -func (x Int32x16) MulMasked(y Int32x16, mask Mask32x16) Int32x16 - -// MulMasked multiplies corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPMULLQ, CPU Feature: AVX512 -func (x Int64x2) MulMasked(y Int64x2, mask Mask64x2) Int64x2 - -// MulMasked multiplies corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMULLQ, CPU Feature: AVX512 -func (x Int64x4) MulMasked(y Int64x4, mask Mask64x4) Int64x4 - -// MulMasked multiplies corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMULLQ, CPU Feature: AVX512 -func (x Int64x8) MulMasked(y Int64x8, mask Mask64x8) Int64x8 - -// MulMasked multiplies corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMULLW, CPU Feature: AVX512 -func (x Uint16x8) MulMasked(y Uint16x8, mask Mask16x8) Uint16x8 - -// MulMasked multiplies corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMULLW, CPU Feature: AVX512 -func (x Uint16x16) MulMasked(y Uint16x16, mask Mask16x16) Uint16x16 - -// MulMasked multiplies corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMULLW, CPU Feature: AVX512 -func (x Uint16x32) MulMasked(y Uint16x32, mask Mask16x32) Uint16x32 - -// MulMasked multiplies corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMULLD, CPU Feature: AVX512 -func (x Uint32x4) MulMasked(y Uint32x4, mask Mask32x4) Uint32x4 - -// MulMasked multiplies corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMULLD, CPU Feature: AVX512 -func (x Uint32x8) MulMasked(y Uint32x8, mask Mask32x8) Uint32x8 - -// MulMasked multiplies corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMULLD, CPU Feature: AVX512 -func (x Uint32x16) MulMasked(y Uint32x16, mask Mask32x16) Uint32x16 - -// MulMasked multiplies corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMULLQ, CPU Feature: AVX512 -func (x Uint64x2) MulMasked(y Uint64x2, mask Mask64x2) Uint64x2 - -// MulMasked multiplies corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMULLQ, CPU Feature: AVX512 -func (x Uint64x4) MulMasked(y Uint64x4, mask Mask64x4) Uint64x4 - -// MulMasked multiplies corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPMULLQ, CPU Feature: AVX512 -func (x Uint64x8) MulMasked(y Uint64x8, mask Mask64x8) Uint64x8 - /* MulSubAdd */ // MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. @@ -6302,50 +3086,6 @@ func (x Float64x4) MulSubAdd(y Float64x4, z Float64x4) Float64x4 // Asm: VFMSUBADD213PD, CPU Feature: AVX512 func (x Float64x8) MulSubAdd(y Float64x8, z Float64x8) Float64x8 -/* MulSubAddMasked */ - -// MulSubAddMasked performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VFMSUBADD213PS, CPU Feature: AVX512 -func (x Float32x4) MulSubAddMasked(y Float32x4, z Float32x4, mask Mask32x4) Float32x4 - -// MulSubAddMasked performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. 
-// -// This operation is applied selectively under a write mask. -// -// Asm: VFMSUBADD213PS, CPU Feature: AVX512 -func (x Float32x8) MulSubAddMasked(y Float32x8, z Float32x8, mask Mask32x8) Float32x8 - -// MulSubAddMasked performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VFMSUBADD213PS, CPU Feature: AVX512 -func (x Float32x16) MulSubAddMasked(y Float32x16, z Float32x16, mask Mask32x16) Float32x16 - -// MulSubAddMasked performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VFMSUBADD213PD, CPU Feature: AVX512 -func (x Float64x2) MulSubAddMasked(y Float64x2, z Float64x2, mask Mask64x2) Float64x2 - -// MulSubAddMasked performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VFMSUBADD213PD, CPU Feature: AVX512 -func (x Float64x4) MulSubAddMasked(y Float64x4, z Float64x4, mask Mask64x4) Float64x4 - -// MulSubAddMasked performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VFMSUBADD213PD, CPU Feature: AVX512 -func (x Float64x8) MulSubAddMasked(y Float64x8, z Float64x8, mask Mask64x8) Float64x8 - /* NotEqual */ // NotEqual compares for inequality. @@ -6418,218 +3158,6 @@ func (x Uint32x16) NotEqual(y Uint32x16) Mask32x16 // Asm: VPCMPUQ, CPU Feature: AVX512 func (x Uint64x8) NotEqual(y Uint64x8) Mask64x8 -/* NotEqualMasked */ - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPS, CPU Feature: AVX512 -func (x Float32x4) NotEqualMasked(y Float32x4, mask Mask32x4) Mask32x4 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPS, CPU Feature: AVX512 -func (x Float32x8) NotEqualMasked(y Float32x8, mask Mask32x8) Mask32x8 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPS, CPU Feature: AVX512 -func (x Float32x16) NotEqualMasked(y Float32x16, mask Mask32x16) Mask32x16 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPD, CPU Feature: AVX512 -func (x Float64x2) NotEqualMasked(y Float64x2, mask Mask64x2) Mask64x2 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPD, CPU Feature: AVX512 -func (x Float64x4) NotEqualMasked(y Float64x4, mask Mask64x4) Mask64x4 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VCMPPD, CPU Feature: AVX512 -func (x Float64x8) NotEqualMasked(y Float64x8, mask Mask64x8) Mask64x8 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPB, CPU Feature: AVX512 -func (x Int8x16) NotEqualMasked(y Int8x16, mask Mask8x16) Mask8x16 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPCMPB, CPU Feature: AVX512 -func (x Int8x32) NotEqualMasked(y Int8x32, mask Mask8x32) Mask8x32 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPB, CPU Feature: AVX512 -func (x Int8x64) NotEqualMasked(y Int8x64, mask Mask8x64) Mask8x64 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPW, CPU Feature: AVX512 -func (x Int16x8) NotEqualMasked(y Int16x8, mask Mask16x8) Mask16x8 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPW, CPU Feature: AVX512 -func (x Int16x16) NotEqualMasked(y Int16x16, mask Mask16x16) Mask16x16 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPW, CPU Feature: AVX512 -func (x Int16x32) NotEqualMasked(y Int16x32, mask Mask16x32) Mask16x32 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPD, CPU Feature: AVX512 -func (x Int32x4) NotEqualMasked(y Int32x4, mask Mask32x4) Mask32x4 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPD, CPU Feature: AVX512 -func (x Int32x8) NotEqualMasked(y Int32x8, mask Mask32x8) Mask32x8 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPD, CPU Feature: AVX512 -func (x Int32x16) NotEqualMasked(y Int32x16, mask Mask32x16) Mask32x16 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPQ, CPU Feature: AVX512 -func (x Int64x2) NotEqualMasked(y Int64x2, mask Mask64x2) Mask64x2 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPQ, CPU Feature: AVX512 -func (x Int64x4) NotEqualMasked(y Int64x4, mask Mask64x4) Mask64x4 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPQ, CPU Feature: AVX512 -func (x Int64x8) NotEqualMasked(y Int64x8, mask Mask64x8) Mask64x8 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUB, CPU Feature: AVX512 -func (x Uint8x16) NotEqualMasked(y Uint8x16, mask Mask8x16) Mask8x16 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUB, CPU Feature: AVX512 -func (x Uint8x32) NotEqualMasked(y Uint8x32, mask Mask8x32) Mask8x32 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUB, CPU Feature: AVX512 -func (x Uint8x64) NotEqualMasked(y Uint8x64, mask Mask8x64) Mask8x64 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUW, CPU Feature: AVX512 -func (x Uint16x8) NotEqualMasked(y Uint16x8, mask Mask16x8) Mask16x8 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUW, CPU Feature: AVX512 -func (x Uint16x16) NotEqualMasked(y Uint16x16, mask Mask16x16) Mask16x16 - -// NotEqualMasked compares for inequality. 
-// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUW, CPU Feature: AVX512 -func (x Uint16x32) NotEqualMasked(y Uint16x32, mask Mask16x32) Mask16x32 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUD, CPU Feature: AVX512 -func (x Uint32x4) NotEqualMasked(y Uint32x4, mask Mask32x4) Mask32x4 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUD, CPU Feature: AVX512 -func (x Uint32x8) NotEqualMasked(y Uint32x8, mask Mask32x8) Mask32x8 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUD, CPU Feature: AVX512 -func (x Uint32x16) NotEqualMasked(y Uint32x16, mask Mask32x16) Mask32x16 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUQ, CPU Feature: AVX512 -func (x Uint64x2) NotEqualMasked(y Uint64x2, mask Mask64x2) Mask64x2 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUQ, CPU Feature: AVX512 -func (x Uint64x4) NotEqualMasked(y Uint64x4, mask Mask64x4) Mask64x4 - -// NotEqualMasked compares for inequality. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPCMPUQ, CPU Feature: AVX512 -func (x Uint64x8) NotEqualMasked(y Uint64x8, mask Mask64x8) Mask64x8 - /* OnesCount */ // OnesCount counts the number of set bits in each element. @@ -6752,176 +3280,6 @@ func (x Uint64x4) OnesCount() Uint64x4 // Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ func (x Uint64x8) OnesCount() Uint64x8 -/* OnesCountMasked */ - -// OnesCountMasked counts the number of set bits in each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPOPCNTB, CPU Feature: AVX512BITALG -func (x Int8x16) OnesCountMasked(mask Mask8x16) Int8x16 - -// OnesCountMasked counts the number of set bits in each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPOPCNTB, CPU Feature: AVX512BITALG -func (x Int8x32) OnesCountMasked(mask Mask8x32) Int8x32 - -// OnesCountMasked counts the number of set bits in each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPOPCNTB, CPU Feature: AVX512BITALG -func (x Int8x64) OnesCountMasked(mask Mask8x64) Int8x64 - -// OnesCountMasked counts the number of set bits in each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPOPCNTW, CPU Feature: AVX512BITALG -func (x Int16x8) OnesCountMasked(mask Mask16x8) Int16x8 - -// OnesCountMasked counts the number of set bits in each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPOPCNTW, CPU Feature: AVX512BITALG -func (x Int16x16) OnesCountMasked(mask Mask16x16) Int16x16 - -// OnesCountMasked counts the number of set bits in each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPOPCNTW, CPU Feature: AVX512BITALG -func (x Int16x32) OnesCountMasked(mask Mask16x32) Int16x32 - -// OnesCountMasked counts the number of set bits in each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ -func (x Int32x4) OnesCountMasked(mask Mask32x4) Int32x4 - -// OnesCountMasked counts the number of set bits in each element. 
-// -// This operation is applied selectively under a write mask. -// -// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ -func (x Int32x8) OnesCountMasked(mask Mask32x8) Int32x8 - -// OnesCountMasked counts the number of set bits in each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ -func (x Int32x16) OnesCountMasked(mask Mask32x16) Int32x16 - -// OnesCountMasked counts the number of set bits in each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ -func (x Int64x2) OnesCountMasked(mask Mask64x2) Int64x2 - -// OnesCountMasked counts the number of set bits in each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ -func (x Int64x4) OnesCountMasked(mask Mask64x4) Int64x4 - -// OnesCountMasked counts the number of set bits in each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ -func (x Int64x8) OnesCountMasked(mask Mask64x8) Int64x8 - -// OnesCountMasked counts the number of set bits in each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPOPCNTB, CPU Feature: AVX512BITALG -func (x Uint8x16) OnesCountMasked(mask Mask8x16) Uint8x16 - -// OnesCountMasked counts the number of set bits in each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPOPCNTB, CPU Feature: AVX512BITALG -func (x Uint8x32) OnesCountMasked(mask Mask8x32) Uint8x32 - -// OnesCountMasked counts the number of set bits in each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPOPCNTB, CPU Feature: AVX512BITALG -func (x Uint8x64) OnesCountMasked(mask Mask8x64) Uint8x64 - -// OnesCountMasked counts the number of set bits in each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPOPCNTW, CPU Feature: AVX512BITALG -func (x Uint16x8) OnesCountMasked(mask Mask16x8) Uint16x8 - -// OnesCountMasked counts the number of set bits in each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPOPCNTW, CPU Feature: AVX512BITALG -func (x Uint16x16) OnesCountMasked(mask Mask16x16) Uint16x16 - -// OnesCountMasked counts the number of set bits in each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPOPCNTW, CPU Feature: AVX512BITALG -func (x Uint16x32) OnesCountMasked(mask Mask16x32) Uint16x32 - -// OnesCountMasked counts the number of set bits in each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ -func (x Uint32x4) OnesCountMasked(mask Mask32x4) Uint32x4 - -// OnesCountMasked counts the number of set bits in each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ -func (x Uint32x8) OnesCountMasked(mask Mask32x8) Uint32x8 - -// OnesCountMasked counts the number of set bits in each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ -func (x Uint32x16) OnesCountMasked(mask Mask32x16) Uint32x16 - -// OnesCountMasked counts the number of set bits in each element. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ -func (x Uint64x2) OnesCountMasked(mask Mask64x2) Uint64x2 - -// OnesCountMasked counts the number of set bits in each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ -func (x Uint64x4) OnesCountMasked(mask Mask64x4) Uint64x4 - -// OnesCountMasked counts the number of set bits in each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ -func (x Uint64x8) OnesCountMasked(mask Mask64x8) Uint64x8 - /* Or */ // Or performs a bitwise OR operation between two vectors. @@ -7044,92 +3402,6 @@ func (x Uint64x4) Or(y Uint64x4) Uint64x4 // Asm: VPORQ, CPU Feature: AVX512 func (x Uint64x8) Or(y Uint64x8) Uint64x8 -/* OrMasked */ - -// OrMasked performs a bitwise OR operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPORD, CPU Feature: AVX512 -func (x Int32x4) OrMasked(y Int32x4, mask Mask32x4) Int32x4 - -// OrMasked performs a bitwise OR operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPORD, CPU Feature: AVX512 -func (x Int32x8) OrMasked(y Int32x8, mask Mask32x8) Int32x8 - -// OrMasked performs a bitwise OR operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPORD, CPU Feature: AVX512 -func (x Int32x16) OrMasked(y Int32x16, mask Mask32x16) Int32x16 - -// OrMasked performs a bitwise OR operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPORQ, CPU Feature: AVX512 -func (x Int64x2) OrMasked(y Int64x2, mask Mask64x2) Int64x2 - -// OrMasked performs a bitwise OR operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPORQ, CPU Feature: AVX512 -func (x Int64x4) OrMasked(y Int64x4, mask Mask64x4) Int64x4 - -// OrMasked performs a bitwise OR operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPORQ, CPU Feature: AVX512 -func (x Int64x8) OrMasked(y Int64x8, mask Mask64x8) Int64x8 - -// OrMasked performs a bitwise OR operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPORD, CPU Feature: AVX512 -func (x Uint32x4) OrMasked(y Uint32x4, mask Mask32x4) Uint32x4 - -// OrMasked performs a bitwise OR operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPORD, CPU Feature: AVX512 -func (x Uint32x8) OrMasked(y Uint32x8, mask Mask32x8) Uint32x8 - -// OrMasked performs a bitwise OR operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPORD, CPU Feature: AVX512 -func (x Uint32x16) OrMasked(y Uint32x16, mask Mask32x16) Uint32x16 - -// OrMasked performs a bitwise OR operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPORQ, CPU Feature: AVX512 -func (x Uint64x2) OrMasked(y Uint64x2, mask Mask64x2) Uint64x2 - -// OrMasked performs a bitwise OR operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPORQ, CPU Feature: AVX512 -func (x Uint64x4) OrMasked(y Uint64x4, mask Mask64x4) Uint64x4 - -// OrMasked performs a bitwise OR operation between two vectors. 
-// -// This operation is applied selectively under a write mask. -// -// Asm: VPORQ, CPU Feature: AVX512 -func (x Uint64x8) OrMasked(y Uint64x8, mask Mask64x8) Uint64x8 - /* Permute */ // Permute performs a full permutation of vector x using indices: @@ -7542,526 +3814,6 @@ func (x Int64x8) Permute2(y Int64x8, indices Uint64x8) Int64x8 // Asm: VPERMI2Q, CPU Feature: AVX512 func (x Uint64x8) Permute2(y Uint64x8, indices Uint64x8) Uint64x8 -/* Permute2Masked */ - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2B, CPU Feature: AVX512VBMI -func (x Int8x16) Permute2Masked(y Int8x16, indices Uint8x16, mask Mask8x16) Int8x16 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2B, CPU Feature: AVX512VBMI -func (x Uint8x16) Permute2Masked(y Uint8x16, indices Uint8x16, mask Mask8x16) Uint8x16 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2B, CPU Feature: AVX512VBMI -func (x Int8x32) Permute2Masked(y Int8x32, indices Uint8x32, mask Mask8x32) Int8x32 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2B, CPU Feature: AVX512VBMI -func (x Uint8x32) Permute2Masked(y Uint8x32, indices Uint8x32, mask Mask8x32) Uint8x32 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2B, CPU Feature: AVX512VBMI -func (x Int8x64) Permute2Masked(y Int8x64, indices Uint8x64, mask Mask8x64) Int8x64 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2B, CPU Feature: AVX512VBMI -func (x Uint8x64) Permute2Masked(y Uint8x64, indices Uint8x64, mask Mask8x64) Uint8x64 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPERMI2W, CPU Feature: AVX512 -func (x Int16x8) Permute2Masked(y Int16x8, indices Uint16x8, mask Mask16x8) Int16x8 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2W, CPU Feature: AVX512 -func (x Uint16x8) Permute2Masked(y Uint16x8, indices Uint16x8, mask Mask16x8) Uint16x8 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2W, CPU Feature: AVX512 -func (x Int16x16) Permute2Masked(y Int16x16, indices Uint16x16, mask Mask16x16) Int16x16 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2W, CPU Feature: AVX512 -func (x Uint16x16) Permute2Masked(y Uint16x16, indices Uint16x16, mask Mask16x16) Uint16x16 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2W, CPU Feature: AVX512 -func (x Int16x32) Permute2Masked(y Int16x32, indices Uint16x32, mask Mask16x32) Int16x32 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2W, CPU Feature: AVX512 -func (x Uint16x32) Permute2Masked(y Uint16x32, indices Uint16x32, mask Mask16x32) Uint16x32 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2PS, CPU Feature: AVX512 -func (x Float32x4) Permute2Masked(y Float32x4, indices Uint32x4, mask Mask32x4) Float32x4 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2D, CPU Feature: AVX512 -func (x Int32x4) Permute2Masked(y Int32x4, indices Uint32x4, mask Mask32x4) Int32x4 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. 
-// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2D, CPU Feature: AVX512 -func (x Uint32x4) Permute2Masked(y Uint32x4, indices Uint32x4, mask Mask32x4) Uint32x4 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2PS, CPU Feature: AVX512 -func (x Float32x8) Permute2Masked(y Float32x8, indices Uint32x8, mask Mask32x8) Float32x8 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2D, CPU Feature: AVX512 -func (x Int32x8) Permute2Masked(y Int32x8, indices Uint32x8, mask Mask32x8) Int32x8 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2D, CPU Feature: AVX512 -func (x Uint32x8) Permute2Masked(y Uint32x8, indices Uint32x8, mask Mask32x8) Uint32x8 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2PS, CPU Feature: AVX512 -func (x Float32x16) Permute2Masked(y Float32x16, indices Uint32x16, mask Mask32x16) Float32x16 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2D, CPU Feature: AVX512 -func (x Int32x16) Permute2Masked(y Int32x16, indices Uint32x16, mask Mask32x16) Int32x16 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2D, CPU Feature: AVX512 -func (x Uint32x16) Permute2Masked(y Uint32x16, indices Uint32x16, mask Mask32x16) Uint32x16 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPERMI2PD, CPU Feature: AVX512 -func (x Float64x2) Permute2Masked(y Float64x2, indices Uint64x2, mask Mask64x2) Float64x2 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2Q, CPU Feature: AVX512 -func (x Int64x2) Permute2Masked(y Int64x2, indices Uint64x2, mask Mask64x2) Int64x2 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2Q, CPU Feature: AVX512 -func (x Uint64x2) Permute2Masked(y Uint64x2, indices Uint64x2, mask Mask64x2) Uint64x2 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2PD, CPU Feature: AVX512 -func (x Float64x4) Permute2Masked(y Float64x4, indices Uint64x4, mask Mask64x4) Float64x4 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2Q, CPU Feature: AVX512 -func (x Int64x4) Permute2Masked(y Int64x4, indices Uint64x4, mask Mask64x4) Int64x4 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2Q, CPU Feature: AVX512 -func (x Uint64x4) Permute2Masked(y Uint64x4, indices Uint64x4, mask Mask64x4) Uint64x4 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2PD, CPU Feature: AVX512 -func (x Float64x8) Permute2Masked(y Float64x8, indices Uint64x8, mask Mask64x8) Float64x8 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. -// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2Q, CPU Feature: AVX512 -func (x Int64x8) Permute2Masked(y Int64x8, indices Uint64x8, mask Mask64x8) Int64x8 - -// Permute2Masked performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} -// where xy is x appending y. 
-// Only the needed bits to represent xy's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMI2Q, CPU Feature: AVX512 -func (x Uint64x8) Permute2Masked(y Uint64x8, indices Uint64x8, mask Mask64x8) Uint64x8 - -/* PermuteMasked */ - -// PermuteMasked performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// Only the needed bits to represent x's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMB, CPU Feature: AVX512VBMI -func (x Int8x16) PermuteMasked(indices Uint8x16, mask Mask8x16) Int8x16 - -// PermuteMasked performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// Only the needed bits to represent x's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMB, CPU Feature: AVX512VBMI -func (x Uint8x16) PermuteMasked(indices Uint8x16, mask Mask8x16) Uint8x16 - -// PermuteMasked performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// Only the needed bits to represent x's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMB, CPU Feature: AVX512VBMI -func (x Int8x32) PermuteMasked(indices Uint8x32, mask Mask8x32) Int8x32 - -// PermuteMasked performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// Only the needed bits to represent x's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMB, CPU Feature: AVX512VBMI -func (x Uint8x32) PermuteMasked(indices Uint8x32, mask Mask8x32) Uint8x32 - -// PermuteMasked performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// Only the needed bits to represent x's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMB, CPU Feature: AVX512VBMI -func (x Int8x64) PermuteMasked(indices Uint8x64, mask Mask8x64) Int8x64 - -// PermuteMasked performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// Only the needed bits to represent x's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMB, CPU Feature: AVX512VBMI -func (x Uint8x64) PermuteMasked(indices Uint8x64, mask Mask8x64) Uint8x64 - -// PermuteMasked performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// Only the needed bits to represent x's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMW, CPU Feature: AVX512 -func (x Int16x8) PermuteMasked(indices Uint16x8, mask Mask16x8) Int16x8 - -// PermuteMasked performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// Only the needed bits to represent x's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPERMW, CPU Feature: AVX512 -func (x Uint16x8) PermuteMasked(indices Uint16x8, mask Mask16x8) Uint16x8 - -// PermuteMasked performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// Only the needed bits to represent x's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMW, CPU Feature: AVX512 -func (x Int16x16) PermuteMasked(indices Uint16x16, mask Mask16x16) Int16x16 - -// PermuteMasked performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// Only the needed bits to represent x's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMW, CPU Feature: AVX512 -func (x Uint16x16) PermuteMasked(indices Uint16x16, mask Mask16x16) Uint16x16 - -// PermuteMasked performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// Only the needed bits to represent x's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMW, CPU Feature: AVX512 -func (x Int16x32) PermuteMasked(indices Uint16x32, mask Mask16x32) Int16x32 - -// PermuteMasked performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// Only the needed bits to represent x's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMW, CPU Feature: AVX512 -func (x Uint16x32) PermuteMasked(indices Uint16x32, mask Mask16x32) Uint16x32 - -// PermuteMasked performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// Only the needed bits to represent x's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMPS, CPU Feature: AVX512 -func (x Float32x8) PermuteMasked(indices Uint32x8, mask Mask32x8) Float32x8 - -// PermuteMasked performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// Only the needed bits to represent x's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMD, CPU Feature: AVX512 -func (x Int32x8) PermuteMasked(indices Uint32x8, mask Mask32x8) Int32x8 - -// PermuteMasked performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// Only the needed bits to represent x's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMD, CPU Feature: AVX512 -func (x Uint32x8) PermuteMasked(indices Uint32x8, mask Mask32x8) Uint32x8 - -// PermuteMasked performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// Only the needed bits to represent x's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPERMPS, CPU Feature: AVX512 -func (x Float32x16) PermuteMasked(indices Uint32x16, mask Mask32x16) Float32x16 - -// PermuteMasked performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// Only the needed bits to represent x's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMD, CPU Feature: AVX512 -func (x Int32x16) PermuteMasked(indices Uint32x16, mask Mask32x16) Int32x16 - -// PermuteMasked performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// Only the needed bits to represent x's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMD, CPU Feature: AVX512 -func (x Uint32x16) PermuteMasked(indices Uint32x16, mask Mask32x16) Uint32x16 - -// PermuteMasked performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// Only the needed bits to represent x's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMPD, CPU Feature: AVX512 -func (x Float64x4) PermuteMasked(indices Uint64x4, mask Mask64x4) Float64x4 - -// PermuteMasked performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// Only the needed bits to represent x's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMQ, CPU Feature: AVX512 -func (x Int64x4) PermuteMasked(indices Uint64x4, mask Mask64x4) Int64x4 - -// PermuteMasked performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// Only the needed bits to represent x's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMQ, CPU Feature: AVX512 -func (x Uint64x4) PermuteMasked(indices Uint64x4, mask Mask64x4) Uint64x4 - -// PermuteMasked performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// Only the needed bits to represent x's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMPD, CPU Feature: AVX512 -func (x Float64x8) PermuteMasked(indices Uint64x8, mask Mask64x8) Float64x8 - -// PermuteMasked performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// Only the needed bits to represent x's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMQ, CPU Feature: AVX512 -func (x Int64x8) PermuteMasked(indices Uint64x8, mask Mask64x8) Int64x8 - -// PermuteMasked performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// Only the needed bits to represent x's index are used in indices' elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPERMQ, CPU Feature: AVX512 -func (x Uint64x8) PermuteMasked(indices Uint64x8, mask Mask64x8) Uint64x8 - /* Reciprocal */ // Reciprocal computes an approximate reciprocal of each element. 
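For readers skimming the deleted godoc, a scalar sketch of what the PermuteMasked family computed may be helpful. This is not the simd package API; the package name, helper name, and the zeroing of inactive lanes are all assumptions made for illustration only.

```go
package maskedref

// permuteMasked8 is a hypothetical scalar model of the removed PermuteMasked
// methods on 8-bit lanes: out[i] = x[indices[i] mod lanes] wherever mask[i] is
// set. Inactive lanes are left at zero here; the hunk above does not spell out
// the write-mask policy, so that choice is an assumption.
func permuteMasked8(x, indices [16]uint8, mask [16]bool) [16]uint8 {
	var out [16]uint8
	for i := range x {
		if mask[i] {
			// Only the low bits needed to index the 16 lanes are used.
			out[i] = x[indices[i]&15]
		}
	}
	return out
}
```

For example, with x = {10, 11, 12, 13, ...}, indices = {3, 2, 1, 0, ...} and only lanes 0, 2, 3 active, the first four result lanes come back as {13, 0, 11, 10}.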
@@ -8094,50 +3846,6 @@ func (x Float64x4) Reciprocal() Float64x4 // Asm: VRCP14PD, CPU Feature: AVX512 func (x Float64x8) Reciprocal() Float64x8 -/* ReciprocalMasked */ - -// ReciprocalMasked computes an approximate reciprocal of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VRCP14PS, CPU Feature: AVX512 -func (x Float32x4) ReciprocalMasked(mask Mask32x4) Float32x4 - -// ReciprocalMasked computes an approximate reciprocal of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VRCP14PS, CPU Feature: AVX512 -func (x Float32x8) ReciprocalMasked(mask Mask32x8) Float32x8 - -// ReciprocalMasked computes an approximate reciprocal of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VRCP14PS, CPU Feature: AVX512 -func (x Float32x16) ReciprocalMasked(mask Mask32x16) Float32x16 - -// ReciprocalMasked computes an approximate reciprocal of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VRCP14PD, CPU Feature: AVX512 -func (x Float64x2) ReciprocalMasked(mask Mask64x2) Float64x2 - -// ReciprocalMasked computes an approximate reciprocal of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VRCP14PD, CPU Feature: AVX512 -func (x Float64x4) ReciprocalMasked(mask Mask64x4) Float64x4 - -// ReciprocalMasked computes an approximate reciprocal of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VRCP14PD, CPU Feature: AVX512 -func (x Float64x8) ReciprocalMasked(mask Mask64x8) Float64x8 - /* ReciprocalSqrt */ // ReciprocalSqrt computes an approximate reciprocal of the square root of each element. @@ -8170,50 +3878,6 @@ func (x Float64x4) ReciprocalSqrt() Float64x4 // Asm: VRSQRT14PD, CPU Feature: AVX512 func (x Float64x8) ReciprocalSqrt() Float64x8 -/* ReciprocalSqrtMasked */ - -// ReciprocalSqrtMasked computes an approximate reciprocal of the square root of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VRSQRT14PS, CPU Feature: AVX512 -func (x Float32x4) ReciprocalSqrtMasked(mask Mask32x4) Float32x4 - -// ReciprocalSqrtMasked computes an approximate reciprocal of the square root of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VRSQRT14PS, CPU Feature: AVX512 -func (x Float32x8) ReciprocalSqrtMasked(mask Mask32x8) Float32x8 - -// ReciprocalSqrtMasked computes an approximate reciprocal of the square root of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VRSQRT14PS, CPU Feature: AVX512 -func (x Float32x16) ReciprocalSqrtMasked(mask Mask32x16) Float32x16 - -// ReciprocalSqrtMasked computes an approximate reciprocal of the square root of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VRSQRT14PD, CPU Feature: AVX512 -func (x Float64x2) ReciprocalSqrtMasked(mask Mask64x2) Float64x2 - -// ReciprocalSqrtMasked computes an approximate reciprocal of the square root of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VRSQRT14PD, CPU Feature: AVX512 -func (x Float64x4) ReciprocalSqrtMasked(mask Mask64x4) Float64x4 - -// ReciprocalSqrtMasked computes an approximate reciprocal of the square root of each element. -// -// This operation is applied selectively under a write mask. 
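The reciprocal and reciprocal-square-root removals above wrap VRCP14/VRSQRT14, which per Intel's description only approximate 1/x and 1/sqrt(x) to roughly 14 bits of relative accuracy. A minimal scalar sketch of the masked behaviour, again assuming inactive lanes come back zeroed and using a hypothetical helper name:

```go
package maskedref

import "math"

// reciprocalMasked is a hypothetical scalar stand-in for the removed
// ReciprocalMasked / ReciprocalSqrtMasked methods. The hardware forms
// (VRCP14PS, VRSQRT14PS) approximate these values; this model computes
// them exactly, and zeroes inactive lanes by assumption.
func reciprocalMasked(x []float32, mask []bool, rsqrt bool) []float32 {
	out := make([]float32, len(x))
	for i, v := range x {
		if !mask[i] {
			continue
		}
		d := float64(v)
		if rsqrt {
			out[i] = float32(1 / math.Sqrt(d))
		} else {
			out[i] = float32(1 / d)
		}
	}
	return out
}
```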
-// -// Asm: VRSQRT14PD, CPU Feature: AVX512 -func (x Float64x8) ReciprocalSqrtMasked(mask Mask64x8) Float64x8 - /* RotateAllLeft */ // RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. @@ -8300,116 +3964,6 @@ func (x Uint64x4) RotateAllLeft(shift uint8) Uint64x4 // Asm: VPROLQ, CPU Feature: AVX512 func (x Uint64x8) RotateAllLeft(shift uint8) Uint64x8 -/* RotateAllLeftMasked */ - -// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPROLD, CPU Feature: AVX512 -func (x Int32x4) RotateAllLeftMasked(shift uint8, mask Mask32x4) Int32x4 - -// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPROLD, CPU Feature: AVX512 -func (x Int32x8) RotateAllLeftMasked(shift uint8, mask Mask32x8) Int32x8 - -// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPROLD, CPU Feature: AVX512 -func (x Int32x16) RotateAllLeftMasked(shift uint8, mask Mask32x16) Int32x16 - -// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPROLQ, CPU Feature: AVX512 -func (x Int64x2) RotateAllLeftMasked(shift uint8, mask Mask64x2) Int64x2 - -// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPROLQ, CPU Feature: AVX512 -func (x Int64x4) RotateAllLeftMasked(shift uint8, mask Mask64x4) Int64x4 - -// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPROLQ, CPU Feature: AVX512 -func (x Int64x8) RotateAllLeftMasked(shift uint8, mask Mask64x8) Int64x8 - -// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPROLD, CPU Feature: AVX512 -func (x Uint32x4) RotateAllLeftMasked(shift uint8, mask Mask32x4) Uint32x4 - -// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. -// -// This operation is applied selectively under a write mask. 
-// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPROLD, CPU Feature: AVX512 -func (x Uint32x8) RotateAllLeftMasked(shift uint8, mask Mask32x8) Uint32x8 - -// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPROLD, CPU Feature: AVX512 -func (x Uint32x16) RotateAllLeftMasked(shift uint8, mask Mask32x16) Uint32x16 - -// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPROLQ, CPU Feature: AVX512 -func (x Uint64x2) RotateAllLeftMasked(shift uint8, mask Mask64x2) Uint64x2 - -// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPROLQ, CPU Feature: AVX512 -func (x Uint64x4) RotateAllLeftMasked(shift uint8, mask Mask64x4) Uint64x4 - -// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPROLQ, CPU Feature: AVX512 -func (x Uint64x8) RotateAllLeftMasked(shift uint8, mask Mask64x8) Uint64x8 - /* RotateAllRight */ // RotateAllRight rotates each element to the right by the number of bits specified by the immediate. @@ -8496,116 +4050,6 @@ func (x Uint64x4) RotateAllRight(shift uint8) Uint64x4 // Asm: VPRORQ, CPU Feature: AVX512 func (x Uint64x8) RotateAllRight(shift uint8) Uint64x8 -/* RotateAllRightMasked */ - -// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPRORD, CPU Feature: AVX512 -func (x Int32x4) RotateAllRightMasked(shift uint8, mask Mask32x4) Int32x4 - -// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPRORD, CPU Feature: AVX512 -func (x Int32x8) RotateAllRightMasked(shift uint8, mask Mask32x8) Int32x8 - -// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. 
-// -// Asm: VPRORD, CPU Feature: AVX512 -func (x Int32x16) RotateAllRightMasked(shift uint8, mask Mask32x16) Int32x16 - -// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPRORQ, CPU Feature: AVX512 -func (x Int64x2) RotateAllRightMasked(shift uint8, mask Mask64x2) Int64x2 - -// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPRORQ, CPU Feature: AVX512 -func (x Int64x4) RotateAllRightMasked(shift uint8, mask Mask64x4) Int64x4 - -// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPRORQ, CPU Feature: AVX512 -func (x Int64x8) RotateAllRightMasked(shift uint8, mask Mask64x8) Int64x8 - -// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPRORD, CPU Feature: AVX512 -func (x Uint32x4) RotateAllRightMasked(shift uint8, mask Mask32x4) Uint32x4 - -// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPRORD, CPU Feature: AVX512 -func (x Uint32x8) RotateAllRightMasked(shift uint8, mask Mask32x8) Uint32x8 - -// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPRORD, CPU Feature: AVX512 -func (x Uint32x16) RotateAllRightMasked(shift uint8, mask Mask32x16) Uint32x16 - -// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPRORQ, CPU Feature: AVX512 -func (x Uint64x2) RotateAllRightMasked(shift uint8, mask Mask64x2) Uint64x2 - -// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. 
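Both rotate-by-immediate families reduce to the same scalar picture, since a right rotation by shift is a left rotation by width-shift. A sketch under the same assumptions (hypothetical helper, zeroed inactive lanes):

```go
package maskedref

import "math/bits"

// rotateAllMasked32 models the removed RotateAllLeftMasked / RotateAllRightMasked
// methods on 32-bit lanes: every active lane is rotated by the same immediate.
// A right rotation is expressed as a left rotation by (32 - shift).
func rotateAllMasked32(x []uint32, shift uint8, left bool, mask []bool) []uint32 {
	k := int(shift) % 32
	if !left {
		k = (32 - k) % 32
	}
	out := make([]uint32, len(x))
	for i, v := range x {
		if mask[i] {
			out[i] = bits.RotateLeft32(v, k)
		}
	}
	return out
}
```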
-// -// Asm: VPRORQ, CPU Feature: AVX512 -func (x Uint64x4) RotateAllRightMasked(shift uint8, mask Mask64x4) Uint64x4 - -// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPRORQ, CPU Feature: AVX512 -func (x Uint64x8) RotateAllRightMasked(shift uint8, mask Mask64x8) Uint64x8 - /* RotateLeft */ // RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. @@ -8668,92 +4112,6 @@ func (x Uint64x4) RotateLeft(y Uint64x4) Uint64x4 // Asm: VPROLVQ, CPU Feature: AVX512 func (x Uint64x8) RotateLeft(y Uint64x8) Uint64x8 -/* RotateLeftMasked */ - -// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPROLVD, CPU Feature: AVX512 -func (x Int32x4) RotateLeftMasked(y Int32x4, mask Mask32x4) Int32x4 - -// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPROLVD, CPU Feature: AVX512 -func (x Int32x8) RotateLeftMasked(y Int32x8, mask Mask32x8) Int32x8 - -// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPROLVD, CPU Feature: AVX512 -func (x Int32x16) RotateLeftMasked(y Int32x16, mask Mask32x16) Int32x16 - -// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPROLVQ, CPU Feature: AVX512 -func (x Int64x2) RotateLeftMasked(y Int64x2, mask Mask64x2) Int64x2 - -// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPROLVQ, CPU Feature: AVX512 -func (x Int64x4) RotateLeftMasked(y Int64x4, mask Mask64x4) Int64x4 - -// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPROLVQ, CPU Feature: AVX512 -func (x Int64x8) RotateLeftMasked(y Int64x8, mask Mask64x8) Int64x8 - -// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPROLVD, CPU Feature: AVX512 -func (x Uint32x4) RotateLeftMasked(y Uint32x4, mask Mask32x4) Uint32x4 - -// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPROLVD, CPU Feature: AVX512 -func (x Uint32x8) RotateLeftMasked(y Uint32x8, mask Mask32x8) Uint32x8 - -// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPROLVD, CPU Feature: AVX512 -func (x Uint32x16) RotateLeftMasked(y Uint32x16, mask Mask32x16) Uint32x16 - -// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPROLVQ, CPU Feature: AVX512 -func (x Uint64x2) RotateLeftMasked(y Uint64x2, mask Mask64x2) Uint64x2 - -// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPROLVQ, CPU Feature: AVX512 -func (x Uint64x4) RotateLeftMasked(y Uint64x4, mask Mask64x4) Uint64x4 - -// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPROLVQ, CPU Feature: AVX512 -func (x Uint64x8) RotateLeftMasked(y Uint64x8, mask Mask64x8) Uint64x8 - /* RotateRight */ // RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. @@ -8816,92 +4174,6 @@ func (x Uint64x4) RotateRight(y Uint64x4) Uint64x4 // Asm: VPRORVQ, CPU Feature: AVX512 func (x Uint64x8) RotateRight(y Uint64x8) Uint64x8 -/* RotateRightMasked */ - -// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPRORVD, CPU Feature: AVX512 -func (x Int32x4) RotateRightMasked(y Int32x4, mask Mask32x4) Int32x4 - -// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPRORVD, CPU Feature: AVX512 -func (x Int32x8) RotateRightMasked(y Int32x8, mask Mask32x8) Int32x8 - -// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPRORVD, CPU Feature: AVX512 -func (x Int32x16) RotateRightMasked(y Int32x16, mask Mask32x16) Int32x16 - -// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPRORVQ, CPU Feature: AVX512 -func (x Int64x2) RotateRightMasked(y Int64x2, mask Mask64x2) Int64x2 - -// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPRORVQ, CPU Feature: AVX512 -func (x Int64x4) RotateRightMasked(y Int64x4, mask Mask64x4) Int64x4 - -// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPRORVQ, CPU Feature: AVX512 -func (x Int64x8) RotateRightMasked(y Int64x8, mask Mask64x8) Int64x8 - -// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. -// -// This operation is applied selectively under a write mask. 
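The variable-count rotates (VPROLV*/VPRORV*) differ from the immediate forms only in that the rotate amount comes from the matching lane of y. A scalar sketch, with the same caveats as above:

```go
package maskedref

import "math/bits"

// rotateLeftMasked32 models the removed RotateLeftMasked methods: each active
// lane of x is rotated left by the count held in the same lane of y. For the
// right-rotating variant, negate the count (bits.RotateLeft32 accepts negative
// rotation counts).
func rotateLeftMasked32(x, y []uint32, mask []bool) []uint32 {
	out := make([]uint32, len(x))
	for i, v := range x {
		if mask[i] {
			out[i] = bits.RotateLeft32(v, int(y[i]%32))
		}
	}
	return out
}
```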
-// -// Asm: VPRORVD, CPU Feature: AVX512 -func (x Uint32x4) RotateRightMasked(y Uint32x4, mask Mask32x4) Uint32x4 - -// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPRORVD, CPU Feature: AVX512 -func (x Uint32x8) RotateRightMasked(y Uint32x8, mask Mask32x8) Uint32x8 - -// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPRORVD, CPU Feature: AVX512 -func (x Uint32x16) RotateRightMasked(y Uint32x16, mask Mask32x16) Uint32x16 - -// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPRORVQ, CPU Feature: AVX512 -func (x Uint64x2) RotateRightMasked(y Uint64x2, mask Mask64x2) Uint64x2 - -// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPRORVQ, CPU Feature: AVX512 -func (x Uint64x4) RotateRightMasked(y Uint64x4, mask Mask64x4) Uint64x4 - -// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPRORVQ, CPU Feature: AVX512 -func (x Uint64x8) RotateRightMasked(y Uint64x8, mask Mask64x8) Uint64x8 - /* RoundToEven */ // RoundToEven rounds elements to the nearest integer. @@ -8968,62 +4240,6 @@ func (x Float64x4) RoundToEvenScaled(prec uint8) Float64x4 // Asm: VRNDSCALEPD, CPU Feature: AVX512 func (x Float64x8) RoundToEvenScaled(prec uint8) Float64x8 -/* RoundToEvenScaledMasked */ - -// RoundToEvenScaledMasked rounds elements with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VRNDSCALEPS, CPU Feature: AVX512 -func (x Float32x4) RoundToEvenScaledMasked(prec uint8, mask Mask32x4) Float32x4 - -// RoundToEvenScaledMasked rounds elements with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VRNDSCALEPS, CPU Feature: AVX512 -func (x Float32x8) RoundToEvenScaledMasked(prec uint8, mask Mask32x8) Float32x8 - -// RoundToEvenScaledMasked rounds elements with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VRNDSCALEPS, CPU Feature: AVX512 -func (x Float32x16) RoundToEvenScaledMasked(prec uint8, mask Mask32x16) Float32x16 - -// RoundToEvenScaledMasked rounds elements with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. 
-// -// Asm: VRNDSCALEPD, CPU Feature: AVX512 -func (x Float64x2) RoundToEvenScaledMasked(prec uint8, mask Mask64x2) Float64x2 - -// RoundToEvenScaledMasked rounds elements with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VRNDSCALEPD, CPU Feature: AVX512 -func (x Float64x4) RoundToEvenScaledMasked(prec uint8, mask Mask64x4) Float64x4 - -// RoundToEvenScaledMasked rounds elements with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VRNDSCALEPD, CPU Feature: AVX512 -func (x Float64x8) RoundToEvenScaledMasked(prec uint8, mask Mask64x8) Float64x8 - /* RoundToEvenScaledResidue */ // RoundToEvenScaledResidue computes the difference after rounding with specified precision. @@ -9068,62 +4284,6 @@ func (x Float64x4) RoundToEvenScaledResidue(prec uint8) Float64x4 // Asm: VREDUCEPD, CPU Feature: AVX512 func (x Float64x8) RoundToEvenScaledResidue(prec uint8) Float64x8 -/* RoundToEvenScaledResidueMasked */ - -// RoundToEvenScaledResidueMasked computes the difference after rounding with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VREDUCEPS, CPU Feature: AVX512 -func (x Float32x4) RoundToEvenScaledResidueMasked(prec uint8, mask Mask32x4) Float32x4 - -// RoundToEvenScaledResidueMasked computes the difference after rounding with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VREDUCEPS, CPU Feature: AVX512 -func (x Float32x8) RoundToEvenScaledResidueMasked(prec uint8, mask Mask32x8) Float32x8 - -// RoundToEvenScaledResidueMasked computes the difference after rounding with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VREDUCEPS, CPU Feature: AVX512 -func (x Float32x16) RoundToEvenScaledResidueMasked(prec uint8, mask Mask32x16) Float32x16 - -// RoundToEvenScaledResidueMasked computes the difference after rounding with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VREDUCEPD, CPU Feature: AVX512 -func (x Float64x2) RoundToEvenScaledResidueMasked(prec uint8, mask Mask64x2) Float64x2 - -// RoundToEvenScaledResidueMasked computes the difference after rounding with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VREDUCEPD, CPU Feature: AVX512 -func (x Float64x4) RoundToEvenScaledResidueMasked(prec uint8, mask Mask64x4) Float64x4 - -// RoundToEvenScaledResidueMasked computes the difference after rounding with specified precision. -// -// This operation is applied selectively under a write mask. 
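As a rough guide to the scaled-rounding removals: VRNDSCALE keeps prec binary fraction bits, and VREDUCE returns what rounding threw away. A scalar sketch of the masked form, with the usual caveat that the helper name and the zeroing of inactive lanes are assumptions:

```go
package maskedref

import "math"

// roundToEvenScaledMasked models the removed RoundToEvenScaledMasked methods:
// each active lane is rounded to prec binary fraction bits using
// round-half-to-even, i.e. RoundToEven(x*2^prec)/2^prec. The residue variant
// (RoundToEvenScaledResidueMasked) is then x minus this rounded value.
func roundToEvenScaledMasked(x []float64, prec uint8, mask []bool) []float64 {
	scale := math.Ldexp(1, int(prec)) // 2^prec
	out := make([]float64, len(x))
	for i, v := range x {
		if mask[i] {
			out[i] = math.RoundToEven(v*scale) / scale
		}
	}
	return out
}
```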
-// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VREDUCEPD, CPU Feature: AVX512 -func (x Float64x8) RoundToEvenScaledResidueMasked(prec uint8, mask Mask64x8) Float64x8 - /* Scale */ // Scale multiplies elements by a power of 2. @@ -9156,50 +4316,6 @@ func (x Float64x4) Scale(y Float64x4) Float64x4 // Asm: VSCALEFPD, CPU Feature: AVX512 func (x Float64x8) Scale(y Float64x8) Float64x8 -/* ScaleMasked */ - -// ScaleMasked multiplies elements by a power of 2. -// -// This operation is applied selectively under a write mask. -// -// Asm: VSCALEFPS, CPU Feature: AVX512 -func (x Float32x4) ScaleMasked(y Float32x4, mask Mask32x4) Float32x4 - -// ScaleMasked multiplies elements by a power of 2. -// -// This operation is applied selectively under a write mask. -// -// Asm: VSCALEFPS, CPU Feature: AVX512 -func (x Float32x8) ScaleMasked(y Float32x8, mask Mask32x8) Float32x8 - -// ScaleMasked multiplies elements by a power of 2. -// -// This operation is applied selectively under a write mask. -// -// Asm: VSCALEFPS, CPU Feature: AVX512 -func (x Float32x16) ScaleMasked(y Float32x16, mask Mask32x16) Float32x16 - -// ScaleMasked multiplies elements by a power of 2. -// -// This operation is applied selectively under a write mask. -// -// Asm: VSCALEFPD, CPU Feature: AVX512 -func (x Float64x2) ScaleMasked(y Float64x2, mask Mask64x2) Float64x2 - -// ScaleMasked multiplies elements by a power of 2. -// -// This operation is applied selectively under a write mask. -// -// Asm: VSCALEFPD, CPU Feature: AVX512 -func (x Float64x4) ScaleMasked(y Float64x4, mask Mask64x4) Float64x4 - -// ScaleMasked multiplies elements by a power of 2. -// -// This operation is applied selectively under a write mask. -// -// Asm: VSCALEFPD, CPU Feature: AVX512 -func (x Float64x8) ScaleMasked(y Float64x8, mask Mask64x8) Float64x8 - /* SetElem */ // SetElem sets a single constant-indexed element's value. @@ -9714,316 +4830,6 @@ func (x Uint64x4) ShiftAllLeftConcat(shift uint8, y Uint64x4) Uint64x4 // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 func (x Uint64x8) ShiftAllLeftConcat(shift uint8, y Uint64x8) Uint64x8 -/* ShiftAllLeftConcatMasked */ - -// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHLDW, CPU Feature: AVX512VBMI2 -func (x Int16x8) ShiftAllLeftConcatMasked(shift uint8, y Int16x8, mask Mask16x8) Int16x8 - -// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. 
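The ScaleMasked removals above multiply each lane by a power of two taken from y. The floor-of-y detail in the sketch below follows the Intel description of VSCALEF and is worth double-checking; NaN, infinity, and overflow handling are ignored here, and the helper is hypothetical:

```go
package maskedref

import "math"

// scaleMasked models the removed ScaleMasked methods. VSCALEFPD multiplies
// each lane of x by 2 raised to floor(y) of the matching lane (per the Intel
// description of VSCALEF); special values and overflow are not modelled.
func scaleMasked(x, y []float64, mask []bool) []float64 {
	out := make([]float64, len(x))
	for i, v := range x {
		if mask[i] {
			out[i] = math.Ldexp(v, int(math.Floor(y[i])))
		}
	}
	return out
}
```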
-// -// Asm: VPSHLDW, CPU Feature: AVX512VBMI2 -func (x Int16x16) ShiftAllLeftConcatMasked(shift uint8, y Int16x16, mask Mask16x16) Int16x16 - -// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHLDW, CPU Feature: AVX512VBMI2 -func (x Int16x32) ShiftAllLeftConcatMasked(shift uint8, y Int16x32, mask Mask16x32) Int16x32 - -// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHLDD, CPU Feature: AVX512VBMI2 -func (x Int32x4) ShiftAllLeftConcatMasked(shift uint8, y Int32x4, mask Mask32x4) Int32x4 - -// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHLDD, CPU Feature: AVX512VBMI2 -func (x Int32x8) ShiftAllLeftConcatMasked(shift uint8, y Int32x8, mask Mask32x8) Int32x8 - -// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHLDD, CPU Feature: AVX512VBMI2 -func (x Int32x16) ShiftAllLeftConcatMasked(shift uint8, y Int32x16, mask Mask32x16) Int32x16 - -// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 -func (x Int64x2) ShiftAllLeftConcatMasked(shift uint8, y Int64x2, mask Mask64x2) Int64x2 - -// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. 
-// -// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 -func (x Int64x4) ShiftAllLeftConcatMasked(shift uint8, y Int64x4, mask Mask64x4) Int64x4 - -// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 -func (x Int64x8) ShiftAllLeftConcatMasked(shift uint8, y Int64x8, mask Mask64x8) Int64x8 - -// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHLDW, CPU Feature: AVX512VBMI2 -func (x Uint16x8) ShiftAllLeftConcatMasked(shift uint8, y Uint16x8, mask Mask16x8) Uint16x8 - -// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHLDW, CPU Feature: AVX512VBMI2 -func (x Uint16x16) ShiftAllLeftConcatMasked(shift uint8, y Uint16x16, mask Mask16x16) Uint16x16 - -// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHLDW, CPU Feature: AVX512VBMI2 -func (x Uint16x32) ShiftAllLeftConcatMasked(shift uint8, y Uint16x32, mask Mask16x32) Uint16x32 - -// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHLDD, CPU Feature: AVX512VBMI2 -func (x Uint32x4) ShiftAllLeftConcatMasked(shift uint8, y Uint32x4, mask Mask32x4) Uint32x4 - -// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. 
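The ShiftAllLeftConcat family is a funnel shift: conceptually x[i] and y[i] are concatenated, shifted left by the immediate, and the upper half is kept, so bits from the top of y[i] fill the vacated low bits of x[i]. A scalar sketch for 32-bit lanes, under the same assumptions as the earlier snippets:

```go
package maskedref

// shiftAllLeftConcatMasked32 models the removed ShiftAllLeftConcatMasked
// methods on 32-bit lanes (VPSHLDD): the bits shifted out of the top of y[i]
// fill the vacated low bits of the left-shifted x[i].
func shiftAllLeftConcatMasked32(x, y []uint32, shift uint8, mask []bool) []uint32 {
	s := uint(shift) & 31 // only the low bits of the immediate are used
	out := make([]uint32, len(x))
	for i := range x {
		if mask[i] {
			out[i] = x[i]<<s | y[i]>>(32-s)
		}
	}
	return out
}
```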
-// -// Asm: VPSHLDD, CPU Feature: AVX512VBMI2 -func (x Uint32x8) ShiftAllLeftConcatMasked(shift uint8, y Uint32x8, mask Mask32x8) Uint32x8 - -// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHLDD, CPU Feature: AVX512VBMI2 -func (x Uint32x16) ShiftAllLeftConcatMasked(shift uint8, y Uint32x16, mask Mask32x16) Uint32x16 - -// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 -func (x Uint64x2) ShiftAllLeftConcatMasked(shift uint8, y Uint64x2, mask Mask64x2) Uint64x2 - -// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 -func (x Uint64x4) ShiftAllLeftConcatMasked(shift uint8, y Uint64x4, mask Mask64x4) Uint64x4 - -// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 -func (x Uint64x8) ShiftAllLeftConcatMasked(shift uint8, y Uint64x8, mask Mask64x8) Uint64x8 - -/* ShiftAllLeftMasked */ - -// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLW, CPU Feature: AVX512 -func (x Int16x8) ShiftAllLeftMasked(y uint64, mask Mask16x8) Int16x8 - -// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLW, CPU Feature: AVX512 -func (x Int16x16) ShiftAllLeftMasked(y uint64, mask Mask16x16) Int16x16 - -// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLW, CPU Feature: AVX512 -func (x Int16x32) ShiftAllLeftMasked(y uint64, mask Mask16x32) Int16x32 - -// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPSLLD, CPU Feature: AVX512 -func (x Int32x4) ShiftAllLeftMasked(y uint64, mask Mask32x4) Int32x4 - -// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLD, CPU Feature: AVX512 -func (x Int32x8) ShiftAllLeftMasked(y uint64, mask Mask32x8) Int32x8 - -// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLD, CPU Feature: AVX512 -func (x Int32x16) ShiftAllLeftMasked(y uint64, mask Mask32x16) Int32x16 - -// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLQ, CPU Feature: AVX512 -func (x Int64x2) ShiftAllLeftMasked(y uint64, mask Mask64x2) Int64x2 - -// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLQ, CPU Feature: AVX512 -func (x Int64x4) ShiftAllLeftMasked(y uint64, mask Mask64x4) Int64x4 - -// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLQ, CPU Feature: AVX512 -func (x Int64x8) ShiftAllLeftMasked(y uint64, mask Mask64x8) Int64x8 - -// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLW, CPU Feature: AVX512 -func (x Uint16x8) ShiftAllLeftMasked(y uint64, mask Mask16x8) Uint16x8 - -// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLW, CPU Feature: AVX512 -func (x Uint16x16) ShiftAllLeftMasked(y uint64, mask Mask16x16) Uint16x16 - -// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLW, CPU Feature: AVX512 -func (x Uint16x32) ShiftAllLeftMasked(y uint64, mask Mask16x32) Uint16x32 - -// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLD, CPU Feature: AVX512 -func (x Uint32x4) ShiftAllLeftMasked(y uint64, mask Mask32x4) Uint32x4 - -// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLD, CPU Feature: AVX512 -func (x Uint32x8) ShiftAllLeftMasked(y uint64, mask Mask32x8) Uint32x8 - -// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLD, CPU Feature: AVX512 -func (x Uint32x16) ShiftAllLeftMasked(y uint64, mask Mask32x16) Uint32x16 - -// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. 
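The plain ShiftAllLeftMasked removals shift every active lane by one scalar count. A scalar sketch; the treatment of out-of-range counts follows the documented x86 behaviour for PSLLD (the lane is cleared), and inactive lanes are zeroed by assumption:

```go
package maskedref

// shiftAllLeftMasked32 models the removed ShiftAllLeftMasked methods: every
// active lane is shifted left by the same scalar count with the vacated low
// bits zeroed. Counts of 32 or more clear the lane, as PSLLD does for an
// out-of-range count.
func shiftAllLeftMasked32(x []uint32, count uint64, mask []bool) []uint32 {
	out := make([]uint32, len(x))
	for i, v := range x {
		if mask[i] && count < 32 {
			out[i] = v << count
		}
	}
	return out
}
```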
-// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLQ, CPU Feature: AVX512 -func (x Uint64x2) ShiftAllLeftMasked(y uint64, mask Mask64x2) Uint64x2 - -// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLQ, CPU Feature: AVX512 -func (x Uint64x4) ShiftAllLeftMasked(y uint64, mask Mask64x4) Uint64x4 - -// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLQ, CPU Feature: AVX512 -func (x Uint64x8) ShiftAllLeftMasked(y uint64, mask Mask64x8) Uint64x8 - /* ShiftAllRight */ // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. @@ -10262,316 +5068,6 @@ func (x Uint64x4) ShiftAllRightConcat(shift uint8, y Uint64x4) Uint64x4 // Asm: VPSHRDQ, CPU Feature: AVX512VBMI2 func (x Uint64x8) ShiftAllRightConcat(shift uint8, y Uint64x8) Uint64x8 -/* ShiftAllRightConcatMasked */ - -// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHRDW, CPU Feature: AVX512VBMI2 -func (x Int16x8) ShiftAllRightConcatMasked(shift uint8, y Int16x8, mask Mask16x8) Int16x8 - -// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHRDW, CPU Feature: AVX512VBMI2 -func (x Int16x16) ShiftAllRightConcatMasked(shift uint8, y Int16x16, mask Mask16x16) Int16x16 - -// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHRDW, CPU Feature: AVX512VBMI2 -func (x Int16x32) ShiftAllRightConcatMasked(shift uint8, y Int16x32, mask Mask16x32) Int16x32 - -// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. 
-// -// Asm: VPSHRDD, CPU Feature: AVX512VBMI2 -func (x Int32x4) ShiftAllRightConcatMasked(shift uint8, y Int32x4, mask Mask32x4) Int32x4 - -// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHRDD, CPU Feature: AVX512VBMI2 -func (x Int32x8) ShiftAllRightConcatMasked(shift uint8, y Int32x8, mask Mask32x8) Int32x8 - -// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHRDD, CPU Feature: AVX512VBMI2 -func (x Int32x16) ShiftAllRightConcatMasked(shift uint8, y Int32x16, mask Mask32x16) Int32x16 - -// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2 -func (x Int64x2) ShiftAllRightConcatMasked(shift uint8, y Int64x2, mask Mask64x2) Int64x2 - -// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2 -func (x Int64x4) ShiftAllRightConcatMasked(shift uint8, y Int64x4, mask Mask64x4) Int64x4 - -// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2 -func (x Int64x8) ShiftAllRightConcatMasked(shift uint8, y Int64x8, mask Mask64x8) Int64x8 - -// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. 
-// -// Asm: VPSHRDW, CPU Feature: AVX512VBMI2 -func (x Uint16x8) ShiftAllRightConcatMasked(shift uint8, y Uint16x8, mask Mask16x8) Uint16x8 - -// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHRDW, CPU Feature: AVX512VBMI2 -func (x Uint16x16) ShiftAllRightConcatMasked(shift uint8, y Uint16x16, mask Mask16x16) Uint16x16 - -// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHRDW, CPU Feature: AVX512VBMI2 -func (x Uint16x32) ShiftAllRightConcatMasked(shift uint8, y Uint16x32, mask Mask16x32) Uint16x32 - -// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHRDD, CPU Feature: AVX512VBMI2 -func (x Uint32x4) ShiftAllRightConcatMasked(shift uint8, y Uint32x4, mask Mask32x4) Uint32x4 - -// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHRDD, CPU Feature: AVX512VBMI2 -func (x Uint32x8) ShiftAllRightConcatMasked(shift uint8, y Uint32x8, mask Mask32x8) Uint32x8 - -// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHRDD, CPU Feature: AVX512VBMI2 -func (x Uint32x16) ShiftAllRightConcatMasked(shift uint8, y Uint32x16, mask Mask32x16) Uint32x16 - -// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. 
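ShiftAllRightConcat is the mirror image of the left-concatenating form: x[i] shifts right and the low bits of y[i] slide into the vacated high bits, i.e. the lower half of a funnel shift of y[i]:x[i]. A matching scalar sketch:

```go
package maskedref

// shiftAllRightConcatMasked32 models the removed ShiftAllRightConcatMasked
// methods on 32-bit lanes (VPSHRDD): the low bits of y[i] fill the vacated
// high bits of the right-shifted x[i].
func shiftAllRightConcatMasked32(x, y []uint32, shift uint8, mask []bool) []uint32 {
	s := uint(shift) & 31 // only the low bits of the immediate are used
	out := make([]uint32, len(x))
	for i := range x {
		if mask[i] {
			out[i] = x[i]>>s | y[i]<<(32-s)
		}
	}
	return out
}
```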
-// -// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2 -func (x Uint64x2) ShiftAllRightConcatMasked(shift uint8, y Uint64x2, mask Mask64x2) Uint64x2 - -// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2 -func (x Uint64x4) ShiftAllRightConcatMasked(shift uint8, y Uint64x4, mask Mask64x4) Uint64x4 - -// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2 -func (x Uint64x8) ShiftAllRightConcatMasked(shift uint8, y Uint64x8, mask Mask64x8) Uint64x8 - -/* ShiftAllRightMasked */ - -// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRAW, CPU Feature: AVX512 -func (x Int16x8) ShiftAllRightMasked(y uint64, mask Mask16x8) Int16x8 - -// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRAW, CPU Feature: AVX512 -func (x Int16x16) ShiftAllRightMasked(y uint64, mask Mask16x16) Int16x16 - -// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRAW, CPU Feature: AVX512 -func (x Int16x32) ShiftAllRightMasked(y uint64, mask Mask16x32) Int16x32 - -// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRAD, CPU Feature: AVX512 -func (x Int32x4) ShiftAllRightMasked(y uint64, mask Mask32x4) Int32x4 - -// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRAD, CPU Feature: AVX512 -func (x Int32x8) ShiftAllRightMasked(y uint64, mask Mask32x8) Int32x8 - -// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRAD, CPU Feature: AVX512 -func (x Int32x16) ShiftAllRightMasked(y uint64, mask Mask32x16) Int32x16 - -// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPSRAQ, CPU Feature: AVX512 -func (x Int64x2) ShiftAllRightMasked(y uint64, mask Mask64x2) Int64x2 - -// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRAQ, CPU Feature: AVX512 -func (x Int64x4) ShiftAllRightMasked(y uint64, mask Mask64x4) Int64x4 - -// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRAQ, CPU Feature: AVX512 -func (x Int64x8) ShiftAllRightMasked(y uint64, mask Mask64x8) Int64x8 - -// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRLW, CPU Feature: AVX512 -func (x Uint16x8) ShiftAllRightMasked(y uint64, mask Mask16x8) Uint16x8 - -// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRLW, CPU Feature: AVX512 -func (x Uint16x16) ShiftAllRightMasked(y uint64, mask Mask16x16) Uint16x16 - -// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRLW, CPU Feature: AVX512 -func (x Uint16x32) ShiftAllRightMasked(y uint64, mask Mask16x32) Uint16x32 - -// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRLD, CPU Feature: AVX512 -func (x Uint32x4) ShiftAllRightMasked(y uint64, mask Mask32x4) Uint32x4 - -// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRLD, CPU Feature: AVX512 -func (x Uint32x8) ShiftAllRightMasked(y uint64, mask Mask32x8) Uint32x8 - -// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRLD, CPU Feature: AVX512 -func (x Uint32x16) ShiftAllRightMasked(y uint64, mask Mask32x16) Uint32x16 - -// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRLQ, CPU Feature: AVX512 -func (x Uint64x2) ShiftAllRightMasked(y uint64, mask Mask64x2) Uint64x2 - -// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRLQ, CPU Feature: AVX512 -func (x Uint64x4) ShiftAllRightMasked(y uint64, mask Mask64x4) Uint64x4 - -// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPSRLQ, CPU Feature: AVX512 -func (x Uint64x8) ShiftAllRightMasked(y uint64, mask Mask64x8) Uint64x8 - /* ShiftLeft */ // ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. @@ -10774,280 +5270,6 @@ func (x Uint64x4) ShiftLeftConcat(y Uint64x4, z Uint64x4) Uint64x4 // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 func (x Uint64x8) ShiftLeftConcat(y Uint64x8, z Uint64x8) Uint64x8 -/* ShiftLeftConcatMasked */ - -// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2 -func (x Int16x8) ShiftLeftConcatMasked(y Int16x8, z Int16x8, mask Mask16x8) Int16x8 - -// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2 -func (x Int16x16) ShiftLeftConcatMasked(y Int16x16, z Int16x16, mask Mask16x16) Int16x16 - -// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2 -func (x Int16x32) ShiftLeftConcatMasked(y Int16x32, z Int16x32, mask Mask16x32) Int16x32 - -// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2 -func (x Int32x4) ShiftLeftConcatMasked(y Int32x4, z Int32x4, mask Mask32x4) Int32x4 - -// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2 -func (x Int32x8) ShiftLeftConcatMasked(y Int32x8, z Int32x8, mask Mask32x8) Int32x8 - -// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2 -func (x Int32x16) ShiftLeftConcatMasked(y Int32x16, z Int32x16, mask Mask32x16) Int32x16 - -// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 -func (x Int64x2) ShiftLeftConcatMasked(y Int64x2, z Int64x2, mask Mask64x2) Int64x2 - -// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 -func (x Int64x4) ShiftLeftConcatMasked(y Int64x4, z Int64x4, mask Mask64x4) Int64x4 - -// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 -func (x Int64x8) ShiftLeftConcatMasked(y Int64x8, z Int64x8, mask Mask64x8) Int64x8 - -// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2 -func (x Uint16x8) ShiftLeftConcatMasked(y Uint16x8, z Uint16x8, mask Mask16x8) Uint16x8 - -// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2 -func (x Uint16x16) ShiftLeftConcatMasked(y Uint16x16, z Uint16x16, mask Mask16x16) Uint16x16 - -// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2 -func (x Uint16x32) ShiftLeftConcatMasked(y Uint16x32, z Uint16x32, mask Mask16x32) Uint16x32 - -// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2 -func (x Uint32x4) ShiftLeftConcatMasked(y Uint32x4, z Uint32x4, mask Mask32x4) Uint32x4 - -// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2 -func (x Uint32x8) ShiftLeftConcatMasked(y Uint32x8, z Uint32x8, mask Mask32x8) Uint32x8 - -// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. 
-// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2 -func (x Uint32x16) ShiftLeftConcatMasked(y Uint32x16, z Uint32x16, mask Mask32x16) Uint32x16 - -// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 -func (x Uint64x2) ShiftLeftConcatMasked(y Uint64x2, z Uint64x2, mask Mask64x2) Uint64x2 - -// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 -func (x Uint64x4) ShiftLeftConcatMasked(y Uint64x4, z Uint64x4, mask Mask64x4) Uint64x4 - -// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 -func (x Uint64x8) ShiftLeftConcatMasked(y Uint64x8, z Uint64x8, mask Mask64x8) Uint64x8 - -/* ShiftLeftMasked */ - -// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLVW, CPU Feature: AVX512 -func (x Int16x8) ShiftLeftMasked(y Int16x8, mask Mask16x8) Int16x8 - -// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLVW, CPU Feature: AVX512 -func (x Int16x16) ShiftLeftMasked(y Int16x16, mask Mask16x16) Int16x16 - -// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLVW, CPU Feature: AVX512 -func (x Int16x32) ShiftLeftMasked(y Int16x32, mask Mask16x32) Int16x32 - -// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLVD, CPU Feature: AVX512 -func (x Int32x4) ShiftLeftMasked(y Int32x4, mask Mask32x4) Int32x4 - -// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLVD, CPU Feature: AVX512 -func (x Int32x8) ShiftLeftMasked(y Int32x8, mask Mask32x8) Int32x8 - -// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPSLLVD, CPU Feature: AVX512 -func (x Int32x16) ShiftLeftMasked(y Int32x16, mask Mask32x16) Int32x16 - -// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLVQ, CPU Feature: AVX512 -func (x Int64x2) ShiftLeftMasked(y Int64x2, mask Mask64x2) Int64x2 - -// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLVQ, CPU Feature: AVX512 -func (x Int64x4) ShiftLeftMasked(y Int64x4, mask Mask64x4) Int64x4 - -// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLVQ, CPU Feature: AVX512 -func (x Int64x8) ShiftLeftMasked(y Int64x8, mask Mask64x8) Int64x8 - -// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLVW, CPU Feature: AVX512 -func (x Uint16x8) ShiftLeftMasked(y Uint16x8, mask Mask16x8) Uint16x8 - -// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLVW, CPU Feature: AVX512 -func (x Uint16x16) ShiftLeftMasked(y Uint16x16, mask Mask16x16) Uint16x16 - -// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLVW, CPU Feature: AVX512 -func (x Uint16x32) ShiftLeftMasked(y Uint16x32, mask Mask16x32) Uint16x32 - -// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLVD, CPU Feature: AVX512 -func (x Uint32x4) ShiftLeftMasked(y Uint32x4, mask Mask32x4) Uint32x4 - -// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLVD, CPU Feature: AVX512 -func (x Uint32x8) ShiftLeftMasked(y Uint32x8, mask Mask32x8) Uint32x8 - -// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLVD, CPU Feature: AVX512 -func (x Uint32x16) ShiftLeftMasked(y Uint32x16, mask Mask32x16) Uint32x16 - -// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPSLLVQ, CPU Feature: AVX512 -func (x Uint64x2) ShiftLeftMasked(y Uint64x2, mask Mask64x2) Uint64x2 - -// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLVQ, CPU Feature: AVX512 -func (x Uint64x4) ShiftLeftMasked(y Uint64x4, mask Mask64x4) Uint64x4 - -// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSLLVQ, CPU Feature: AVX512 -func (x Uint64x8) ShiftLeftMasked(y Uint64x8, mask Mask64x8) Uint64x8 - /* ShiftRight */ // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. @@ -11250,280 +5472,6 @@ func (x Uint64x4) ShiftRightConcat(y Uint64x4, z Uint64x4) Uint64x4 // Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2 func (x Uint64x8) ShiftRightConcat(y Uint64x8, z Uint64x8) Uint64x8 -/* ShiftRightConcatMasked */ - -// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2 -func (x Int16x8) ShiftRightConcatMasked(y Int16x8, z Int16x8, mask Mask16x8) Int16x8 - -// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2 -func (x Int16x16) ShiftRightConcatMasked(y Int16x16, z Int16x16, mask Mask16x16) Int16x16 - -// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2 -func (x Int16x32) ShiftRightConcatMasked(y Int16x32, z Int16x32, mask Mask16x32) Int16x32 - -// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2 -func (x Int32x4) ShiftRightConcatMasked(y Int32x4, z Int32x4, mask Mask32x4) Int32x4 - -// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2 -func (x Int32x8) ShiftRightConcatMasked(y Int32x8, z Int32x8, mask Mask32x8) Int32x8 - -// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2 -func (x Int32x16) ShiftRightConcatMasked(y Int32x16, z Int32x16, mask Mask32x16) Int32x16 - -// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2 -func (x Int64x2) ShiftRightConcatMasked(y Int64x2, z Int64x2, mask Mask64x2) Int64x2 - -// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2 -func (x Int64x4) ShiftRightConcatMasked(y Int64x4, z Int64x4, mask Mask64x4) Int64x4 - -// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2 -func (x Int64x8) ShiftRightConcatMasked(y Int64x8, z Int64x8, mask Mask64x8) Int64x8 - -// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2 -func (x Uint16x8) ShiftRightConcatMasked(y Uint16x8, z Uint16x8, mask Mask16x8) Uint16x8 - -// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2 -func (x Uint16x16) ShiftRightConcatMasked(y Uint16x16, z Uint16x16, mask Mask16x16) Uint16x16 - -// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2 -func (x Uint16x32) ShiftRightConcatMasked(y Uint16x32, z Uint16x32, mask Mask16x32) Uint16x32 - -// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2 -func (x Uint32x4) ShiftRightConcatMasked(y Uint32x4, z Uint32x4, mask Mask32x4) Uint32x4 - -// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2 -func (x Uint32x8) ShiftRightConcatMasked(y Uint32x8, z Uint32x8, mask Mask32x8) Uint32x8 - -// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2 -func (x Uint32x16) ShiftRightConcatMasked(y Uint32x16, z Uint32x16, mask Mask32x16) Uint32x16 - -// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2 -func (x Uint64x2) ShiftRightConcatMasked(y Uint64x2, z Uint64x2, mask Mask64x2) Uint64x2 - -// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2 -func (x Uint64x4) ShiftRightConcatMasked(y Uint64x4, z Uint64x4, mask Mask64x4) Uint64x4 - -// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2 -func (x Uint64x8) ShiftRightConcatMasked(y Uint64x8, z Uint64x8, mask Mask64x8) Uint64x8 - -/* ShiftRightMasked */ - -// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRAVW, CPU Feature: AVX512 -func (x Int16x8) ShiftRightMasked(y Int16x8, mask Mask16x8) Int16x8 - -// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPSRAVW, CPU Feature: AVX512 -func (x Int16x16) ShiftRightMasked(y Int16x16, mask Mask16x16) Int16x16 - -// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRAVW, CPU Feature: AVX512 -func (x Int16x32) ShiftRightMasked(y Int16x32, mask Mask16x32) Int16x32 - -// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRAVD, CPU Feature: AVX512 -func (x Int32x4) ShiftRightMasked(y Int32x4, mask Mask32x4) Int32x4 - -// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRAVD, CPU Feature: AVX512 -func (x Int32x8) ShiftRightMasked(y Int32x8, mask Mask32x8) Int32x8 - -// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRAVD, CPU Feature: AVX512 -func (x Int32x16) ShiftRightMasked(y Int32x16, mask Mask32x16) Int32x16 - -// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRAVQ, CPU Feature: AVX512 -func (x Int64x2) ShiftRightMasked(y Int64x2, mask Mask64x2) Int64x2 - -// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRAVQ, CPU Feature: AVX512 -func (x Int64x4) ShiftRightMasked(y Int64x4, mask Mask64x4) Int64x4 - -// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRAVQ, CPU Feature: AVX512 -func (x Int64x8) ShiftRightMasked(y Int64x8, mask Mask64x8) Int64x8 - -// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRLVW, CPU Feature: AVX512 -func (x Uint16x8) ShiftRightMasked(y Uint16x8, mask Mask16x8) Uint16x8 - -// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRLVW, CPU Feature: AVX512 -func (x Uint16x16) ShiftRightMasked(y Uint16x16, mask Mask16x16) Uint16x16 - -// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPSRLVW, CPU Feature: AVX512 -func (x Uint16x32) ShiftRightMasked(y Uint16x32, mask Mask16x32) Uint16x32 - -// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRLVD, CPU Feature: AVX512 -func (x Uint32x4) ShiftRightMasked(y Uint32x4, mask Mask32x4) Uint32x4 - -// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRLVD, CPU Feature: AVX512 -func (x Uint32x8) ShiftRightMasked(y Uint32x8, mask Mask32x8) Uint32x8 - -// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRLVD, CPU Feature: AVX512 -func (x Uint32x16) ShiftRightMasked(y Uint32x16, mask Mask32x16) Uint32x16 - -// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRLVQ, CPU Feature: AVX512 -func (x Uint64x2) ShiftRightMasked(y Uint64x2, mask Mask64x2) Uint64x2 - -// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRLVQ, CPU Feature: AVX512 -func (x Uint64x4) ShiftRightMasked(y Uint64x4, mask Mask64x4) Uint64x4 - -// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSRLVQ, CPU Feature: AVX512 -func (x Uint64x8) ShiftRightMasked(y Uint64x8, mask Mask64x8) Uint64x8 - /* Sqrt */ // Sqrt computes the square root of each element. @@ -11556,50 +5504,6 @@ func (x Float64x4) Sqrt() Float64x4 // Asm: VSQRTPD, CPU Feature: AVX512 func (x Float64x8) Sqrt() Float64x8 -/* SqrtMasked */ - -// SqrtMasked computes the square root of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VSQRTPS, CPU Feature: AVX512 -func (x Float32x4) SqrtMasked(mask Mask32x4) Float32x4 - -// SqrtMasked computes the square root of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VSQRTPS, CPU Feature: AVX512 -func (x Float32x8) SqrtMasked(mask Mask32x8) Float32x8 - -// SqrtMasked computes the square root of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VSQRTPS, CPU Feature: AVX512 -func (x Float32x16) SqrtMasked(mask Mask32x16) Float32x16 - -// SqrtMasked computes the square root of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VSQRTPD, CPU Feature: AVX512 -func (x Float64x2) SqrtMasked(mask Mask64x2) Float64x2 - -// SqrtMasked computes the square root of each element. -// -// This operation is applied selectively under a write mask. -// -// Asm: VSQRTPD, CPU Feature: AVX512 -func (x Float64x4) SqrtMasked(mask Mask64x4) Float64x4 - -// SqrtMasked computes the square root of each element. 
-// -// This operation is applied selectively under a write mask. -// -// Asm: VSQRTPD, CPU Feature: AVX512 -func (x Float64x8) SqrtMasked(mask Mask64x8) Float64x8 - /* Sub */ // Sub subtracts corresponding elements of two vectors. @@ -11752,218 +5656,6 @@ func (x Uint64x4) Sub(y Uint64x4) Uint64x4 // Asm: VPSUBQ, CPU Feature: AVX512 func (x Uint64x8) Sub(y Uint64x8) Uint64x8 -/* SubMasked */ - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VSUBPS, CPU Feature: AVX512 -func (x Float32x4) SubMasked(y Float32x4, mask Mask32x4) Float32x4 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VSUBPS, CPU Feature: AVX512 -func (x Float32x8) SubMasked(y Float32x8, mask Mask32x8) Float32x8 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VSUBPS, CPU Feature: AVX512 -func (x Float32x16) SubMasked(y Float32x16, mask Mask32x16) Float32x16 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VSUBPD, CPU Feature: AVX512 -func (x Float64x2) SubMasked(y Float64x2, mask Mask64x2) Float64x2 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VSUBPD, CPU Feature: AVX512 -func (x Float64x4) SubMasked(y Float64x4, mask Mask64x4) Float64x4 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VSUBPD, CPU Feature: AVX512 -func (x Float64x8) SubMasked(y Float64x8, mask Mask64x8) Float64x8 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBB, CPU Feature: AVX512 -func (x Int8x16) SubMasked(y Int8x16, mask Mask8x16) Int8x16 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBB, CPU Feature: AVX512 -func (x Int8x32) SubMasked(y Int8x32, mask Mask8x32) Int8x32 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBB, CPU Feature: AVX512 -func (x Int8x64) SubMasked(y Int8x64, mask Mask8x64) Int8x64 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBW, CPU Feature: AVX512 -func (x Int16x8) SubMasked(y Int16x8, mask Mask16x8) Int16x8 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBW, CPU Feature: AVX512 -func (x Int16x16) SubMasked(y Int16x16, mask Mask16x16) Int16x16 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBW, CPU Feature: AVX512 -func (x Int16x32) SubMasked(y Int16x32, mask Mask16x32) Int16x32 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBD, CPU Feature: AVX512 -func (x Int32x4) SubMasked(y Int32x4, mask Mask32x4) Int32x4 - -// SubMasked subtracts corresponding elements of two vectors. 
-// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBD, CPU Feature: AVX512 -func (x Int32x8) SubMasked(y Int32x8, mask Mask32x8) Int32x8 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBD, CPU Feature: AVX512 -func (x Int32x16) SubMasked(y Int32x16, mask Mask32x16) Int32x16 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBQ, CPU Feature: AVX512 -func (x Int64x2) SubMasked(y Int64x2, mask Mask64x2) Int64x2 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBQ, CPU Feature: AVX512 -func (x Int64x4) SubMasked(y Int64x4, mask Mask64x4) Int64x4 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBQ, CPU Feature: AVX512 -func (x Int64x8) SubMasked(y Int64x8, mask Mask64x8) Int64x8 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBB, CPU Feature: AVX512 -func (x Uint8x16) SubMasked(y Uint8x16, mask Mask8x16) Uint8x16 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBB, CPU Feature: AVX512 -func (x Uint8x32) SubMasked(y Uint8x32, mask Mask8x32) Uint8x32 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBB, CPU Feature: AVX512 -func (x Uint8x64) SubMasked(y Uint8x64, mask Mask8x64) Uint8x64 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBW, CPU Feature: AVX512 -func (x Uint16x8) SubMasked(y Uint16x8, mask Mask16x8) Uint16x8 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBW, CPU Feature: AVX512 -func (x Uint16x16) SubMasked(y Uint16x16, mask Mask16x16) Uint16x16 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBW, CPU Feature: AVX512 -func (x Uint16x32) SubMasked(y Uint16x32, mask Mask16x32) Uint16x32 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBD, CPU Feature: AVX512 -func (x Uint32x4) SubMasked(y Uint32x4, mask Mask32x4) Uint32x4 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBD, CPU Feature: AVX512 -func (x Uint32x8) SubMasked(y Uint32x8, mask Mask32x8) Uint32x8 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBD, CPU Feature: AVX512 -func (x Uint32x16) SubMasked(y Uint32x16, mask Mask32x16) Uint32x16 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPSUBQ, CPU Feature: AVX512 -func (x Uint64x2) SubMasked(y Uint64x2, mask Mask64x2) Uint64x2 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBQ, CPU Feature: AVX512 -func (x Uint64x4) SubMasked(y Uint64x4, mask Mask64x4) Uint64x4 - -// SubMasked subtracts corresponding elements of two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBQ, CPU Feature: AVX512 -func (x Uint64x8) SubMasked(y Uint64x8, mask Mask64x8) Uint64x8 - /* SubPairs */ // SubPairs horizontally subtracts adjacent pairs of elements. @@ -12114,92 +5806,6 @@ func (x Uint16x16) SubSaturated(y Uint16x16) Uint16x16 // Asm: VPSUBUSW, CPU Feature: AVX512 func (x Uint16x32) SubSaturated(y Uint16x32) Uint16x32 -/* SubSaturatedMasked */ - -// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBSB, CPU Feature: AVX512 -func (x Int8x16) SubSaturatedMasked(y Int8x16, mask Mask8x16) Int8x16 - -// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBSB, CPU Feature: AVX512 -func (x Int8x32) SubSaturatedMasked(y Int8x32, mask Mask8x32) Int8x32 - -// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBSB, CPU Feature: AVX512 -func (x Int8x64) SubSaturatedMasked(y Int8x64, mask Mask8x64) Int8x64 - -// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBSW, CPU Feature: AVX512 -func (x Int16x8) SubSaturatedMasked(y Int16x8, mask Mask16x8) Int16x8 - -// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBSW, CPU Feature: AVX512 -func (x Int16x16) SubSaturatedMasked(y Int16x16, mask Mask16x16) Int16x16 - -// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBSW, CPU Feature: AVX512 -func (x Int16x32) SubSaturatedMasked(y Int16x32, mask Mask16x32) Int16x32 - -// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBUSB, CPU Feature: AVX512 -func (x Uint8x16) SubSaturatedMasked(y Uint8x16, mask Mask8x16) Uint8x16 - -// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBUSB, CPU Feature: AVX512 -func (x Uint8x32) SubSaturatedMasked(y Uint8x32, mask Mask8x32) Uint8x32 - -// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBUSB, CPU Feature: AVX512 -func (x Uint8x64) SubSaturatedMasked(y Uint8x64, mask Mask8x64) Uint8x64 - -// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPSUBUSW, CPU Feature: AVX512 -func (x Uint16x8) SubSaturatedMasked(y Uint16x8, mask Mask16x8) Uint16x8 - -// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBUSW, CPU Feature: AVX512 -func (x Uint16x16) SubSaturatedMasked(y Uint16x16, mask Mask16x16) Uint16x16 - -// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPSUBUSW, CPU Feature: AVX512 -func (x Uint16x32) SubSaturatedMasked(y Uint16x32, mask Mask16x32) Uint16x32 - /* Trunc */ // Trunc truncates elements towards zero. @@ -12266,62 +5872,6 @@ func (x Float64x4) TruncScaled(prec uint8) Float64x4 // Asm: VRNDSCALEPD, CPU Feature: AVX512 func (x Float64x8) TruncScaled(prec uint8) Float64x8 -/* TruncScaledMasked */ - -// TruncScaledMasked truncates elements with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VRNDSCALEPS, CPU Feature: AVX512 -func (x Float32x4) TruncScaledMasked(prec uint8, mask Mask32x4) Float32x4 - -// TruncScaledMasked truncates elements with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VRNDSCALEPS, CPU Feature: AVX512 -func (x Float32x8) TruncScaledMasked(prec uint8, mask Mask32x8) Float32x8 - -// TruncScaledMasked truncates elements with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VRNDSCALEPS, CPU Feature: AVX512 -func (x Float32x16) TruncScaledMasked(prec uint8, mask Mask32x16) Float32x16 - -// TruncScaledMasked truncates elements with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VRNDSCALEPD, CPU Feature: AVX512 -func (x Float64x2) TruncScaledMasked(prec uint8, mask Mask64x2) Float64x2 - -// TruncScaledMasked truncates elements with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VRNDSCALEPD, CPU Feature: AVX512 -func (x Float64x4) TruncScaledMasked(prec uint8, mask Mask64x4) Float64x4 - -// TruncScaledMasked truncates elements with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VRNDSCALEPD, CPU Feature: AVX512 -func (x Float64x8) TruncScaledMasked(prec uint8, mask Mask64x8) Float64x8 - /* TruncScaledResidue */ // TruncScaledResidue computes the difference after truncating with specified precision. 
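Note: the masked methods removed above are expected to be spelled as the plain operation followed by Masked, matching the pattern used in the updated tests at the end of this patch. A minimal sketch under that assumption, using only API that appears elsewhere in this change; the helper name and values are illustrative, and the snippet assumes a file that imports the experimental simd package:

	// maskedSub is a hypothetical stand-in for the removed SubMasked method:
	// compute the full subtraction, then apply the write mask. Per the updated
	// TestMaskConversion below, lanes not selected by the mask come out zero.
	func maskedSub(a, b simd.Int32x4, m simd.Mask32x4) simd.Int32x4 {
		return a.Sub(b).Masked(m)
	}

For example, with a = {10, 20, 30, 40}, b = {1, 2, 3, 4} and a mask selecting lanes 0 and 2, the result would be {9, 0, 27, 0}.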
@@ -12366,62 +5916,6 @@ func (x Float64x4) TruncScaledResidue(prec uint8) Float64x4 // Asm: VREDUCEPD, CPU Feature: AVX512 func (x Float64x8) TruncScaledResidue(prec uint8) Float64x8 -/* TruncScaledResidueMasked */ - -// TruncScaledResidueMasked computes the difference after truncating with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VREDUCEPS, CPU Feature: AVX512 -func (x Float32x4) TruncScaledResidueMasked(prec uint8, mask Mask32x4) Float32x4 - -// TruncScaledResidueMasked computes the difference after truncating with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VREDUCEPS, CPU Feature: AVX512 -func (x Float32x8) TruncScaledResidueMasked(prec uint8, mask Mask32x8) Float32x8 - -// TruncScaledResidueMasked computes the difference after truncating with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VREDUCEPS, CPU Feature: AVX512 -func (x Float32x16) TruncScaledResidueMasked(prec uint8, mask Mask32x16) Float32x16 - -// TruncScaledResidueMasked computes the difference after truncating with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VREDUCEPD, CPU Feature: AVX512 -func (x Float64x2) TruncScaledResidueMasked(prec uint8, mask Mask64x2) Float64x2 - -// TruncScaledResidueMasked computes the difference after truncating with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VREDUCEPD, CPU Feature: AVX512 -func (x Float64x4) TruncScaledResidueMasked(prec uint8, mask Mask64x4) Float64x4 - -// TruncScaledResidueMasked computes the difference after truncating with specified precision. -// -// This operation is applied selectively under a write mask. -// -// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: VREDUCEPD, CPU Feature: AVX512 -func (x Float64x8) TruncScaledResidueMasked(prec uint8, mask Mask64x8) Float64x8 - /* Xor */ // Xor performs a bitwise XOR operation between two vectors. @@ -12544,92 +6038,6 @@ func (x Uint64x4) Xor(y Uint64x4) Uint64x4 // Asm: VPXORQ, CPU Feature: AVX512 func (x Uint64x8) Xor(y Uint64x8) Uint64x8 -/* XorMasked */ - -// XorMasked performs a bitwise XOR operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPXORD, CPU Feature: AVX512 -func (x Int32x4) XorMasked(y Int32x4, mask Mask32x4) Int32x4 - -// XorMasked performs a bitwise XOR operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPXORD, CPU Feature: AVX512 -func (x Int32x8) XorMasked(y Int32x8, mask Mask32x8) Int32x8 - -// XorMasked performs a bitwise XOR operation between two vectors. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPXORD, CPU Feature: AVX512 -func (x Int32x16) XorMasked(y Int32x16, mask Mask32x16) Int32x16 - -// XorMasked performs a bitwise XOR operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPXORQ, CPU Feature: AVX512 -func (x Int64x2) XorMasked(y Int64x2, mask Mask64x2) Int64x2 - -// XorMasked performs a bitwise XOR operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPXORQ, CPU Feature: AVX512 -func (x Int64x4) XorMasked(y Int64x4, mask Mask64x4) Int64x4 - -// XorMasked performs a bitwise XOR operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPXORQ, CPU Feature: AVX512 -func (x Int64x8) XorMasked(y Int64x8, mask Mask64x8) Int64x8 - -// XorMasked performs a bitwise XOR operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPXORD, CPU Feature: AVX512 -func (x Uint32x4) XorMasked(y Uint32x4, mask Mask32x4) Uint32x4 - -// XorMasked performs a bitwise XOR operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPXORD, CPU Feature: AVX512 -func (x Uint32x8) XorMasked(y Uint32x8, mask Mask32x8) Uint32x8 - -// XorMasked performs a bitwise XOR operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPXORD, CPU Feature: AVX512 -func (x Uint32x16) XorMasked(y Uint32x16, mask Mask32x16) Uint32x16 - -// XorMasked performs a bitwise XOR operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPXORQ, CPU Feature: AVX512 -func (x Uint64x2) XorMasked(y Uint64x2, mask Mask64x2) Uint64x2 - -// XorMasked performs a bitwise XOR operation between two vectors. -// -// This operation is applied selectively under a write mask. -// -// Asm: VPXORQ, CPU Feature: AVX512 -func (x Uint64x4) XorMasked(y Uint64x4, mask Mask64x4) Uint64x4 - -// XorMasked performs a bitwise XOR operation between two vectors. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VPXORQ, CPU Feature: AVX512 -func (x Uint64x8) XorMasked(y Uint64x8, mask Mask64x8) Uint64x8 - /* blend */ // blend blends two vectors based on mask values, choosing either diff --git a/src/simd/simd_test.go b/src/simd/simd_test.go index 3faeeaccfde..c88fe4b9fef 100644 --- a/src/simd/simd_test.go +++ b/src/simd/simd_test.go @@ -43,7 +43,7 @@ func TestType(t *testing.T) { return } v.z = maskT(simd.Mask32x4FromBits(0b0011)) - *v.y = v.y.AddMasked(v.x, simd.Mask32x4(v.z)) + *v.y = v.y.Add(v.x).Masked(simd.Mask32x4(v.z)) got := [4]int32{} v.y.Store(&got) @@ -121,7 +121,7 @@ func TestMaskConversion(t *testing.T) { } x := simd.LoadInt32x4Slice([]int32{5, 0, 7, 0}) mask := simd.Int32x4{}.Sub(x).ToMask() - y := simd.LoadInt32x4Slice([]int32{1, 2, 3, 4}).AddMasked(x, mask) + y := simd.LoadInt32x4Slice([]int32{1, 2, 3, 4}).Add(x).Masked(mask) want := [4]int32{6, 0, 10, 0} got := make([]int32, 4) y.StoreSlice(got) @@ -327,7 +327,7 @@ func TestBitMaskLoad(t *testing.T) { results := [2]int64{} want := [2]int64{0, 6} m := simd.LoadMask64x2FromBits(&bits) - simd.LoadInt64x2Slice([]int64{1, 2}).AddMasked(simd.LoadInt64x2Slice([]int64{3, 4}), m).Store(&results) + simd.LoadInt64x2Slice([]int64{1, 2}).Add(simd.LoadInt64x2Slice([]int64{3, 4})).Masked(m).Store(&results) for i := range 2 { if results[i] != want[i] { t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], results[i]) @@ -359,7 +359,7 @@ func TestBitMaskFromBits(t *testing.T) { results := [2]int64{} want := [2]int64{0, 6} m := simd.Mask64x2FromBits(0b10) - simd.LoadInt64x2Slice([]int64{1, 2}).AddMasked(simd.LoadInt64x2Slice([]int64{3, 4}), m).Store(&results) + simd.LoadInt64x2Slice([]int64{1, 2}).Add(simd.LoadInt64x2Slice([]int64{3, 4})).Masked(m).Store(&results) for i := range 2 { if results[i] != want[i] { t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], results[i])
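Taken together, the test updates above show the caller-side migration this patch implies: drop the fused *Masked method and apply Masked to the unmasked result. A minimal before/after sketch assuming the API exercised in the tests; slice contents are illustrative and the snippet assumes a file that imports the experimental simd package:

	// Before this change (methods removed above):
	//   sum := a.AddMasked(b, m)
	// After this change: plain op, then an explicit write mask.
	func maskedAddExample() [4]int32 {
		a := simd.LoadInt32x4Slice([]int32{1, 2, 3, 4})
		b := simd.LoadInt32x4Slice([]int32{3, 4, 5, 6})
		m := simd.Mask32x4FromBits(0b0011) // bit i selects lane i, so lanes 0 and 1
		sum := a.Add(b).Masked(m)          // unselected lanes 2 and 3 come out zero

		var got [4]int32
		sum.Store(&got) // got == [4]int32{4, 6, 0, 0}
		return got
	}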